Add option to define "rows" in Stream
Similarly to the "columns" parameter, this commit also makes it possible to define "rows" visual dividers.
pull/126/head
parent
7d4c9e53c6
commit
dbabc5b1c1
|
|
@ -66,6 +66,7 @@ class Stream(BaseParser):
|
||||||
edge_tol=50,
|
edge_tol=50,
|
||||||
row_tol=2,
|
row_tol=2,
|
||||||
column_tol=0,
|
column_tol=0,
|
||||||
|
rows=None,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
self.table_regions = table_regions
|
self.table_regions = table_regions
|
||||||
|
|
@ -78,6 +79,7 @@ class Stream(BaseParser):
|
||||||
self.edge_tol = edge_tol
|
self.edge_tol = edge_tol
|
||||||
self.row_tol = row_tol
|
self.row_tol = row_tol
|
||||||
self.column_tol = column_tol
|
self.column_tol = column_tol
|
||||||
|
self.rows = rows
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _text_bbox(t_bbox):
|
def _text_bbox(t_bbox):
|
||||||
|
|
@ -330,7 +332,14 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||||
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
|
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
|
||||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
if self.rows is not None and self.rows[table_idx] != "":
|
||||||
|
rows = self.rows[table_idx].split(",")
|
||||||
|
rows = [float(c) for c in rows]
|
||||||
|
rows.insert(0, text_y_max)
|
||||||
|
rows.append(text_y_min)
|
||||||
|
rows = [(rows[i], rows[i+1]) for i in range(0, len(rows) - 1)]
|
||||||
|
else:
|
||||||
|
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||||
elements = [len(r) for r in rows_grouped]
|
elements = [len(r) for r in rows_grouped]
|
||||||
|
|
||||||
if self.columns is not None and self.columns[table_idx] != "":
|
if self.columns is not None and self.columns[table_idx] != "":
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue