Add option to define "rows" in Stream
Similarly to the "columns" parameter, this commit makes it possible to also define "rows" as visual dividers.
pull/126/head
parent
7d4c9e53c6
commit
dbabc5b1c1
|
|
@ -66,6 +66,7 @@ class Stream(BaseParser):
|
|||
edge_tol=50,
|
||||
row_tol=2,
|
||||
column_tol=0,
|
||||
rows=None,
|
||||
**kwargs
|
||||
):
|
||||
self.table_regions = table_regions
|
||||
|
|
@ -78,6 +79,7 @@ class Stream(BaseParser):
|
|||
self.edge_tol = edge_tol
|
||||
self.row_tol = row_tol
|
||||
self.column_tol = column_tol
|
||||
self.rows = rows
|
||||
|
||||
@staticmethod
|
||||
def _text_bbox(t_bbox):
|
||||
|
|
@ -330,6 +332,13 @@ class Stream(BaseParser):
|
|||
|
||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
|
||||
if self.rows is not None and self.rows[table_idx] != "":
|
||||
rows = self.rows[table_idx].split(",")
|
||||
rows = [float(c) for c in rows]
|
||||
rows.insert(0, text_y_max)
|
||||
rows.append(text_y_min)
|
||||
rows = [(rows[i], rows[i+1]) for i in range(0, len(rows) - 1)]
|
||||
else:
|
||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||
elements = [len(r) for r in rows_grouped]
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue