Add option to define "rows" in Stream

Similar to the "columns" parameter, this commit makes it possible to also define "rows" visual dividers
pull/126/head
Idan David 2020-04-05 13:38:10 +03:00
parent 7d4c9e53c6
commit dbabc5b1c1
1 changed file with 10 additions and 1 deletion

View File

@ -66,6 +66,7 @@ class Stream(BaseParser):
edge_tol=50,
row_tol=2,
column_tol=0,
rows=None,
**kwargs
):
self.table_regions = table_regions
@ -78,6 +79,7 @@ class Stream(BaseParser):
self.edge_tol = edge_tol
self.row_tol = row_tol
self.column_tol = column_tol
self.rows = rows
@staticmethod
def _text_bbox(t_bbox):
@ -330,6 +332,13 @@ class Stream(BaseParser):
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
if self.rows is not None and self.rows[table_idx] != "":
rows = self.rows[table_idx].split(",")
rows = [float(c) for c in rows]
rows.insert(0, text_y_max)
rows.append(text_y_min)
rows = [(rows[i], rows[i+1]) for i in range(0, len(rows) - 1)]
else:
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
elements = [len(r) for r in rows_grouped]