diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index 33f2fe5..98a8a83 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -66,6 +66,7 @@ class Stream(BaseParser): edge_tol=50, row_tol=2, column_tol=0, + rows=None, **kwargs ): self.table_regions = table_regions @@ -78,6 +79,7 @@ class Stream(BaseParser): self.edge_tol = edge_tol self.row_tol = row_tol self.column_tol = column_tol + self.rows = rows @staticmethod def _text_bbox(t_bbox): @@ -330,7 +332,14 @@ class Stream(BaseParser): text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol) - rows = self._join_rows(rows_grouped, text_y_max, text_y_min) + if self.rows is not None and self.rows[table_idx] != "": + rows = self.rows[table_idx].split(",") + rows = [float(c) for c in rows] + rows.insert(0, text_y_max) + rows.append(text_y_min) + rows = [(rows[i], rows[i+1]) for i in range(0, len(rows) - 1)] + else: + rows = self._join_rows(rows_grouped, text_y_max, text_y_min) elements = [len(r) for r in rows_grouped] if self.columns is not None and self.columns[table_idx] != "":