diff --git a/HISTORY.md b/HISTORY.md
index 039578f..1957985 100755
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -8,6 +8,7 @@ master
 
 * [#293](https://github.com/socialcopsdev/camelot/issues/293) Split text ignores all text to the right of last cut. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta.
 * [#277](https://github.com/socialcopsdev/camelot/issues/277) Sort TableList by order of tables in PDF. [#283](https://github.com/socialcopsdev/camelot/pull/283) by [Sym Roe](https://github.com/symroe).
+* [#312](https://github.com/socialcopsdev/camelot/issues/312) `table_regions` throws `ValueError` when `flavor='stream'`. [#332](https://github.com/socialcopsdev/camelot/pull/332) by Vinayak Mehta.
 
 0.7.2 (2019-01-10)
 ------------------
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 1efe144..f36fa40 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -286,7 +286,11 @@ class Stream(BaseParser):
                 # filter horizontal text
                 hor_text = []
                 for region in self.table_regions:
-                    x1, y1, x2, y2 = region
+                    x1, y1, x2, y2 = region.split(",")
+                    x1 = float(x1)
+                    y1 = float(y1)
+                    x2 = float(x2)
+                    y2 = float(y2)
                     region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text)
                     hor_text.extend(region_text)
             # find tables based on nurminen's detection algorithm
diff --git a/tests/test_common.py b/tests/test_common.py
index c04a151..7f5636b 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -69,6 +69,14 @@ def test_stream_two_tables():
     assert df2.equals(tables[1].df)
 
 
+def test_stream_table_regions():
+    df = pd.DataFrame(data_stream_table_areas)
+
+    filename = os.path.join(testdir, "tabula/us-007.pdf")
+    tables = camelot.read_pdf(filename, flavor="stream", table_regions=["320,460,573,335"])
+    assert df.equals(tables[0].df)
+
+
 def test_stream_table_areas():
     df = pd.DataFrame(data_stream_table_areas)
 