From b2a8348f13d138359eaf94884d71ad5989410e23 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 26 May 2019 17:13:59 +0530 Subject: [PATCH 1/4] Fix #312 --- camelot/parsers/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index 1efe144..2fb7da3 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -286,7 +286,7 @@ class Stream(BaseParser): # filter horizontal text hor_text = [] for region in self.table_regions: - x1, y1, x2, y2 = region + x1, y1, x2, y2 = region.split(",") region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text) hor_text.extend(region_text) # find tables based on nurminen's detection algorithm From de3281c1b67b80d21a3079566d8e16c07cfb16ff Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Mon, 27 May 2019 22:18:23 +0530 Subject: [PATCH 2/4] Add test --- camelot/parsers/stream.py | 4 ++++ tests/test_common.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index 2fb7da3..f36fa40 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -287,6 +287,10 @@ class Stream(BaseParser): hor_text = [] for region in self.table_regions: x1, y1, x2, y2 = region.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text) hor_text.extend(region_text) # find tables based on nurminen's detection algorithm diff --git a/tests/test_common.py b/tests/test_common.py index c04a151..dc2f5af 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -69,6 +69,14 @@ def test_stream_two_tables(): assert df2.equals(tables[1].df) +def test_stream_table_regions(): + df = pd.DataFrame(data_stream_table_areas) + + filename = os.path.join(testdir, "tabula/us-007.pdf") + tables = camelot.read_pdf(filename, flavor="stream", table_areas=["320,460,573,335"]) + assert df.equals(tables[0].df) + + def test_stream_table_areas(): df = pd.DataFrame(data_stream_table_areas) From 477568dea7a570295594eaa5cae56b3c0208db6d Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Mon, 27 May 2019 22:29:50 +0530 Subject: [PATCH 3/4] Fix test --- tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_common.py b/tests/test_common.py index dc2f5af..7f5636b 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -73,7 +73,7 @@ def test_stream_table_regions(): df = pd.DataFrame(data_stream_table_areas) filename = os.path.join(testdir, "tabula/us-007.pdf") - tables = camelot.read_pdf(filename, flavor="stream", table_areas=["320,460,573,335"]) + tables = camelot.read_pdf(filename, flavor="stream", table_regions=["320,460,573,335"]) assert df.equals(tables[0].df) From 8d9fdb740e3919c1921865be7ae562fe5ae34b64 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Mon, 27 May 2019 22:48:45 +0530 Subject: [PATCH 4/4] Update HISTORY.md --- HISTORY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.md b/HISTORY.md index 039578f..1957985 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ master * [#293](https://github.com/socialcopsdev/camelot/issues/293) Split text ignores all text to the right of last cut. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta. * [#277](https://github.com/socialcopsdev/camelot/issues/277) Sort TableList by order of tables in PDF. [#283](https://github.com/socialcopsdev/camelot/pull/283) by [Sym Roe](https://github.com/symroe). +* [#312](https://github.com/socialcopsdev/camelot/issues/312) `table_regions` throws `ValueError` when `flavor='stream'`. [#332](https://github.com/socialcopsdev/camelot/pull/332) by Vinayak Mehta. 0.7.2 (2019-01-10) ------------------