From 79ea4adcd13e4169f61173825c36b0ca1779b9bf Mon Sep 17 00:00:00 2001 From: Frh Date: Mon, 4 May 2020 17:41:57 -0700 Subject: [PATCH] Add baseline test for hybrid Fix first split merge issue --- camelot/parsers/hybrid.py | 5 +++++ tests/test_common.py | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/camelot/parsers/hybrid.py b/camelot/parsers/hybrid.py index e84dfa8..98d80c4 100644 --- a/camelot/parsers/hybrid.py +++ b/camelot/parsers/hybrid.py @@ -149,6 +149,11 @@ class Hybrid(BaseParser): # Our boundary is fully after the split, move on idx_boundaries = idx_boundaries - 1 previous_boundary = boundary + if idx_boundaries < 0: + # If this is the last boundary to the left, set its + # edge at the split + boundary[0] = split + idx_splits = idx_splits - 1 else: # The split is inside our boundary: split it new_boundary = [split, boundary[1]] diff --git a/tests/test_common.py b/tests/test_common.py index 6269ba7..c321e1d 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -286,6 +286,13 @@ def test_network_layout_kwargs(): # Hybrid parser +def test_hybrid(): + df = pd.DataFrame(data_stream) + + filename = os.path.join(testdir, "health.pdf") + tables = camelot.read_pdf(filename, flavor="hybrid") + assert_frame_equal(df, tables[0].df) + def test_hybrid_vertical_header(): """Tests a complex table with a vertically text header. """