Hybrid parser fixes
Improve the parser comparison notebook to flag identical parses and display multiple tables correctly. Fix tolerance parameter inclusion for the hybrid parser.
pull/153/head
parent
79ea4adcd1
commit
ae429fc248
|
|
@ -179,7 +179,9 @@ class Hybrid(BaseParser):
|
||||||
self.table_bbox_parses[lattice_bbox] = self.lattice_parser
|
self.table_bbox_parses[lattice_bbox] = self.lattice_parser
|
||||||
else:
|
else:
|
||||||
network_cols_boundaries = self._augment_boundaries_with_splits(
|
network_cols_boundaries = self._augment_boundaries_with_splits(
|
||||||
network_cols_boundaries, lattice_cols) # self.column_tol???
|
network_cols_boundaries,
|
||||||
|
lattice_cols,
|
||||||
|
self.lattice_parser.joint_tol)
|
||||||
augmented_bbox = (
|
augmented_bbox = (
|
||||||
network_cols_boundaries[0][0], network_bbox[1],
|
network_cols_boundaries[0][0], network_bbox[1],
|
||||||
network_cols_boundaries[-1][1], network_bbox[3],
|
network_cols_boundaries[-1][1], network_bbox[3],
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -293,6 +293,17 @@ def test_hybrid():
|
||||||
tables = camelot.read_pdf(filename, flavor="hybrid")
|
tables = camelot.read_pdf(filename, flavor="hybrid")
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
def test_hybrid_two_tables():
|
||||||
|
df1 = pd.DataFrame(data_network_two_tables_1)
|
||||||
|
df2 = pd.DataFrame(data_network_two_tables_2)
|
||||||
|
|
||||||
|
filename = os.path.join(testdir, "tabula/12s0324.pdf")
|
||||||
|
tables = camelot.read_pdf(filename, flavor="hybrid")
|
||||||
|
|
||||||
|
assert len(tables) == 2
|
||||||
|
assert df1.equals(tables[0].df)
|
||||||
|
assert df2.equals(tables[1].df)
|
||||||
|
|
||||||
def test_hybrid_vertical_header():
|
def test_hybrid_vertical_header():
|
||||||
"""Tests a complex table with a vertically text header.
|
"""Tests a complex table with a vertically text header.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue