Hybrid parser fixes
Improve the parser comparison notebook to flag identical parses and to display multiple tables correctly. Fix tolerance parameter inclusion for the hybrid parser. (pull/153/head)
parent
7fae107560
commit
63adfd5468
|
|
@ -179,7 +179,9 @@ class Hybrid(BaseParser):
|
|||
self.table_bbox_parses[lattice_bbox] = self.lattice_parser
|
||||
else:
|
||||
network_cols_boundaries = self._augment_boundaries_with_splits(
|
||||
network_cols_boundaries, lattice_cols) # self.column_tol???
|
||||
network_cols_boundaries,
|
||||
lattice_cols,
|
||||
self.lattice_parser.joint_tol)
|
||||
augmented_bbox = (
|
||||
network_cols_boundaries[0][0], network_bbox[1],
|
||||
network_cols_boundaries[-1][1], network_bbox[3],
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -299,6 +299,17 @@ def test_hybrid():
|
|||
tables = camelot.read_pdf(filename, flavor="hybrid")
|
||||
assert_frame_equal(df, tables[0].df)
|
||||
|
||||
def test_hybrid_two_tables():
    """Check that the hybrid flavor extracts both tables from 12s0324.pdf.

    The two parsed tables are compared, in order, against the expected
    ``data_network_two_tables_1`` and ``data_network_two_tables_2`` data.
    """
    df1 = pd.DataFrame(data_network_two_tables_1)
    df2 = pd.DataFrame(data_network_two_tables_2)

    filename = os.path.join(testdir, "tabula/12s0324.pdf")
    tables = camelot.read_pdf(filename, flavor="hybrid")

    assert len(tables) == 2
    # assert_frame_equal (as used by test_hybrid) reports which cells differ
    # on failure; a bare ``assert df.equals(...)`` would only report False.
    assert_frame_equal(df1, tables[0].df)
    assert_frame_equal(df2, tables[1].df)
|
||||
|
||||
def test_hybrid_vertical_header():
|
||||
"""Tests a complex table with a vertically text header.
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in New Issue