From 00d5d2ede4722ae6c88c0581cfec4d42812bf785 Mon Sep 17 00:00:00 2001 From: Francois Huet Date: Sat, 4 Apr 2020 14:09:12 -0700 Subject: [PATCH] [WIP] Remove heuristic of 5* row height Removed the heuristic that pads height by 5x the row height. Updated the 4 unit tests that got better results based on it. Still to do: fix the 6 unit tests that got broken, plus my new target. --- camelot/core.py | 3 +-- tests/data.py | 60 ------------------------------------------------- 2 files changed, 1 insertion(+), 62 deletions(-) diff --git a/camelot/core.py b/camelot/core.py index b7a02b1..655e1d6 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -166,8 +166,7 @@ class TextEdges(object): x0 = area[0] - TABLE_AREA_PADDING y0 = area[1] - TABLE_AREA_PADDING x1 = area[2] + TABLE_AREA_PADDING - # add a constant since table headers can be relatively up - y1 = area[3] + average_row_height * 5 + y1 = area[3] + TABLE_AREA_PADDING return (x0, y0, x1, y1) # sort relevant textedges in reading order diff --git a/tests/data.py b/tests/data.py index 4503c05..9a90f09 100755 --- a/tests/data.py +++ b/tests/data.py @@ -819,42 +819,6 @@ data_stream_table_rotated = [ ] data_stream_two_tables_1 = [ - [ - "Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ], - [ - "by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ], - [ - "could represent multiple arrests of the same person. See text, this section and source]", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ], ["", "", "Total", "", "", "Male", "", "", "Female", ""], [ "Offense charged", @@ -1281,15 +1245,6 @@ data_stream_two_tables_1 = [ ] data_stream_two_tables_2 = [ - ["Table 325. Arrests by Race: 2009", "", "", "", "", ""], - [ - "[Based on Uniform Crime Reporting (UCR) Program. 
Represents arrests reported (not charged) by 12,371 agencies", - "", - "", - "", - "", - "", - ], [ "with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]", "", @@ -1726,20 +1681,6 @@ data_stream_columns = [ ] data_stream_split_text = [ - [ - "FEB", - "RUAR", - "Y 2014 M27 (BUS)", - "", - "", - "", - "", - "", - "", - "" - ], - ["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"], - ["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""], ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""], [ "NUMBER", @@ -2291,7 +2232,6 @@ data_stream_flag_size = [ ] data_stream_strip_text = [ - ["VinsauVerre", ""], ["LesBlancs", "12.5CL"], ["A.O.PCôtesduRhône", ""], ["DomainedelaGuicharde«Autourdelachapelle»2016", "8€"],