[WIP] Remove the 5x row-height padding heuristic

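Removed the heuristic that pads the table area's y1 bound by 5x the average row height; y1 is now padded by TABLE_AREA_PADDING, like the other three bounds.
Updated the 4 unit tests whose results improved with this change.
Still to do: fix the 6 unit tests that this change broke, plus my new target.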
pull/127/head
Francois Huet 2020-04-04 14:09:12 -07:00
parent 912efd2c9b
commit 00d5d2ede4
2 changed files with 1 addition and 62 deletions

View File

@ -166,8 +166,7 @@ class TextEdges(object):
x0 = area[0] - TABLE_AREA_PADDING x0 = area[0] - TABLE_AREA_PADDING
y0 = area[1] - TABLE_AREA_PADDING y0 = area[1] - TABLE_AREA_PADDING
x1 = area[2] + TABLE_AREA_PADDING x1 = area[2] + TABLE_AREA_PADDING
# add a constant since table headers can be relatively up y1 = area[3] + TABLE_AREA_PADDING
y1 = area[3] + average_row_height * 5
return (x0, y0, x1, y1) return (x0, y0, x1, y1)
# sort relevant textedges in reading order # sort relevant textedges in reading order

View File

@ -819,42 +819,6 @@ data_stream_table_rotated = [
] ]
data_stream_two_tables_1 = [ data_stream_two_tables_1 = [
[
"Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
"",
"",
"",
"",
"",
"",
"",
"",
"",
],
[
"by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,",
"",
"",
"",
"",
"",
"",
"",
"",
"",
],
[
"could represent multiple arrests of the same person. See text, this section and source]",
"",
"",
"",
"",
"",
"",
"",
"",
"",
],
["", "", "Total", "", "", "Male", "", "", "Female", ""], ["", "", "Total", "", "", "Male", "", "", "Female", ""],
[ [
"Offense charged", "Offense charged",
@ -1281,15 +1245,6 @@ data_stream_two_tables_1 = [
] ]
data_stream_two_tables_2 = [ data_stream_two_tables_2 = [
["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
[
"[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
"",
"",
"",
"",
"",
],
[ [
"with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]", "with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]",
"", "",
@ -1726,20 +1681,6 @@ data_stream_columns = [
] ]
data_stream_split_text = [ data_stream_split_text = [
[
"FEB",
"RUAR",
"Y 2014 M27 (BUS)",
"",
"",
"",
"",
"",
"",
""
],
["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""], ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
[ [
"NUMBER", "NUMBER",
@ -2291,7 +2232,6 @@ data_stream_flag_size = [
] ]
data_stream_strip_text = [ data_stream_strip_text = [
["VinsauVerre", ""],
["LesBlancs", "12.5CL"], ["LesBlancs", "12.5CL"],
["A.O.PCôtesduRhône", ""], ["A.O.PCôtesduRhône", ""],
["DomainedelaGuicharde«Autourdelachapelle»2016", "8€"], ["DomainedelaGuicharde«Autourdelachapelle»2016", "8€"],