Address last unit test

pull/153/head
Frh 2020-04-22 16:02:49 -07:00
parent d3d625a08d
commit bfc2719aff
1 changed file with 7 additions and 15 deletions

View File

@@ -969,11 +969,13 @@ class Hybrid(BaseParser):
gaps_hv = self.textedges._compute_plausible_gaps() gaps_hv = self.textedges._compute_plausible_gaps()
if gaps_hv is None: if gaps_hv is None:
return None return None
if self.edge_tol is not None: # edge_tol instructions override the calculated vertical gap
# edge_tol instructions override the calculated vertical gap edge_tol_hv = (
gaps_hv = (gaps_hv[0], self.edge_tol) gaps_hv[0],
gaps_hv[1] if self.edge_tol is None else self.edge_tol
)
bbox = self.textedges._build_bbox_candidate( bbox = self.textedges._build_bbox_candidate(
gaps_hv, edge_tol_hv,
debug_info=debug_info_bboxes_searches debug_info=debug_info_bboxes_searches
) )
if bbox is None: if bbox is None:
@@ -991,21 +993,11 @@ class Hybrid(BaseParser):
# Apply a heuristic to salvage headers which formatting might be # Apply a heuristic to salvage headers which formatting might be
# off compared to the rest of the table. # off compared to the rest of the table.
# Calculate the average height of each textline
# FRHTODO: reuse the gap threshold from earlier?
alignments = self.textedges._textlines_alignments.keys()
average_tl_height = sum(
map(
lambda tl: tl.y1 - tl.y0,
alignments
)) / len(alignments)
expanded_bbox = todo_move_me_expand_area_for_header( expanded_bbox = todo_move_me_expand_area_for_header(
bbox, bbox,
textlines, textlines,
cols_anchors, cols_anchors,
gaps_hv[1] # average_tl_height gaps_hv[1]
) )
if self.debug_info is not None: if self.debug_info is not None: