Further simplification

pull/153/head
Frh 2020-04-24 12:48:51 -07:00
parent 22b6e33efa
commit 87d95a098c
1 changed file with 6 additions and 24 deletions

View File

@@ -222,10 +222,6 @@ class TextNetworks(TextAlignments):
# "number of textlines aligned"
self._textlines_alignments = {}
# Maximum number of distinct aligned elements in rows/cols
self.max_rows = None
self.max_cols = None
def _update_edge(self, edge, coord, textline):
edge.register_aligned_textline(textline, coord)
@@ -251,22 +247,6 @@ class TextNetworks(TextAlignments):
self._textlines_alignments[textline] = alignments
alignments[align_id] = textedge.textlines
# Finally calculate the overall maximum number of rows/cols
self.max_rows = max(
map(
lambda alignments: alignments.max_h_count(),
self._textlines_alignments.values()
),
default=0
)
self.max_cols = max(
map(
lambda alignments: alignments.max_v_count(),
self._textlines_alignments.values()
),
default=0
)
def _calculate_gaps_thresholds(self, percentile=75):
"""Identify reasonable gaps between lines and columns based
on gaps observed across alignments.
@@ -356,25 +336,27 @@ class TextNetworks(TextAlignments):
(horizontal_gap, horizontal_gap) in pdf coordinate space.
"""
if self.max_rows <= 1 or self.max_cols <= 1:
return None
# Determine the textline that has the most combined
# alignments across horizontal and vertical axis.
# It will serve as a reference axis along which to collect the average
# spacing between rows/cols.
most_aligned_tl = self._most_connected_textline()
if most_aligned_tl is None:
return None
# Retrieve the list of textlines it's aligned with, across both
# axis
best_alignment = self._textlines_alignments[most_aligned_tl]
ref_h_alignment_id, ref_h_textlines = best_alignment.max_h()
ref_v_alignment_id, ref_v_textlines = best_alignment.max_v()
if len(ref_v_textlines) <= 1 or len(ref_h_textlines) <= 1:
return None
h_textlines = sorted(
ref_h_textlines,
key=lambda tl: tl.x0,
reverse=True
)
ref_v_alignment_id, ref_v_textlines = best_alignment.max_v()
v_textlines = sorted(
ref_v_textlines,
key=lambda tl: tl.y0,