Create notebook to help debug hybrid parser algo

Plot vertical col anchors found by hybrid parser
Include vertical text in col/row generation
pull/153/head
Frh 2020-04-28 12:26:12 -07:00
parent 6add19ae27
commit 3220b02ebc
5 changed files with 141 additions and 151 deletions

View File

@ -384,8 +384,8 @@ class TextNetworks(TextAlignments):
bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
most_aligned_tl.x1, most_aligned_tl.y1)
# For the body of the table, we only consider cells with alignments
# on both axis.
# For the body of the table, we only consider cells that have
# alignments on both axes.
tls_search_space = list(self._textline_to_alignments.keys())
# tls_search_space = []
tls_search_space.remove(most_aligned_tl)
@ -586,9 +586,12 @@ class Hybrid(TextBaseParser):
)
all_tls = list(
filter(
lambda tl: len(tl.get_text().strip()) > 0,
self.t_bbox["horizontal"] # + self.t_bbox["vertical"]
sorted(
filter(
lambda tl: len(tl.get_text().strip()) > 0,
self.t_bbox["horizontal"] + self.t_bbox["vertical"]
),
key=lambda tl: (-tl.y0, tl.x0)
)
)
text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(

View File

@ -487,5 +487,16 @@ class PlotMethods():
linewidth=2,
label_pos="bottom,left"
)
for col_anchor in col_search["cols_anchors"]:
# Draw a green vertical line at each column anchor, extending
# 10 units past the core bbox at both ends.
ax.plot(
[col_anchor, col_anchor],
[
col_search["core_bbox"][1] - 10,
col_search["core_bbox"][3] + 10,
],
color="green"
)
return ax.get_figure()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 101 KiB

After

Width:  |  Height:  |  Size: 101 KiB