Create notebook to help debug hybrid parser algo
Plot vertical col anchors found by hybrid parser. Include vertical text in col/row generation. (pull/153/head)
parent
6add19ae27
commit
3220b02ebc
|
|
@ -384,8 +384,8 @@ class TextNetworks(TextAlignments):
|
||||||
bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
|
bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
|
||||||
most_aligned_tl.x1, most_aligned_tl.y1)
|
most_aligned_tl.x1, most_aligned_tl.y1)
|
||||||
|
|
||||||
# For the body of the table, we only consider cells with alignments
|
# For the body of the table, we only consider cells that have
|
||||||
# on both axis.
|
# alignments on both axis.
|
||||||
tls_search_space = list(self._textline_to_alignments.keys())
|
tls_search_space = list(self._textline_to_alignments.keys())
|
||||||
# tls_search_space = []
|
# tls_search_space = []
|
||||||
tls_search_space.remove(most_aligned_tl)
|
tls_search_space.remove(most_aligned_tl)
|
||||||
|
|
@ -586,9 +586,12 @@ class Hybrid(TextBaseParser):
|
||||||
)
|
)
|
||||||
|
|
||||||
all_tls = list(
|
all_tls = list(
|
||||||
filter(
|
sorted(
|
||||||
lambda tl: len(tl.get_text().strip()) > 0,
|
filter(
|
||||||
self.t_bbox["horizontal"] # + self.t_bbox["vertical"]
|
lambda tl: len(tl.get_text().strip()) > 0,
|
||||||
|
self.t_bbox["horizontal"] + self.t_bbox["vertical"]
|
||||||
|
),
|
||||||
|
key=lambda tl: (-tl.y0, tl.x0)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(
|
text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(
|
||||||
|
|
|
||||||
|
|
@ -487,5 +487,16 @@ class PlotMethods():
|
||||||
linewidth=2,
|
linewidth=2,
|
||||||
label_pos="bottom,left"
|
label_pos="bottom,left"
|
||||||
)
|
)
|
||||||
|
for col_anchor in col_search["cols_anchors"]:
|
||||||
|
# Display a green line at the col boundary line throughout the
|
||||||
|
# table bbox.
|
||||||
|
ax.plot(
|
||||||
|
[col_anchor, col_anchor],
|
||||||
|
[
|
||||||
|
col_search["core_bbox"][1] - 10,
|
||||||
|
col_search["core_bbox"][3] + 10,
|
||||||
|
],
|
||||||
|
color="green"
|
||||||
|
)
|
||||||
|
|
||||||
return ax.get_figure()
|
return ax.get_figure()
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
|
Before Width: | Height: | Size: 101 KiB After Width: | Height: | Size: 101 KiB |
Loading…
Reference in New Issue