Draw parse constraints for easier debugging

* Display region and area rectangles
pull/153/head
Frh 2020-04-21 14:24:44 -07:00
parent 310a8cd80a
commit d2cf8520cb
4 changed files with 90 additions and 1 deletions

View File

@ -8,6 +8,25 @@ except ImportError:
else: else:
_HAS_MPL = True _HAS_MPL = True
from .utils import bbox_from_str
def draw_labeled_bbox(ax, bbox, text, rect_color):
    """Draw an unfilled rectangle with a text label onto the passed subplot

    The rectangle is anchored at ``(bbox[0], bbox[1])`` with width
    ``bbox[2] - bbox[0]`` and height ``bbox[3] - bbox[1]``. The label is
    placed at the same anchor point.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
    bbox : sequence of four numbers
        Indexed as bbox[0]..bbox[3]; presumably (x1, y1, x2, y2) -- confirm
        against the callers.
    text : str
        Label displayed at the rectangle's anchor.
    rect_color : str
        Color used for the rectangle outline and the label background.

    """
    # Use the caller-supplied color: it was previously ignored in favor of a
    # hard-coded "purple", which made every kind of box look identical.
    ax.add_patch(
        patches.Rectangle(
            (bbox[0], bbox[1]),
            bbox[2] - bbox[0], bbox[3] - bbox[1],
            color=rect_color, linewidth=3,
            fill=False
        )
    )
    ax.text(
        bbox[0], bbox[1],
        text,
        fontsize=12, color="black", verticalalignment="top",
        bbox=dict(facecolor=rect_color, alpha=0.5)
    )
def draw_pdf(table, ax, to_pdf_scale=True): def draw_pdf(table, ax, to_pdf_scale=True):
"""Draw the content of the table's source pdf into the passed subplot """Draw the content of the table's source pdf into the passed subplot
@ -16,7 +35,9 @@ def draw_pdf(table, ax, to_pdf_scale=True):
---------- ----------
table : camelot.core.Table table : camelot.core.Table
fig : matplotlib.axes.Axes ax : matplotlib.axes.Axes
to_pdf_scale : bool
""" """
img = table.get_pdf_image() img = table.get_pdf_image()
@ -25,6 +46,47 @@ def draw_pdf(table, ax, to_pdf_scale=True):
else: else:
ax.imshow(img) ax.imshow(img)
if table.debug_info:
# Display a bbox per region
for region_str in table.debug_info["table_regions"] or []:
draw_labeled_bbox(
ax, bbox_from_str(region_str),
"region: ({region_str})".format(region_str=region_str),
"purple"
)
# Display a bbox per area
for area_str in table.debug_info["table_areas"] or []:
draw_labeled_bbox(
ax, bbox_from_str(area_str),
"area: ({area_str})".format(area_str=area_str), "pink"
)
def draw_parse_constraints(table, ax):
    """Overlay the user-supplied parsing constraints on the passed subplot

    Reads the "table_regions" and "table_areas" entries of
    ``table.debug_info`` and draws one labeled rectangle per entry. Nothing
    is drawn when ``table.debug_info`` is falsy.

    Parameters
    ----------
    table : camelot.core.Table
    ax : matplotlib.axes.Axes

    """
    if not table.debug_info:
        return

    # One rectangle per user-provided region; a None entry is treated as empty
    for region in table.debug_info["table_regions"] or []:
        region_label = "region: ({region_str})".format(region_str=region)
        region_bbox = bbox_from_str(region)
        draw_labeled_bbox(ax, region_bbox, region_label, "purple")

    # One rectangle per user-provided area; a None entry is treated as empty
    for area in table.debug_info["table_areas"] or []:
        area_label = "area: ({area_str})".format(area_str=area)
        area_bbox = bbox_from_str(area)
        draw_labeled_bbox(ax, area_bbox, area_label, "pink")
class PlotMethods(object): class PlotMethods(object):
def __call__(self, table, kind="text", filename=None): def __call__(self, table, kind="text", filename=None):
@ -79,6 +141,7 @@ class PlotMethods(object):
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
draw_pdf(table, ax) draw_pdf(table, ax)
draw_parse_constraints(table, ax)
xs, ys = [], [] xs, ys = [], []
for t in table._text: for t in table._text:
xs.extend([t[0], t[2]]) xs.extend([t[0], t[2]])
@ -112,6 +175,7 @@ class PlotMethods(object):
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
draw_pdf(table, ax) draw_pdf(table, ax)
draw_parse_constraints(table, ax)
for row in table.cells: for row in table.cells:
for cell in row: for cell in row:
if cell.left: if cell.left:
@ -142,6 +206,7 @@ class PlotMethods(object):
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
_FOR_LATTICE = table.flavor == "lattice" _FOR_LATTICE = table.flavor == "lattice"
draw_pdf(table, ax, to_pdf_scale=not _FOR_LATTICE) draw_pdf(table, ax, to_pdf_scale=not _FOR_LATTICE)
draw_parse_constraints(table, ax)
if _FOR_LATTICE: if _FOR_LATTICE:
table_bbox = table._bbox_unscaled table_bbox = table._bbox_unscaled
@ -189,6 +254,7 @@ class PlotMethods(object):
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
draw_pdf(table, ax) draw_pdf(table, ax)
draw_parse_constraints(table, ax)
xs, ys = [], [] xs, ys = [], []
for t in table._text: for t in table._text:
xs.extend([t[0], t[2]]) xs.extend([t[0], t[2]])
@ -228,6 +294,7 @@ class PlotMethods(object):
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
draw_pdf(table, ax, to_pdf_scale=False) draw_pdf(table, ax, to_pdf_scale=False)
draw_parse_constraints(table, ax)
table_bbox = table._bbox_unscaled table_bbox = table._bbox_unscaled
x_coord = [] x_coord = []
y_coord = [] y_coord = []
@ -255,6 +322,7 @@ class PlotMethods(object):
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal") ax = fig.add_subplot(111, aspect="equal")
draw_pdf(table, ax) draw_pdf(table, ax)
draw_parse_constraints(table, ax)
vertical, horizontal = table._segments vertical, horizontal = table._segments
for v in vertical: for v in vertical:
ax.plot([v[0], v[2]], [v[1], v[3]]) ax.plot([v[0], v[2]], [v[1], v[3]])

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

View File

@ -131,3 +131,24 @@ def test_hybrid_textedge_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, debug=True, flavor='hybrid') tables = camelot.read_pdf(filename, debug=True, flavor='hybrid')
return camelot.plot(tables[0], kind='textedge') return camelot.plot(tables[0], kind='textedge')
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_table_regions_textedge_plot():
    """Plot text edges for a hybrid parse given a single table region."""
    pdf_path = os.path.join(testdir, "tabula/us-007.pdf")
    regions = ["320,505,573,330"]
    parsed = camelot.read_pdf(
        pdf_path, flavor="hybrid", debug=True, table_regions=regions
    )
    first_table = parsed[0]
    return camelot.plot(first_table, kind="textedge")
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_table_areas_text_plot():
    """Plot text for a hybrid parse given a single table area."""
    pdf_path = os.path.join(testdir, "tabula/us-007.pdf")
    areas = ["320,500,573,335"]
    parsed = camelot.read_pdf(
        pdf_path, flavor="hybrid", debug=True, table_areas=areas
    )
    first_table = parsed[0]
    return camelot.plot(first_table, kind="text")