Draw parse constraints for easier debug
* Display region and area rectangles
pull/153/head
parent
ad27a11d35
commit
cd338ff4e2
|
|
@ -8,6 +8,25 @@ except ImportError:
|
|||
else:
|
||||
_HAS_MPL = True
|
||||
|
||||
from .utils import bbox_from_str
|
||||
|
||||
|
||||
def draw_labeled_bbox(ax, bbox, text, rect_color):
    """Draw an unfilled rectangle with a text label onto the passed subplot

    Parameters
    ----------
    ax : matplotlib.axes.Axes

    bbox : sequence of four numbers
        The rectangle is anchored at (bbox[0], bbox[1]) and spans
        bbox[2] - bbox[0] horizontally and bbox[3] - bbox[1] vertically.

    text : str
        Label written at the (bbox[0], bbox[1]) corner of the rectangle.

    rect_color : str
        Color used for the rectangle outline and for the semi-transparent
        background of the label.

    """
    anchor_x, anchor_y = bbox[0], bbox[1]
    ax.add_patch(
        patches.Rectangle(
            (anchor_x, anchor_y),
            bbox[2] - anchor_x, bbox[3] - anchor_y,
            # Honor the caller's color: this used to be hard-coded to
            # "purple", so every box looked the same whatever was requested.
            color=rect_color, linewidth=3,
            fill=False
        )
    )
    ax.text(
        anchor_x, anchor_y,
        text,
        fontsize=12, color="black", verticalalignment="top",
        # Same color as the outline so a label visibly belongs to its box.
        bbox=dict(facecolor=rect_color, alpha=0.5)
    )
|
||||
|
||||
|
||||
def draw_pdf(table, ax, to_pdf_scale=True):
|
||||
"""Draw the content of the table's source pdf into the passed subplot
|
||||
|
|
@ -16,7 +35,9 @@ def draw_pdf(table, ax, to_pdf_scale=True):
|
|||
----------
|
||||
table : camelot.core.Table
|
||||
|
||||
fig : matplotlib.axes.Axes
|
||||
ax : matplotlib.axes.Axes
|
||||
|
||||
to_pdf_scale : bool
|
||||
|
||||
"""
|
||||
img = table.get_pdf_image()
|
||||
|
|
@ -25,6 +46,47 @@ def draw_pdf(table, ax, to_pdf_scale=True):
|
|||
else:
|
||||
ax.imshow(img)
|
||||
|
||||
if table.debug_info:
|
||||
# Display a bbox per region
|
||||
for region_str in table.debug_info["table_regions"] or []:
|
||||
draw_labeled_bbox(
|
||||
ax, bbox_from_str(region_str),
|
||||
"region: ({region_str})".format(region_str=region_str),
|
||||
"purple"
|
||||
)
|
||||
# Display a bbox per area
|
||||
for area_str in table.debug_info["table_areas"] or []:
|
||||
draw_labeled_bbox(
|
||||
ax, bbox_from_str(area_str),
|
||||
"area: ({area_str})".format(area_str=area_str), "pink"
|
||||
)
|
||||
|
||||
|
||||
def draw_parse_constraints(table, ax):
    """Overlay the table regions and table areas recorded in debug_info

    Nothing is drawn when ``table.debug_info`` is falsy. Otherwise one
    labeled box is drawn per entry of ``debug_info["table_regions"]``
    (requested color "purple"), followed by one per entry of
    ``debug_info["table_areas"]`` (requested color "pink"). A falsy value
    under either key is treated as an empty list.

    Parameters
    ----------
    table : camelot.core.Table

    ax : matplotlib.axes.Axes

    """
    if not table.debug_info:
        return

    # One labeled box per region.
    regions = table.debug_info["table_regions"] or []
    for region_str in regions:
        region_bbox = bbox_from_str(region_str)
        region_label = "region: ({region_str})".format(region_str=region_str)
        draw_labeled_bbox(ax, region_bbox, region_label, "purple")

    # One labeled box per area.
    areas = table.debug_info["table_areas"] or []
    for area_str in areas:
        area_bbox = bbox_from_str(area_str)
        area_label = "area: ({area_str})".format(area_str=area_str)
        draw_labeled_bbox(ax, area_bbox, area_label, "pink")
|
||||
|
||||
|
||||
class PlotMethods(object):
|
||||
def __call__(self, table, kind="text", filename=None):
|
||||
|
|
@ -79,6 +141,7 @@ class PlotMethods(object):
|
|||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect="equal")
|
||||
draw_pdf(table, ax)
|
||||
draw_parse_constraints(table, ax)
|
||||
xs, ys = [], []
|
||||
for t in table._text:
|
||||
xs.extend([t[0], t[2]])
|
||||
|
|
@ -112,6 +175,7 @@ class PlotMethods(object):
|
|||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect="equal")
|
||||
draw_pdf(table, ax)
|
||||
draw_parse_constraints(table, ax)
|
||||
for row in table.cells:
|
||||
for cell in row:
|
||||
if cell.left:
|
||||
|
|
@ -142,6 +206,7 @@ class PlotMethods(object):
|
|||
ax = fig.add_subplot(111, aspect="equal")
|
||||
_FOR_LATTICE = table.flavor == "lattice"
|
||||
draw_pdf(table, ax, to_pdf_scale=not _FOR_LATTICE)
|
||||
draw_parse_constraints(table, ax)
|
||||
|
||||
if _FOR_LATTICE:
|
||||
table_bbox = table._bbox_unscaled
|
||||
|
|
@ -189,6 +254,7 @@ class PlotMethods(object):
|
|||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect="equal")
|
||||
draw_pdf(table, ax)
|
||||
draw_parse_constraints(table, ax)
|
||||
xs, ys = [], []
|
||||
for t in table._text:
|
||||
xs.extend([t[0], t[2]])
|
||||
|
|
@ -228,6 +294,7 @@ class PlotMethods(object):
|
|||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect="equal")
|
||||
draw_pdf(table, ax, to_pdf_scale=False)
|
||||
draw_parse_constraints(table, ax)
|
||||
table_bbox = table._bbox_unscaled
|
||||
x_coord = []
|
||||
y_coord = []
|
||||
|
|
@ -255,6 +322,7 @@ class PlotMethods(object):
|
|||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect="equal")
|
||||
draw_pdf(table, ax)
|
||||
draw_parse_constraints(table, ax)
|
||||
vertical, horizontal = table._segments
|
||||
for v in vertical:
|
||||
ax.plot([v[0], v[2]], [v[1], v[3]])
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 98 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 98 KiB |
|
|
@ -131,3 +131,24 @@ def test_hybrid_textedge_plot():
|
|||
filename = os.path.join(testdir, "tabula/12s0324.pdf")
|
||||
tables = camelot.read_pdf(filename, debug=True, flavor='hybrid')
|
||||
return camelot.plot(tables[0], kind='textedge')
|
||||
|
||||
|
||||
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_table_regions_textedge_plot():
    """Textedge plot of a hybrid parse constrained by a table region."""
    pdf_path = os.path.join(testdir, "tabula/us-007.pdf")
    read_options = dict(
        debug=True,
        flavor="hybrid",
        table_regions=["320,505,573,330"],
    )
    parsed_tables = camelot.read_pdf(pdf_path, **read_options)
    first_table = parsed_tables[0]
    # The returned figure is compared against the stored baseline image.
    return camelot.plot(first_table, kind="textedge")
|
||||
|
||||
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_table_areas_text_plot():
    """Text plot of a hybrid parse constrained by a table area."""
    pdf_path = os.path.join(testdir, "tabula/us-007.pdf")
    read_options = dict(
        debug=True,
        flavor="hybrid",
        table_areas=["320,500,573,335"],
    )
    parsed_tables = camelot.read_pdf(pdf_path, **read_options)
    first_table = parsed_tables[0]
    # The returned figure is compared against the stored baseline image.
    return camelot.plot(first_table, kind="text")
|
||||
|
|
|
|||
Loading…
Reference in New Issue