diff --git a/camelot/plotting.py b/camelot/plotting.py
index 8d06179..a7f249f 100644
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@@ -8,6 +8,41 @@ except ImportError:
 else:
     _HAS_MPL = True
 
+from .utils import bbox_from_str
+
+
+def draw_labeled_bbox(ax, bbox, text, rect_color):
+    """Draw a rectangle outline with a text label at its (x0, y0) corner
+
+    Parameters
+    ----------
+    ax : matplotlib.axes.Axes
+
+    bbox : sequence of four numbers
+        (x0, y0, x1, y1) corners of the box
+
+    text : str
+        Label displayed at the (x0, y0) corner of the box
+
+    rect_color : str
+        Matplotlib color used for the outline and the label background
+
+    """
+    ax.add_patch(
+        patches.Rectangle(
+            (bbox[0], bbox[1]),
+            bbox[2] - bbox[0], bbox[3] - bbox[1],
+            color=rect_color, linewidth=3,
+            fill=False
+        )
+    )
+    ax.text(
+        bbox[0], bbox[1],
+        text,
+        fontsize=12, color="black", verticalalignment="top",
+        bbox=dict(facecolor=rect_color, alpha=0.5)
+    )
+
 
 def draw_pdf(table, ax, to_pdf_scale=True):
     """Draw the content of the table's source pdf into the passed subplot
@@ -16,7 +51,9 @@ def draw_pdf(table, ax, to_pdf_scale=True):
     ----------
     table : camelot.core.Table
 
-    fig : matplotlib.axes.Axes
+    ax : matplotlib.axes.Axes
+
+    to_pdf_scale : bool
 
     """
     img = table.get_pdf_image()
@@ -25,6 +62,32 @@ def draw_pdf(table, ax, to_pdf_scale=True):
     else:
         ax.imshow(img)
 
+
+def draw_parse_constraints(table, ax):
+    """Draw any user provided constraints (area, region, columns, etc)
+
+    Parameters
+    ----------
+    table : camelot.core.Table
+
+    ax : matplotlib.axes.Axes
+
+    """
+    if table.debug_info:
+        # Display a bbox per region
+        for region_str in table.debug_info["table_regions"] or []:
+            draw_labeled_bbox(
+                ax, bbox_from_str(region_str),
+                "region: ({region_str})".format(region_str=region_str),
+                "purple"
+            )
+        # Display a bbox per area
+        for area_str in table.debug_info["table_areas"] or []:
+            draw_labeled_bbox(
+                ax, bbox_from_str(area_str),
+                "area: ({area_str})".format(area_str=area_str), "pink"
+            )
+
 
 class PlotMethods(object):
     def __call__(self, table, kind="text", filename=None):
@@ -79,6 +142,7 @@ class PlotMethods(object):
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         draw_pdf(table, ax)
+        draw_parse_constraints(table, ax)
         xs, ys = [], []
         for t in table._text:
             xs.extend([t[0], t[2]])
@@ -112,6 +176,7 @@ class PlotMethods(object):
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         draw_pdf(table, ax)
+        draw_parse_constraints(table, ax)
         for row in table.cells:
             for cell in row:
                 if cell.left:
@@ -142,6 +207,7 @@ class PlotMethods(object):
         ax = fig.add_subplot(111, aspect="equal")
         _FOR_LATTICE = table.flavor == "lattice"
         draw_pdf(table, ax, to_pdf_scale=not _FOR_LATTICE)
+        draw_parse_constraints(table, ax)
 
         if _FOR_LATTICE:
             table_bbox = table._bbox_unscaled
@@ -189,6 +255,7 @@ class PlotMethods(object):
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         draw_pdf(table, ax)
+        draw_parse_constraints(table, ax)
         xs, ys = [], []
         for t in table._text:
             xs.extend([t[0], t[2]])
@@ -228,6 +295,7 @@ class PlotMethods(object):
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         draw_pdf(table, ax, to_pdf_scale=False)
+        draw_parse_constraints(table, ax)
         table_bbox = table._bbox_unscaled
         x_coord = []
         y_coord = []
@@ -255,6 +323,7 @@ class PlotMethods(object):
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         draw_pdf(table, ax)
+        draw_parse_constraints(table, ax)
         vertical, horizontal = table._segments
         for v in vertical:
             ax.plot([v[0], v[2]], [v[1], v[3]])
diff --git a/tests/files/baseline_plots/test_hybrid_table_areas_text_plot.png b/tests/files/baseline_plots/test_hybrid_table_areas_text_plot.png
new file mode 100644
index 0000000..90874ff
Binary files /dev/null and b/tests/files/baseline_plots/test_hybrid_table_areas_text_plot.png differ
diff --git a/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png b/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png
new file mode 100644
index 0000000..cac8334
Binary files /dev/null and b/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png differ
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index c550edb..a0d17fb 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -131,3 +131,25 @@ def test_hybrid_textedge_plot():
     filename = os.path.join(testdir, "tabula/12s0324.pdf")
     tables = camelot.read_pdf(filename, debug=True, flavor='hybrid')
     return camelot.plot(tables[0], kind='textedge')
+
+
+@pytest.mark.mpl_image_compare(
+    baseline_dir="files/baseline_plots", remove_text=True)
+def test_hybrid_table_regions_textedge_plot():
+    filename = os.path.join(testdir, "tabula/us-007.pdf")
+    tables = camelot.read_pdf(
+        filename, debug=True, flavor="hybrid",
+        table_regions=["320,505,573,330"]
+    )
+    return camelot.plot(tables[0], kind='textedge')
+
+
+@pytest.mark.mpl_image_compare(
+    baseline_dir="files/baseline_plots", remove_text=True)
+def test_hybrid_table_areas_text_plot():
+    filename = os.path.join(testdir, "tabula/us-007.pdf")
+    tables = camelot.read_pdf(
+        filename, debug=True, flavor="hybrid",
+        table_areas=["320,500,573,335"]
+    )
+    return camelot.plot(tables[0], kind='text')