# -*- coding: utf-8 -*-

try:
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
except ImportError:
    _HAS_MPL = False
else:
    _HAS_MPL = True

from .utils import bbox_from_str


# (debug_info key, label prefix, color) for each kind of user constraint,
# in the order they are drawn.
_CONSTRAINT_STYLES = (
    ("table_regions", "region", "purple"),
    ("table_areas", "area", "pink"),
)


def draw_labeled_bbox(ax, bbox, text, rect_color):
    """Draw an unfilled rectangle with a text label at its (x0, y0) corner.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw into.
    bbox : sequence
        Indexed as (x0, y0, x1, y1).
    text : str
        Label displayed at (x0, y0).
    rect_color : str
        Color used for both the rectangle outline and the label background.

    """
    ax.add_patch(
        patches.Rectangle(
            (bbox[0], bbox[1]),
            bbox[2] - bbox[0],
            bbox[3] - bbox[1],
            # Honor the caller's color so regions and areas can be told apart.
            color=rect_color,
            linewidth=3,
            fill=False,
        )
    )
    ax.text(
        bbox[0],
        bbox[1],
        text,
        fontsize=12,
        color="black",
        verticalalignment="top",
        bbox=dict(facecolor=rect_color, alpha=0.5),
    )


def _draw_pdf_image(table, ax, to_pdf_scale=True):
    """Render only the image returned by ``table.get_pdf_image()`` into ax."""
    img = table.get_pdf_image()
    if to_pdf_scale:
        ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
    else:
        ax.imshow(img)


def draw_pdf(table, ax, to_pdf_scale=True):
    """Draw the content of the table's source pdf into the passed subplot

    The user provided constraints (regions and areas) are overlaid as well,
    which is what this function has always done; callers should therefore
    not call ``draw_parse_constraints`` again on the same axes.

    Parameters
    ----------
    table : camelot.core.Table
    ax : matplotlib.axes.Axes
    to_pdf_scale : bool
        When True, the image extent is set to ``table.pdf_size`` so that it
        spans (0, width) x (0, height); otherwise the image is shown with
        matplotlib's default extent.

    """
    _draw_pdf_image(table, ax, to_pdf_scale=to_pdf_scale)
    draw_parse_constraints(table, ax)


def draw_parse_constraints(table, ax):
    """Draw any user provided constraints (area, region, columns, etc)

    Nothing is drawn when ``table.debug_info`` is falsy. A constraint entry
    whose value is None is treated as empty.

    Parameters
    ----------
    table : camelot.core.Table
    ax : matplotlib.axes.Axes

    """
    if not table.debug_info:
        return
    for key, label, color in _CONSTRAINT_STYLES:
        for bbox_str in table.debug_info[key] or []:
            draw_labeled_bbox(
                ax,
                bbox_from_str(bbox_str),
                "{label}: ({bbox_str})".format(label=label, bbox_str=bbox_str),
                color,
            )


def _new_figure():
    """Create a figure with a single equal-aspect subplot; return (fig, ax)."""
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="equal")
    return fig, ax


def _draw_text_boxes(ax, text_boxes, **rect_kwargs):
    """Draw one rectangle per text box and return the corner coordinates.

    Each box is indexed as (x0, y0, x1, y1). ``rect_kwargs`` are forwarded to
    ``patches.Rectangle``. Returns two lists (xs, ys) holding every corner
    coordinate that was drawn.
    """
    xs, ys = [], []
    for t in text_boxes:
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]), t[2] - t[0], t[3] - t[1], **rect_kwargs
            )
        )
    return xs, ys


def _fit_axes_to_points(ax, xs, ys, margin=10):
    """Set the axes limits around the given points, padded by ``margin``.

    The limits are left untouched when there are no points, since min()/max()
    would raise ValueError on an empty sequence.
    """
    if not xs or not ys:
        return
    ax.set_xlim(min(xs) - margin, max(xs) + margin)
    ax.set_ylim(min(ys) - margin, max(ys) + margin)


class PlotMethods(object):
    def __call__(self, table, kind="text", filename=None):
        """Plot elements found on PDF page based on kind
        specified, useful for debugging and playing with different
        parameters to get the best output.

        Parameters
        ----------
        table: camelot.core.Table
            A Camelot Table.
        kind : str, optional (default: 'text')
            {'text', 'grid', 'contour', 'joint', 'line', 'textedge'}
            The element type for which a plot should be generated.
        filename: str, optional (default: None)
            Path for saving the generated plot. When given, the figure is
            written with ``Figure.savefig`` before being returned.

        Returns
        -------
        fig : matplotlib.figure.Figure

        Raises
        ------
        ImportError
            If matplotlib could not be imported.
        NotImplementedError
            If ``kind`` is 'textedge' for the lattice flavor, or 'line' for
            the stream or hybrid flavors.

        """
        if not _HAS_MPL:
            raise ImportError("matplotlib is required for plotting.")

        if table.flavor == "lattice" and kind in ["textedge"]:
            raise NotImplementedError(
                "Lattice flavor does not support kind='{}'".format(kind)
            )
        elif table.flavor in ["stream", "hybrid"] and kind in ["line"]:
            # Name the actual flavor rather than always reporting "Stream".
            raise NotImplementedError(
                "{} flavor does not support kind='{}'".format(
                    table.flavor.capitalize(), kind
                )
            )

        plot_method = getattr(self, kind)
        fig = plot_method(table)
        if filename is not None:
            fig.savefig(filename)
        return fig

    @staticmethod
    def text(table):
        """Generates a plot for all text elements present
        on the PDF page.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        _draw_pdf_image(table, ax)
        draw_parse_constraints(table, ax)
        xs, ys = _draw_text_boxes(ax, table._text, alpha=0.5)
        _fit_axes_to_points(ax, xs, ys)
        return fig

    @staticmethod
    def grid(table):
        """Generates a plot for the detected table grids
        on the PDF page.

        Only the cell edges flagged by ``cell.left``, ``cell.right``,
        ``cell.top`` and ``cell.bottom`` are drawn.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        _draw_pdf_image(table, ax)
        draw_parse_constraints(table, ax)
        for row in table.cells:
            for cell in row:
                if cell.left:
                    ax.plot([cell.lb[0], cell.lt[0]], [cell.lb[1], cell.lt[1]])
                if cell.right:
                    ax.plot([cell.rb[0], cell.rt[0]], [cell.rb[1], cell.rt[1]])
                if cell.top:
                    ax.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]])
                if cell.bottom:
                    ax.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]])
        return fig

    @staticmethod
    def contour(table):
        """Generates a plot for all table boundaries present
        on the PDF page.

        For the lattice flavor the boundaries come from
        ``table._bbox_unscaled`` and the image is not rescaled to the pdf
        size. For the other flavors the single ``table._bbox`` is drawn
        together with the text boxes, and the view is fitted around them.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        for_lattice = table.flavor == "lattice"
        _draw_pdf_image(table, ax, to_pdf_scale=not for_lattice)
        draw_parse_constraints(table, ax)

        if for_lattice:
            table_bbox = table._bbox_unscaled
        else:
            table_bbox = {table._bbox: None}

        xs, ys = [], []
        if not for_lattice:
            xs, ys = _draw_text_boxes(ax, table._text, color="blue")

        for t in table_bbox:
            ax.add_patch(
                patches.Rectangle(
                    (t[0], t[1]),
                    t[2] - t[0],
                    t[3] - t[1],
                    fill=False,
                    color="red",
                )
            )
            if not for_lattice:
                xs.extend([t[0], t[2]])
                ys.extend([t[1], t[3]])

        # No points are collected for lattice, so the limits are left as is.
        _fit_axes_to_points(ax, xs, ys)
        return fig

    @staticmethod
    def textedge(table):
        """Generates a plot for relevant textedges.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        _draw_pdf_image(table, ax)
        draw_parse_constraints(table, ax)
        xs, ys = _draw_text_boxes(ax, table._text, color="blue", alpha=0.5)
        _fit_axes_to_points(ax, xs, ys)

        if table.flavor == "hybrid":
            # FRHTODO: Clean this up
            table.debug_info["edges_searches"][0].plot_alignments(ax)
        else:
            for te in table._textedges:
                ax.plot([te.x, te.x], [te.y0, te.y1])
        return fig

    @staticmethod
    def joint(table):
        """Generates a plot for all line intersections present
        on the PDF page.

        The points are read from the values of ``table._bbox_unscaled`` and
        drawn over the image at its default (unscaled) extent.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        _draw_pdf_image(table, ax, to_pdf_scale=False)
        draw_parse_constraints(table, ax)
        x_coord = []
        y_coord = []
        for joints in table._bbox_unscaled.values():
            for coord in joints:
                x_coord.append(coord[0])
                y_coord.append(coord[1])
        ax.plot(x_coord, y_coord, "ro")
        return fig

    @staticmethod
    def line(table):
        """Generates a plot for all line segments present
        on the PDF page.

        Parameters
        ----------
        table : camelot.core.Table

        Returns
        -------
        fig : matplotlib.figure.Figure

        """
        fig, ax = _new_figure()
        _draw_pdf_image(table, ax)
        draw_parse_constraints(table, ax)
        vertical, horizontal = table._segments
        for v in vertical:
            ax.plot([v[0], v[2]], [v[1], v[3]])
        for h in horizontal:
            ax.plot([h[0], h[2]], [h[1], h[3]])
        return fig