diff --git a/README.md b/README.md index 7efc6f2..4a5227f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Camelot is a Python 2.7 library and command-line tool for getting tables out of >>> import camelot >>> tables = camelot.read_pdf("foo.pdf") >>> tables -<TableSet n=2> +<TableList n=2> >>> tables.to_csv(zip=True) # to_json, to_excel, to_html >>> tables[0] <Table shape=(3,4)> @@ -19,8 +19,8 @@ Camelot is a Python 2.7 library and command-line tool for getting tables out of "time_taken": 0.5, "page": 1 } +>>> df = tables[0].df >>> tables[0].to_csv("foo.csv") # to_json, to_excel, to_html ->>> df = tables[0].to_df() Camelot comes with a CLI where you can specify page numbers, output format, output directory etc. By default, the output files are placed in the same directory as the PDF. diff --git a/camelot/__init__.py b/camelot/__init__.py index b762cea..9762184 100644 --- a/camelot/__init__.py +++ b/camelot/__init__.py @@ -1,3 +1,4 @@ from .__version__ import __version__ -from .io import read_pdf \ No newline at end of file +from .io import read_pdf +from .plot import plot_geometry \ No newline at end of file diff --git a/camelot/core.py b/camelot/core.py index 0b7a21b..ec74eb3 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -21,7 +21,6 @@ class Cell(object): self.text = '' self.spanning_h = False self.spanning_v = False - self.image = None def __repr__(self): pass @@ -49,8 +48,6 @@ class Table(object): self.rows = rows self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows] - self.nocont_ = 0 - self.image = None def __repr__(self): pass @@ -127,7 +124,7 @@ class Table(object): J += 1 for h in horizontal: - # find closest y coord + # find closest y coord # iterate over x coords and find closest points i = [i for i, t in enumerate(self.rows) if np.isclose(h[1], t[0], atol=jtol)] @@ -227,9 +224,66 @@ class Table(object): return ar -class TableSet(object): - def __init__(self): - pass +class TableList(list): + def __init__(self, tables): + self._tables = tables def __repr__(self): - pass \ No newline at end of file + return '<{} tables={}>'.format( + self.__class__.__name__, len(self._tables)) + + +class Geometry(object): + def __init__(self): + self._text = [] + self._images = [] + self._segments = [] + self._tables = [] + + @property + def text(self): + return self._text + + @text.setter + def text(self, t): + self._text = t + + @property + def images(self): + return self._images + + @images.setter + def images(self, i): + self._images = i + + @property + def segments(self): + return self._segments + + @segments.setter + def segments(self, s): + self._segments = s + + @property + def tables(self): + return self._tables + + @tables.setter + def tables(self, tb): + self._tables = tb + + +class GeometryList(object): + def __init__(self, geometry): + self._text = [g.text for g in geometry] + self._images = [g.images for g in geometry] + self._segments = [g.segments for g in geometry] + self._tables = [g.tables for g in geometry] + + def __repr__(self): + return '<{} text={} images={} segments={} tables={}>'.format( + self.__class__.__name__, + len(self._text), + len(self._images), + len(self._segments), + len(self._tables)) \ No newline at end of file diff --git a/camelot/handlers.py b/camelot/handlers.py index 83fe4ae..35f14f4 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -74,10 +74,11 @@ class PDFHandler(object): self.__save_page(self.filename, p, self.temp) pages = [os.path.join(self.temp, 'page-{0}.pdf'.format(p)) for p in self.pages] - tables = {} + tables = [] + geometry = [] parser = Stream(**kwargs) if not mesh else Lattice(**kwargs) for p in pages: - table = parser.get_tables(p) - if table is not None: - tables.update(table) - return tables \ No newline at end of file + t, g = parser.extract_tables(p) + tables.extend(t) + geometry.extend(g) + return TableList(tables), GeometryList(geometry) \ No newline at end of file diff --git a/camelot/io.py b/camelot/io.py index fcae040..c2b319b 100644 --- a/camelot/io.py +++ b/camelot/io.py @@ -4,4 +4,5 @@ from .handlers import PDFHandler def read_pdf(filepath, pages='1', mesh=False, **kwargs): # explicit type conversion p = PDFHandler(filepath, pages) - return p.parse(mesh=mesh, **kwargs) \ No newline at end of file + tables, __ = p.parse(mesh=mesh, **kwargs) + return tables \ No newline at end of file diff --git a/camelot/parsers.py b/camelot/parsers.py index c175fee..205743c 100644 --- a/camelot/parsers.py +++ b/camelot/parsers.py @@ -10,7 +10,7 @@ import subprocess import numpy as np -from .core import Table +from .core import Table, Geometry from .image_processing import (adaptive_threshold, find_lines, find_table_contours, find_table_joints) from .utils import (scale_to_pdf, scale_to_image, segments_bbox, text_in_bbox, @@ -30,192 +30,6 @@ def _reduce_method(m): copy_reg.pickle(types.MethodType, _reduce_method) -def _text_bbox(t_bbox): - """Returns bounding box for the text present on a page. - - Parameters - ---------- - t_bbox : dict - Dict with two keys 'horizontal' and 'vertical' with lists of - LTTextLineHorizontals and LTTextLineVerticals respectively. - - Returns - ------- - text_bbox : tuple - Tuple of the form (x0, y0, x1, y1) in PDFMiner's coordinate - space. - """ - xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) - ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) - xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) - ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) - text_bbox = (xmin, ymin, xmax, ymax) - return text_bbox - - -def _group_rows(text, ytol=2): - """Groups PDFMiner text objects into rows using their - y-coordinates taking into account some tolerance ytol. - - Parameters - ---------- - text : list - List of PDFMiner text objects. - - ytol : int - Tolerance parameter. - (optional, default: 2) - - Returns - ------- - rows : list - Two-dimensional list of text objects grouped into rows. - """ - row_y = 0 - rows = [] - temp = [] - for t in text: - # is checking for upright necessary? - # if t.get_text().strip() and all([obj.upright for obj in t._objs if - # type(obj) is LTChar]): - if t.get_text().strip(): - if not np.isclose(row_y, t.y0, atol=ytol): - rows.append(sorted(temp, key=lambda t: t.x0)) - temp = [] - row_y = t.y0 - temp.append(t) - rows.append(sorted(temp, key=lambda t: t.x0)) - __ = rows.pop(0) # hacky - return rows - - -def _merge_columns(l, mtol=0): - """Merges column boundaries if they overlap or lie within some - tolerance mtol. - - Parameters - ---------- - l : list - List of column coordinate tuples. - - mtol : int - TODO - (optional, default: 0) - - Returns - ------- - merged : list - List of merged column coordinate tuples. - """ - merged = [] - for higher in l: - if not merged: - merged.append(higher) - else: - lower = merged[-1] - if mtol >= 0: - if (higher[0] <= lower[1] or - np.isclose(higher[0], lower[1], atol=mtol)): - upper_bound = max(lower[1], higher[1]) - lower_bound = min(lower[0], higher[0]) - merged[-1] = (lower_bound, upper_bound) - else: - merged.append(higher) - elif mtol < 0: - if higher[0] <= lower[1]: - if np.isclose(higher[0], lower[1], atol=abs(mtol)): - merged.append(higher) - else: - upper_bound = max(lower[1], higher[1]) - lower_bound = min(lower[0], higher[0]) - merged[-1] = (lower_bound, upper_bound) - else: - merged.append(higher) - return merged - - -def _join_rows(rows_grouped, text_y_max, text_y_min): - """Makes row coordinates continuous. - - Parameters - ---------- - rows_grouped : list - Two-dimensional list of text objects grouped into rows. - - text_y_max : int - - text_y_min : int - - Returns - ------- - rows : list - List of continuous row coordinate tuples. - """ - row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) - if len(r) > 0 else 0 for r in rows_grouped] - rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] - rows.insert(0, text_y_max) - rows.append(text_y_min) - rows = [(rows[i], rows[i + 1]) - for i in range(0, len(rows) - 1)] - return rows - - -def _join_columns(cols, text_x_min, text_x_max): - """Makes column coordinates continuous. - - Parameters - ---------- - cols : list - List of column coordinate tuples. - - text_x_min : int - - text_y_max : int - - Returns - ------- - cols : list - Updated list of column coordinate tuples. - """ - cols = sorted(cols) - cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] - cols.insert(0, text_x_min) - cols.append(text_x_max) - cols = [(cols[i], cols[i + 1]) - for i in range(0, len(cols) - 1)] - return cols - - -def _add_columns(cols, text, ytol): - """Adds columns to existing list by taking into account - the text that lies outside the current column coordinates. - - Parameters - ---------- - cols : list - List of column coordinate tuples. - - text : list - List of PDFMiner text objects. - - ytol : int - Tolerance parameter. - - Returns - ------- - cols : list - Updated list of column coordinate tuples. - """ - if text: - text = _group_rows(text, ytol=ytol) - elements = [len(r) for r in text] - new_cols = [(t.x0, t.x1) - for r in text if len(r) == max(elements) for t in r] - cols.extend(_merge_columns(sorted(new_cols))) - return cols - - class Stream: """Stream looks for spaces between text elements to form a table. @@ -283,7 +97,193 @@ class Stream: self.flag_size = flag_size self.debug = debug - def get_tables(self, pdfname): + @staticmethod + def _text_bbox(t_bbox): + """Returns bounding box for the text present on a page. + + Parameters + ---------- + t_bbox : dict + Dict with two keys 'horizontal' and 'vertical' with lists of + LTTextLineHorizontals and LTTextLineVerticals respectively. + + Returns + ------- + text_bbox : tuple + Tuple of the form (x0, y0, x1, y1) in PDFMiner's coordinate + space. + """ + xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) + ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) + xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) + ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) + text_bbox = (xmin, ymin, xmax, ymax) + return text_bbox + + @staticmethod + def _group_rows(text, ytol=2): + """Groups PDFMiner text objects into rows using their + y-coordinates taking into account some tolerance ytol. + + Parameters + ---------- + text : list + List of PDFMiner text objects. + + ytol : int + Tolerance parameter. + (optional, default: 2) + + Returns + ------- + rows : list + Two-dimensional list of text objects grouped into rows. + """ + row_y = 0 + rows = [] + temp = [] + for t in text: + # is checking for upright necessary? + # if t.get_text().strip() and all([obj.upright for obj in t._objs if + # type(obj) is LTChar]): + if t.get_text().strip(): + if not np.isclose(row_y, t.y0, atol=ytol): + rows.append(sorted(temp, key=lambda t: t.x0)) + temp = [] + row_y = t.y0 + temp.append(t) + rows.append(sorted(temp, key=lambda t: t.x0)) + __ = rows.pop(0) # hacky + return rows + + @staticmethod + def _merge_columns(l, mtol=0): + """Merges column boundaries if they overlap or lie within some + tolerance mtol. + + Parameters + ---------- + l : list + List of column coordinate tuples. + + mtol : int + TODO + (optional, default: 0) + + Returns + ------- + merged : list + List of merged column coordinate tuples. + """ + merged = [] + for higher in l: + if not merged: + merged.append(higher) + else: + lower = merged[-1] + if mtol >= 0: + if (higher[0] <= lower[1] or + np.isclose(higher[0], lower[1], atol=mtol)): + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + elif mtol < 0: + if higher[0] <= lower[1]: + if np.isclose(higher[0], lower[1], atol=abs(mtol)): + merged.append(higher) + else: + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + return merged + + @staticmethod + def _join_rows(rows_grouped, text_y_max, text_y_min): + """Makes row coordinates continuous. + + Parameters + ---------- + rows_grouped : list + Two-dimensional list of text objects grouped into rows. + + text_y_max : int + + text_y_min : int + + Returns + ------- + rows : list + List of continuous row coordinate tuples. + """ + row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) + if len(r) > 0 else 0 for r in rows_grouped] + rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] + rows.insert(0, text_y_max) + rows.append(text_y_min) + rows = [(rows[i], rows[i + 1]) + for i in range(0, len(rows) - 1)] + return rows + + @staticmethod + def _add_columns(cols, text, ytol): + """Adds columns to existing list by taking into account + the text that lies outside the current column coordinates. + + Parameters + ---------- + cols : list + List of column coordinate tuples. + + text : list + List of PDFMiner text objects. + + ytol : int + Tolerance parameter. + + Returns + ------- + cols : list + Updated list of column coordinate tuples. + """ + if text: + text = Stream._group_rows(text, ytol=ytol) + elements = [len(r) for r in text] + new_cols = [(t.x0, t.x1) + for r in text if len(r) == max(elements) for t in r] + cols.extend(Stream._merge_columns(sorted(new_cols))) + return cols + + @staticmethod + def _join_columns(cols, text_x_min, text_x_max): + """Makes column coordinates continuous. + + Parameters + ---------- + cols : list + List of column coordinate tuples. + + text_x_min : int + + text_y_max : int + + Returns + ------- + cols : list + Updated list of column coordinate tuples. + """ + cols = sorted(cols) + cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] + cols.insert(0, text_x_min) + cols.append(text_x_max) + cols = [(cols[i], cols[i + 1]) + for i in range(0, len(cols) - 1)] + return cols + + def extract_tables(self, pdfname): """Expects a single page pdf as input with rotation corrected. Parameters @@ -308,11 +308,13 @@ class Stream: os.path.basename(bname))) return {os.path.basename(bname): None} + g = Geometry() if self.debug: - self.debug_text = [] - self.debug_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlh]) - self.debug_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlv]) - return None + text = [] + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlh]) + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlv]) + g.text = text + return [None], [g] if self.table_area is not None: if self.columns is not None: @@ -354,9 +356,9 @@ class Stream: table_data['text_p'] = 100 * (1 - (len(char_bbox) / len(ltchar))) for direction in t_bbox: t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0)) - text_x_min, text_y_min, text_x_max, text_y_max = _text_bbox(t_bbox) - rows_grouped = _group_rows(t_bbox['horizontal'], ytol=ytolerance[table_no]) - rows = _join_rows(rows_grouped, text_y_max, text_y_min) + text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(t_bbox) + rows_grouped = self._group_rows(t_bbox['horizontal'], ytol=ytolerance[table_no]) + rows = self._join_rows(rows_grouped, text_y_max, text_y_min) elements = [len(r) for r in rows_grouped] guess = False @@ -380,7 +382,7 @@ class Stream: os.path.basename(bname))) cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r] - cols = _merge_columns(sorted(cols), mtol=mtolerance[table_no]) + cols = self._merge_columns(sorted(cols), mtol=mtolerance[table_no]) inner_text = [] for i in range(1, len(cols)): left = cols[i - 1][1] @@ -392,8 +394,8 @@ class Stream: for t in t_bbox[direction] if t.x0 > cols[-1][1] or t.x1 < cols[0][0]] inner_text.extend(outer_text) - cols = _add_columns(cols, inner_text, ytolerance[table_no]) - cols = _join_columns(cols, text_x_min, text_x_max) + cols = self._add_columns(cols, inner_text, ytolerance[table_no]) + cols = self._join_columns(cols, text_x_min, text_x_max) table = Table(cols, rows) table = table.set_all_edges() @@ -433,87 +435,6 @@ class Stream: return page -def _reduce_index(t, idx, shift_text): - """Reduces index of a text object if it lies within a spanning - cell. - - Parameters - ---------- - table : object - camelot.table.Table - - idx : list - List of tuples of the form (r_idx, c_idx, text). - - shift_text : list - {'l', 'r', 't', 'b'} - Select one or more from above and pass them as a list to - specify where the text in a spanning cell should flow. - - Returns - ------- - indices : list - List of tuples of the form (idx, text) where idx is the reduced - index of row/column and text is the an lttextline substring. - """ - indices = [] - for r_idx, c_idx, text in idx: - for d in shift_text: - if d == 'l': - if t.cells[r_idx][c_idx].spanning_h: - while not t.cells[r_idx][c_idx].left: - c_idx -= 1 - if d == 'r': - if t.cells[r_idx][c_idx].spanning_h: - while not t.cells[r_idx][c_idx].right: - c_idx += 1 - if d == 't': - if t.cells[r_idx][c_idx].spanning_v: - while not t.cells[r_idx][c_idx].top: - r_idx -= 1 - if d == 'b': - if t.cells[r_idx][c_idx].spanning_v: - while not t.cells[r_idx][c_idx].bottom: - r_idx += 1 - indices.append((r_idx, c_idx, text)) - return indices - - -def _fill_spanning(t, fill=None): - """Fills spanning cells. - - Parameters - ---------- - t : object - camelot.table.Table - - fill : list - {'h', 'v'} - Specify to fill spanning cells in horizontal or vertical - direction. - (optional, default: None) - - Returns - ------- - t : object - camelot.table.Table - """ - for f in fill: - if f == "h": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].get_text().strip() == '': - if t.cells[i][j].spanning_h and not t.cells[i][j].left: - t.cells[i][j].add_text(t.cells[i][j - 1].get_text()) - elif f == "v": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].get_text().strip() == '': - if t.cells[i][j].spanning_v and not t.cells[i][j].top: - t.cells[i][j].add_text(t.cells[i - 1][j].get_text()) - return t - - class Lattice: """Lattice looks for lines in the pdf to form a table. @@ -617,7 +538,88 @@ class Lattice: self.shift_text = shift_text self.debug = debug - def get_tables(self, pdfname): + @staticmethod + def _reduce_index(t, idx, shift_text): + """Reduces index of a text object if it lies within a spanning + cell. + + Parameters + ---------- + table : object + camelot.table.Table + + idx : list + List of tuples of the form (r_idx, c_idx, text). + + shift_text : list + {'l', 'r', 't', 'b'} + Select one or more from above and pass them as a list to + specify where the text in a spanning cell should flow. + + Returns + ------- + indices : list + List of tuples of the form (idx, text) where idx is the reduced + index of row/column and text is the an lttextline substring. + """ + indices = [] + for r_idx, c_idx, text in idx: + for d in shift_text: + if d == 'l': + if t.cells[r_idx][c_idx].spanning_h: + while not t.cells[r_idx][c_idx].left: + c_idx -= 1 + if d == 'r': + if t.cells[r_idx][c_idx].spanning_h: + while not t.cells[r_idx][c_idx].right: + c_idx += 1 + if d == 't': + if t.cells[r_idx][c_idx].spanning_v: + while not t.cells[r_idx][c_idx].top: + r_idx -= 1 + if d == 'b': + if t.cells[r_idx][c_idx].spanning_v: + while not t.cells[r_idx][c_idx].bottom: + r_idx += 1 + indices.append((r_idx, c_idx, text)) + return indices + + + def _fill_spanning(t, fill=None): + """Fills spanning cells. + + Parameters + ---------- + t : object + camelot.table.Table + + fill : list + {'h', 'v'} + Specify to fill spanning cells in horizontal or vertical + direction. + (optional, default: None) + + Returns + ------- + t : object + camelot.table.Table + """ + for f in fill: + if f == "h": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].get_text().strip() == '': + if t.cells[i][j].spanning_h and not t.cells[i][j].left: + t.cells[i][j].add_text(t.cells[i][j - 1].get_text()) + elif f == "v": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].get_text().strip() == '': + if t.cells[i][j].spanning_v and not t.cells[i][j].top: + t.cells[i][j].add_text(t.cells[i - 1][j].get_text()) + return t + + def extract_tables(self, pdfname): """Expects a single page pdf as input with rotation corrected. Parameters @@ -696,15 +698,16 @@ class Lattice: else: jtolerance = copy.deepcopy(self.jtol) + g = Geometry() if self.debug: - self.debug_images = (img, table_bbox) + g.images = [(img, table_bbox)] table_bbox, v_segments, h_segments = scale_to_pdf(table_bbox, v_segments, h_segments, factors_pdf) if self.debug: - self.debug_segments = (v_segments, h_segments) - self.debug_tables = [] + g.segments = [(v_segments, h_segments)] + _tables = [] page = {} tables = {} @@ -737,15 +740,13 @@ class Lattice: table = Table(cols, rows) # set table edges to True using ver+hor lines table = table.set_edges(v_s, h_s, jtol=jtolerance[table_no]) - nouse = table.nocont_ / (len(v_s) + len(h_s)) - table_data['line_p'] = 100 * (1 - nouse) # set spanning cells to True table = table.set_spanning() # set table border edges to True table = table.set_border_edges() if self.debug: - self.debug_tables.append(table) + _tables.append(table) assignment_errors = [] table_data['split_text'] = [] @@ -757,7 +758,7 @@ class Lattice: flag_size=self.flag_size) if indices[:2] != (-1, -1): assignment_errors.append(error) - indices = _reduce_index(table, indices, shift_text=self.shift_text) + indices = self._reduce_index(table, indices, shift_text=self.shift_text) if len(indices) > 1: table_data['split_text'].append(indices) for r_idx, c_idx, text in indices: @@ -768,7 +769,7 @@ class Lattice: table_data['score'] = score if self.fill is not None: - table = _fill_spanning(table, fill=self.fill) + table = self._fill_spanning(table, fill=self.fill) ar = table.get_list() ar = encode_list(ar) table_data['data'] = ar @@ -782,6 +783,7 @@ class Lattice: page[os.path.basename(bname)] = tables if self.debug: - return None + g.tables = _tables + return [None], [g] return page \ No newline at end of file diff --git a/camelot/plot.py b/camelot/plot.py new file mode 100644 index 0000000..92bcd69 --- /dev/null +++ b/camelot/plot.py @@ -0,0 +1,98 @@ +import matplotlib.pyplot as plt +import matplotlib.patches as patches + +from .handlers import PDFHandler + + +def plot_geometry(filepath, pages='1', mesh=False, geometry_type='text', **kwargs): + # explicit type conversion + p = PDFHandler(filepath, pages) + kwargs.update({'debug': geometry_type}) + __, geometry = p.parse(mesh=mesh, **kwargs) + + if geometry_type == 'text': + for text in geometry.text: + fig = plt.figure() + ax = fig.add_subplot(111, aspect='equal') + xs, ys = [], [] + for t in text: + xs.extend([t[0], t[1]]) + ys.extend([t[2], t[3]]) + ax.add_patch( + patches.Rectangle( + (t[0], t[1]), + t[2] - t[0], + t[3] - t[1] + ) + ) + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + plt.show() + elif geometry_type == 'contour': + try: + for img, table_bbox in geometry.images: + for t in table_bbox.keys(): + cv2.rectangle(img, (t[0], t[1]), + (t[2], t[3]), (255, 0, 0), 3) + plt.imshow(img) + plt.show() + except AttributeError: + raise ValueError("This option can only be used with Lattice.") + elif geometry_type == 'joint': + try: + for img, table_bbox in geometry.images: + x_coord = [] + y_coord = [] + for k in table_bbox.keys(): + for coord in table_bbox[k]: + x_coord.append(coord[0]) + y_coord.append(coord[1]) + max_x, max_y = max(x_coord), max(y_coord) + plt.plot(x_coord, y_coord, 'ro') + plt.axis([0, max_x + 100, max_y + 100, 0]) + plt.imshow(img) + plt.show() + except AttributeError: + raise ValueError("This option can only be used with Lattice.") + elif geometry_type == 'line': + try: + for v_s, h_s in geometry.segments: + for v in v_s: + plt.plot([v[0], v[2]], [v[1], v[3]]) + for h in h_s: + plt.plot([h[0], h[2]], [h[1], h[3]]) + plt.show() + except AttributeError: + raise ValueError("This option can only be used with Lattice.") + elif geometry_type == 'table': + try: + for tables in geometry.tables: + for table in tables: + for r in range(len(table.rows)): + for c in range(len(table.cols)): + if table.cells[r][c].left: + plt.plot([table.cells[r][c].lb[0], + table.cells[r][c].lt[0]], + [table.cells[r][c].lb[1], + table.cells[r][c].lt[1]]) + if table.cells[r][c].right: + plt.plot([table.cells[r][c].rb[0], + table.cells[r][c].rt[0]], + [table.cells[r][c].rb[1], + table.cells[r][c].rt[1]]) + if table.cells[r][c].top: + plt.plot([table.cells[r][c].lt[0], + table.cells[r][c].rt[0]], + [table.cells[r][c].lt[1], + table.cells[r][c].rt[1]]) + if table.cells[r][c].bottom: + plt.plot([table.cells[r][c].lb[0], + table.cells[r][c].rb[0]], + [table.cells[r][c].lb[1], + table.cells[r][c].rb[1]]) + plt.show() + except AttributeError: + raise ValueError("This option can only be used with Lattice.") + else: + raise UserWarning("This method can only be called after" + " debug has been specified.") \ No newline at end of file