From 7aaa7b2460be360e2b6a5dbed85548cbb4ebf55e Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 23 Sep 2018 11:56:40 +0530 Subject: [PATCH] Deprecate debug and add plot docstrings --- camelot/parsers/lattice.py | 7 +------ camelot/parsers/stream.py | 7 +------ camelot/plotting.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 5de6faa..18ee0fc 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -77,17 +77,13 @@ class Lattice(BaseParser): PDFMiner margins. (char_margin, line_margin, word_margin) For more information, refer `PDFMiner docs `_. - debug : bool, optional (default: False) - Whether or not to return all text objects on the page - which can be used to generate a matplotlib plot, to get - values for table_area(s) and debugging. """ def __init__(self, table_area=None, process_background=False, line_size_scaling=15, copy_text=None, shift_text=['l', 't'], split_text=False, flag_size=False, line_close_tol=2, joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2, - iterations=0, margins=(1.0, 0.5, 0.1), debug=False): + iterations=0, margins=(1.0, 0.5, 0.1)): self.table_area = table_area self.process_background = process_background self.line_size_scaling = line_size_scaling @@ -101,7 +97,6 @@ class Lattice(BaseParser): self.threshold_constant = threshold_constant self.iterations = iterations self.char_margin, self.line_margin, self.word_margin = margins - self.debug = debug @staticmethod def _reduce_index(t, idx, shift_text): diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index b3acf38..2fcee66 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -47,15 +47,11 @@ class Stream(BaseParser): PDFMiner margins. (char_margin, line_margin, word_margin) For more information, refer `PDFMiner docs `_. - debug : bool, optional (default: False) - Whether or not to return all text objects on the page - which can be used to generate a matplotlib plot, to get - values for table_area(s), columns and debugging. """ def __init__(self, table_area=None, columns=None, split_text=False, flag_size=False, row_close_tol=2, col_close_tol=0, - margins=(1.0, 0.5, 0.1), debug=False): + margins=(1.0, 0.5, 0.1)): self.table_area = table_area self.columns = columns self._validate_columns() @@ -64,7 +60,6 @@ class Stream(BaseParser): self.row_close_tol = row_close_tol self.col_close_tol = col_close_tol self.char_margin, self.line_margin, self.word_margin = margins - self.debug = debug @staticmethod def _text_bbox(t_bbox): diff --git a/camelot/plotting.py b/camelot/plotting.py index 9e7e7a3..b3cf1b5 100644 --- a/camelot/plotting.py +++ b/camelot/plotting.py @@ -4,6 +4,13 @@ import matplotlib.patches as patches def plot_text(text): + """Generates a plot for all text present on the PDF page. + + Parameters + ---------- + text : list + + """ fig = plt.figure() ax = fig.add_subplot(111, aspect='equal') xs, ys = [], [] @@ -23,6 +30,13 @@ def plot_text(text): def plot_table(table): + """Generates a plot for the table. + + Parameters + ---------- + table : camelot.core.Table + + """ for row in table.cells: for cell in row: if cell.left: @@ -41,6 +55,14 @@ def plot_table(table): def plot_contour(image): + """Generates a plot for all table boundaries present on the + PDF page. + + Parameters + ---------- + image : tuple + + """ img, table_bbox = image for t in table_bbox.keys(): cv2.rectangle(img, (t[0], t[1]), @@ -50,6 +72,14 @@ def plot_contour(image): def plot_joint(image): + """Generates a plot for all line intersections present on the + PDF page. + + Parameters + ---------- + image : tuple + + """ img, table_bbox = image x_coord = [] y_coord = [] @@ -63,6 +93,13 @@ def plot_joint(image): def plot_line(segments): + """Generates a plot for all line segments present on the PDF page. + + Parameters + ---------- + segments : tuple + + """ vertical, horizontal = segments for v in vertical: plt.plot([v[0], v[2]], [v[1], v[3]])