Deprecate debug and add plot docstrings

pull/2/head
Vinayak Mehta 2018-09-23 11:56:40 +05:30
parent 71d91fbebd
commit 7aaa7b2460
3 changed files with 39 additions and 12 deletions

View File

@@ -77,17 +77,13 @@ class Lattice(BaseParser):
PDFMiner margins. (char_margin, line_margin, word_margin)
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
debug : bool, optional (default: False)
Whether or not to return all text objects on the page
which can be used to generate a matplotlib plot, to get
values for table_area(s) and debugging.
"""
def __init__(self, table_area=None, process_background=False,
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
split_text=False, flag_size=False, line_close_tol=2,
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
iterations=0, margins=(1.0, 0.5, 0.1), debug=False):
iterations=0, margins=(1.0, 0.5, 0.1)):
self.table_area = table_area
self.process_background = process_background
self.line_size_scaling = line_size_scaling
@@ -101,7 +97,6 @@ class Lattice(BaseParser):
self.threshold_constant = threshold_constant
self.iterations = iterations
self.char_margin, self.line_margin, self.word_margin = margins
self.debug = debug
@staticmethod
def _reduce_index(t, idx, shift_text):

View File

@@ -47,15 +47,11 @@ class Stream(BaseParser):
PDFMiner margins. (char_margin, line_margin, word_margin)
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
debug : bool, optional (default: False)
Whether or not to return all text objects on the page
which can be used to generate a matplotlib plot, to get
values for table_area(s), columns and debugging.
"""
def __init__(self, table_area=None, columns=None, split_text=False,
flag_size=False, row_close_tol=2, col_close_tol=0,
margins=(1.0, 0.5, 0.1), debug=False):
margins=(1.0, 0.5, 0.1)):
self.table_area = table_area
self.columns = columns
self._validate_columns()
@@ -64,7 +60,6 @@ class Stream(BaseParser):
self.row_close_tol = row_close_tol
self.col_close_tol = col_close_tol
self.char_margin, self.line_margin, self.word_margin = margins
self.debug = debug
@staticmethod
def _text_bbox(t_bbox):

View File

@@ -4,6 +4,13 @@ import matplotlib.patches as patches
def plot_text(text):
"""Generates a plot for all text present on the PDF page.
Parameters
----------
text : list
"""
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
xs, ys = [], []
@@ -23,6 +30,13 @@ def plot_text(text):
def plot_table(table):
"""Generates a plot for the table.
Parameters
----------
table : camelot.core.Table
"""
for row in table.cells:
for cell in row:
if cell.left:
@@ -41,6 +55,14 @@ def plot_table(table):
def plot_contour(image):
"""Generates a plot for all table boundaries present on the
PDF page.
Parameters
----------
image : tuple
"""
img, table_bbox = image
for t in table_bbox.keys():
cv2.rectangle(img, (t[0], t[1]),
@@ -50,6 +72,14 @@ def plot_contour(image):
def plot_joint(image):
"""Generates a plot for all line intersections present on the
PDF page.
Parameters
----------
image : tuple
"""
img, table_bbox = image
x_coord = []
y_coord = []
@@ -63,6 +93,13 @@ def plot_joint(image):
def plot_line(segments):
"""Generates a plot for all line segments present on the PDF page.
Parameters
----------
segments : tuple
"""
vertical, horizontal = segments
for v in vertical:
plt.plot([v[0], v[2]], [v[1], v[3]])