Deprecate debug and add plot docstrings
parent
71d91fbebd
commit
7aaa7b2460
|
|
@ -77,17 +77,13 @@ class Lattice(BaseParser):
|
||||||
PDFMiner margins. (char_margin, line_margin, word_margin)
|
PDFMiner margins. (char_margin, line_margin, word_margin)
|
||||||
|
|
||||||
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
|
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
|
||||||
debug : bool, optional (default: False)
|
|
||||||
Whether or not to return all text objects on the page
|
|
||||||
which can be used to generate a matplotlib plot, to get
|
|
||||||
values for table_area(s) and debugging.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, table_area=None, process_background=False,
|
def __init__(self, table_area=None, process_background=False,
|
||||||
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
||||||
split_text=False, flag_size=False, line_close_tol=2,
|
split_text=False, flag_size=False, line_close_tol=2,
|
||||||
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
||||||
iterations=0, margins=(1.0, 0.5, 0.1), debug=False):
|
iterations=0, margins=(1.0, 0.5, 0.1)):
|
||||||
self.table_area = table_area
|
self.table_area = table_area
|
||||||
self.process_background = process_background
|
self.process_background = process_background
|
||||||
self.line_size_scaling = line_size_scaling
|
self.line_size_scaling = line_size_scaling
|
||||||
|
|
@ -101,7 +97,6 @@ class Lattice(BaseParser):
|
||||||
self.threshold_constant = threshold_constant
|
self.threshold_constant = threshold_constant
|
||||||
self.iterations = iterations
|
self.iterations = iterations
|
||||||
self.char_margin, self.line_margin, self.word_margin = margins
|
self.char_margin, self.line_margin, self.word_margin = margins
|
||||||
self.debug = debug
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _reduce_index(t, idx, shift_text):
|
def _reduce_index(t, idx, shift_text):
|
||||||
|
|
|
||||||
|
|
@ -47,15 +47,11 @@ class Stream(BaseParser):
|
||||||
PDFMiner margins. (char_margin, line_margin, word_margin)
|
PDFMiner margins. (char_margin, line_margin, word_margin)
|
||||||
|
|
||||||
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
|
For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
|
||||||
debug : bool, optional (default: False)
|
|
||||||
Whether or not to return all text objects on the page
|
|
||||||
which can be used to generate a matplotlib plot, to get
|
|
||||||
values for table_area(s), columns and debugging.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, table_area=None, columns=None, split_text=False,
|
def __init__(self, table_area=None, columns=None, split_text=False,
|
||||||
flag_size=False, row_close_tol=2, col_close_tol=0,
|
flag_size=False, row_close_tol=2, col_close_tol=0,
|
||||||
margins=(1.0, 0.5, 0.1), debug=False):
|
margins=(1.0, 0.5, 0.1)):
|
||||||
self.table_area = table_area
|
self.table_area = table_area
|
||||||
self.columns = columns
|
self.columns = columns
|
||||||
self._validate_columns()
|
self._validate_columns()
|
||||||
|
|
@ -64,7 +60,6 @@ class Stream(BaseParser):
|
||||||
self.row_close_tol = row_close_tol
|
self.row_close_tol = row_close_tol
|
||||||
self.col_close_tol = col_close_tol
|
self.col_close_tol = col_close_tol
|
||||||
self.char_margin, self.line_margin, self.word_margin = margins
|
self.char_margin, self.line_margin, self.word_margin = margins
|
||||||
self.debug = debug
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _text_bbox(t_bbox):
|
def _text_bbox(t_bbox):
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,13 @@ import matplotlib.patches as patches
|
||||||
|
|
||||||
|
|
||||||
def plot_text(text):
|
def plot_text(text):
|
||||||
|
"""Generates a plot for all text present on the PDF page.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
text : list
|
||||||
|
|
||||||
|
"""
|
||||||
fig = plt.figure()
|
fig = plt.figure()
|
||||||
ax = fig.add_subplot(111, aspect='equal')
|
ax = fig.add_subplot(111, aspect='equal')
|
||||||
xs, ys = [], []
|
xs, ys = [], []
|
||||||
|
|
@ -23,6 +30,13 @@ def plot_text(text):
|
||||||
|
|
||||||
|
|
||||||
def plot_table(table):
|
def plot_table(table):
|
||||||
|
"""Generates a plot for the table.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
table : camelot.core.Table
|
||||||
|
|
||||||
|
"""
|
||||||
for row in table.cells:
|
for row in table.cells:
|
||||||
for cell in row:
|
for cell in row:
|
||||||
if cell.left:
|
if cell.left:
|
||||||
|
|
@ -41,6 +55,14 @@ def plot_table(table):
|
||||||
|
|
||||||
|
|
||||||
def plot_contour(image):
|
def plot_contour(image):
|
||||||
|
"""Generates a plot for all table boundaries present on the
|
||||||
|
PDF page.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
image : tuple
|
||||||
|
|
||||||
|
"""
|
||||||
img, table_bbox = image
|
img, table_bbox = image
|
||||||
for t in table_bbox.keys():
|
for t in table_bbox.keys():
|
||||||
cv2.rectangle(img, (t[0], t[1]),
|
cv2.rectangle(img, (t[0], t[1]),
|
||||||
|
|
@ -50,6 +72,14 @@ def plot_contour(image):
|
||||||
|
|
||||||
|
|
||||||
def plot_joint(image):
|
def plot_joint(image):
|
||||||
|
"""Generates a plot for all line intersections present on the
|
||||||
|
PDF page.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
image : tuple
|
||||||
|
|
||||||
|
"""
|
||||||
img, table_bbox = image
|
img, table_bbox = image
|
||||||
x_coord = []
|
x_coord = []
|
||||||
y_coord = []
|
y_coord = []
|
||||||
|
|
@ -63,6 +93,13 @@ def plot_joint(image):
|
||||||
|
|
||||||
|
|
||||||
def plot_line(segments):
|
def plot_line(segments):
|
||||||
|
"""Generates a plot for all line segments present on the PDF page.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
segments : tuple
|
||||||
|
|
||||||
|
"""
|
||||||
vertical, horizontal = segments
|
vertical, horizontal = segments
|
||||||
for v in vertical:
|
for v in vertical:
|
||||||
plt.plot([v[0], v[2]], [v[1], v[3]])
|
plt.plot([v[0], v[2]], [v[1], v[3]])
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue