Add textedge plot type

pull/2/head
Vinayak Mehta 2018-12-12 07:36:07 +05:30
parent 451fac9e53
commit 87a2f4fdc9
4 changed files with 75 additions and 6 deletions

View File

@ -138,7 +138,7 @@ def lattice(c, *args, **kwargs):
@click.option('-c', '--col_close_tol', default=0, help='Tolerance parameter'
' used to combine text horizontally, to generate columns.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'grid']),
type=click.Choice(['text', 'grid', 'contour', 'textedge']),
help='Plot elements found on PDF page for visual debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config

View File

@ -341,6 +341,7 @@ class Lattice(BaseParser):
table._text = _text
table._image = (self.image, self.table_bbox_unscaled)
table._segments = (self.vertical_segments, self.horizontal_segments)
table._textedges = None
return table

View File

@ -263,6 +263,7 @@ class Stream(BaseParser):
textedges.generate(textlines)
# select relevant edges
relevant_textedges = textedges.get_relevant()
self.textedges.extend(relevant_textedges)
# guess table areas using textlines and relevant edges
table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
# treat whole page as table area if no table areas found
@ -272,6 +273,7 @@ class Stream(BaseParser):
return table_bbox
def _generate_table_bbox(self):
self.textedges = []
if self.table_areas is not None:
table_bbox = {}
for area in self.table_areas:
@ -378,6 +380,7 @@ class Stream(BaseParser):
table._text = _text
table._image = None
table._segments = None
table._textedges = self.textedges
return table

View File

@ -33,7 +33,10 @@ class PlotMethods(object):
if not _HAS_MPL:
raise ImportError('matplotlib is required for plotting.')
if table.flavor == 'stream' and kind in ['contour', 'joint', 'line']:
if table.flavor == 'lattice' and kind in ['textedge']:
raise NotImplementedError("Lattice flavor does not support kind='{}'".format(
kind))
elif table.flavor == 'stream' and kind in ['joint', 'line']:
raise NotImplementedError("Stream flavor does not support kind='{}'".format(
kind))
@ -114,20 +117,82 @@ class PlotMethods(object):
fig : matplotlib.fig.Figure
"""
img, table_bbox = table._image
try:
img, table_bbox = table._image
_FOR_LATTICE = True
except TypeError:
img, table_bbox = (None, {table._bbox: None})
_FOR_LATTICE = False
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
xs, ys = [], []
if not _FOR_LATTICE:
for t in table._text:
xs.extend([t[0], t[2]])
ys.extend([t[1], t[3]])
ax.add_patch(
patches.Rectangle(
(t[0], t[1]),
t[2] - t[0],
t[3] - t[1],
color='blue'
)
)
for t in table_bbox.keys():
ax.add_patch(
patches.Rectangle(
(t[0], t[1]),
t[2] - t[0],
t[3] - t[1],
fill=None,
edgecolor='red'
fill=False,
color='red'
)
)
ax.imshow(img)
if not _FOR_LATTICE:
xs.extend([t[0], t[2]])
ys.extend([t[1], t[3]])
ax.set_xlim(min(xs) - 10, max(xs) + 10)
ax.set_ylim(min(ys) - 10, max(ys) + 10)
if _FOR_LATTICE:
ax.imshow(img)
return fig
def textedge(self, table):
    """Generates a plot for relevant textedges.

    Every entry of ``table._text`` is drawn as a filled blue rectangle,
    reading indices 0-3 of the entry as ``(x0, y0, x1, y1)``. Each item
    of ``table._textedges`` is then drawn as a vertical line at ``x``
    running from ``y0`` to ``y1``.

    Parameters
    ----------
    table : camelot.core.Table

    Returns
    -------
    fig : matplotlib.fig.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    xs, ys = [], []
    for t in table._text:
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1],
                color='blue'
            )
        )

    # min()/max() raise ValueError on an empty sequence, so only pin the
    # view limits (with a 10 unit margin) when at least one text box was
    # drawn; otherwise matplotlib's default limits are left in place.
    if xs:
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)

    # _textedges may be None (the Lattice parser assigns None to it), in
    # which case there is nothing to overlay.
    for te in table._textedges or ():
        ax.plot([te.x, te.x],
                [te.y0, te.y1])

    return fig
def joint(self, table):