Add textedge plot type
parent
451fac9e53
commit
87a2f4fdc9
|
|
@ -138,7 +138,7 @@ def lattice(c, *args, **kwargs):
|
|||
@click.option('-c', '--col_close_tol', default=0, help='Tolerance parameter'
|
||||
' used to combine text horizontally, to generate columns.')
|
||||
@click.option('-plot', '--plot_type',
|
||||
type=click.Choice(['text', 'grid']),
|
||||
type=click.Choice(['text', 'grid', 'contour', 'textedge']),
|
||||
help='Plot elements found on PDF page for visual debugging.')
|
||||
@click.argument('filepath', type=click.Path(exists=True))
|
||||
@pass_config
|
||||
|
|
|
|||
|
|
@ -341,6 +341,7 @@ class Lattice(BaseParser):
|
|||
table._text = _text
|
||||
table._image = (self.image, self.table_bbox_unscaled)
|
||||
table._segments = (self.vertical_segments, self.horizontal_segments)
|
||||
table._textedges = None
|
||||
|
||||
return table
|
||||
|
||||
|
|
|
|||
|
|
@ -263,6 +263,7 @@ class Stream(BaseParser):
|
|||
textedges.generate(textlines)
|
||||
# select relevant edges
|
||||
relevant_textedges = textedges.get_relevant()
|
||||
self.textedges.extend(relevant_textedges)
|
||||
# guess table areas using textlines and relevant edges
|
||||
table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
|
||||
# treat whole page as table area if no table areas found
|
||||
|
|
@ -272,6 +273,7 @@ class Stream(BaseParser):
|
|||
return table_bbox
|
||||
|
||||
def _generate_table_bbox(self):
|
||||
self.textedges = []
|
||||
if self.table_areas is not None:
|
||||
table_bbox = {}
|
||||
for area in self.table_areas:
|
||||
|
|
@ -378,6 +380,7 @@ class Stream(BaseParser):
|
|||
table._text = _text
|
||||
table._image = None
|
||||
table._segments = None
|
||||
table._textedges = self.textedges
|
||||
|
||||
return table
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,10 @@ class PlotMethods(object):
|
|||
if not _HAS_MPL:
|
||||
raise ImportError('matplotlib is required for plotting.')
|
||||
|
||||
if table.flavor == 'stream' and kind in ['contour', 'joint', 'line']:
|
||||
if table.flavor == 'lattice' and kind in ['textedge']:
|
||||
raise NotImplementedError("Lattice flavor does not support kind='{}'".format(
|
||||
kind))
|
||||
elif table.flavor == 'stream' and kind in ['joint', 'line']:
|
||||
raise NotImplementedError("Stream flavor does not support kind='{}'".format(
|
||||
kind))
|
||||
|
||||
|
|
@ -114,22 +117,84 @@ class PlotMethods(object):
|
|||
fig : matplotlib.fig.Figure
|
||||
|
||||
"""
|
||||
try:
|
||||
img, table_bbox = table._image
|
||||
_FOR_LATTICE = True
|
||||
except TypeError:
|
||||
img, table_bbox = (None, {table._bbox: None})
|
||||
_FOR_LATTICE = False
|
||||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111, aspect='equal')
|
||||
|
||||
xs, ys = [], []
|
||||
if not _FOR_LATTICE:
|
||||
for t in table._text:
|
||||
xs.extend([t[0], t[2]])
|
||||
ys.extend([t[1], t[3]])
|
||||
ax.add_patch(
|
||||
patches.Rectangle(
|
||||
(t[0], t[1]),
|
||||
t[2] - t[0],
|
||||
t[3] - t[1],
|
||||
color='blue'
|
||||
)
|
||||
)
|
||||
|
||||
for t in table_bbox.keys():
|
||||
ax.add_patch(
|
||||
patches.Rectangle(
|
||||
(t[0], t[1]),
|
||||
t[2] - t[0],
|
||||
t[3] - t[1],
|
||||
fill=None,
|
||||
edgecolor='red'
|
||||
fill=False,
|
||||
color='red'
|
||||
)
|
||||
)
|
||||
if not _FOR_LATTICE:
|
||||
xs.extend([t[0], t[2]])
|
||||
ys.extend([t[1], t[3]])
|
||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||
|
||||
if _FOR_LATTICE:
|
||||
ax.imshow(img)
|
||||
return fig
|
||||
|
||||
def textedge(self, table):
    """Generates a plot for relevant textedges.

    Every entry of ``table._text`` is drawn as a filled blue rectangle
    and every entry of ``table._textedges`` is drawn as a vertical line
    at ``x`` running from ``y0`` to ``y1``.

    Parameters
    ----------
    table : camelot.core.Table
        Must expose ``_text`` (an iterable of indexable items whose
        first four elements are used as x0, y0, x1, y1) and
        ``_textedges`` (an iterable of objects with ``x``, ``y0`` and
        ``y1`` attributes).

    Returns
    -------
    fig : matplotlib.fig.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')

    # Collect every corner coordinate so the axes can be fitted to the
    # text boxes once all of them have been drawn.
    xs, ys = [], []
    for t in table._text:
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1],
                color='blue'
            )
        )

    # min()/max() raise ValueError on an empty sequence; when there is
    # no text to frame, keep matplotlib's default limits instead of
    # failing before the textedges are drawn.
    if xs:
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)

    for te in table._textedges:
        ax.plot([te.x, te.x],
                [te.y0, te.y1])

    return fig
|
||||
|
||||
def joint(self, table):
|
||||
"""Generates a plot for all line intersections present
|
||||
on the PDF page.
|
||||
|
|
|
|||
Loading…
Reference in New Issue