186 lines
4.4 KiB
Python
186 lines
4.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.patches as patches
|
|
|
|
|
|
def plot(table, plot_type='text', filepath=None):
    """Plot elements found on PDF page based on plot_type
    specified, useful for debugging and playing with different
    parameters to get the best output.

    Parameters
    ----------
    table: Table
        A Camelot Table.
    plot_type : str, optional (default: 'text')
        {'text', 'table', 'contour', 'joint', 'line'}
        The element type for which a plot should be generated.
    filepath: str, optional (default: None)
        Absolute path for saving the generated plot.

    Returns
    -------
    fig : matplotlib.figure.Figure

    Raises
    ------
    NotImplementedError
        If ``table.flavor`` is 'stream' and plot_type is one of
        'contour', 'joint' or 'line'.
    ValueError
        If plot_type is not one of the supported element types.

    """
    if table.flavor == 'stream' and plot_type in ('contour', 'joint', 'line'):
        raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
            plot_type))

    if plot_type == 'text':
        fig = plot_text(table._text)
    elif plot_type == 'table':
        fig = plot_table(table)
    elif plot_type == 'contour':
        fig = plot_contour(table._image)
    elif plot_type == 'joint':
        fig = plot_joint(table._image)
    elif plot_type == 'line':
        fig = plot_line(table._segments)
    else:
        # Without this branch an unknown plot_type would reach the
        # return statement with ``fig`` unbound (UnboundLocalError).
        raise ValueError(
            "Unknown plot_type {!r}; expected one of 'text', 'table', "
            "'contour', 'joint', 'line'".format(plot_type))

    if filepath:
        # Save the figure that was just built rather than whatever
        # pyplot currently considers the "current" figure.
        fig.savefig(filepath)
    return fig
|
|
|
|
|
|
def plot_text(text):
    """Generates a plot for all text elements present
    on the PDF page.

    Each element is drawn as a rectangle anchored at
    ``(t[0], t[1])`` with width ``t[2] - t[0]`` and height
    ``t[3] - t[1]``. The axis limits are padded by 10 units around
    the extent of all rectangles.

    Parameters
    ----------
    text : list
        Indexable items whose first four entries are used as
        ``(x0, y0, x1, y1)``.

    Returns
    -------
    fig : matplotlib.figure.Figure
        When ``text`` is empty the figure is returned with an empty
        axes and matplotlib's default limits.

    """
    margin = 10
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    xs, ys = [], []
    for t in text:
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1]
            )
        )
    # min()/max() raise ValueError on an empty sequence, so only fit
    # the limits when at least one text box was collected.
    if xs:
        ax.set_xlim(min(xs) - margin, max(xs) + margin)
        ax.set_ylim(min(ys) - margin, max(ys) + margin)
    return fig
|
|
|
|
|
|
def plot_table(table):
    """Draw the visible cell edges of a detected table.

    For every cell in ``table.cells`` each of the four edges
    (left, right, top, bottom) is plotted as a line between the two
    matching corner points, but only when the cell's flag for that
    edge is truthy.

    Parameters
    ----------
    table : camelot.core.Table

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    # (edge flag, start corner attribute, end corner attribute), kept
    # in the order left, right, top, bottom so lines are added to the
    # axes in a fixed sequence.
    edge_specs = (
        ('left', 'lb', 'lt'),
        ('right', 'rb', 'rt'),
        ('top', 'lt', 'rt'),
        ('bottom', 'lb', 'rb'),
    )

    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    for cells_in_row in table.cells:
        for cell in cells_in_row:
            for flag_name, start_name, end_name in edge_specs:
                if not getattr(cell, flag_name):
                    continue
                # Corners are looked up only for edges that are drawn.
                start = getattr(cell, start_name)
                end = getattr(cell, end_name)
                ax.plot([start[0], end[0]], [start[1], end[1]])
    return fig
|
|
|
|
|
|
def plot_contour(image):
    """Overlay the detected table boundaries on the page image.

    Each key of the bounding-box mapping is indexed as
    ``(x0, y0, x1, y1)`` and drawn as an unfilled rectangle with a
    red edge; the image is then shown on the same axes.

    Parameters
    ----------
    image : tuple
        Two-item tuple ``(img, table_bbox)``. ``img`` is handed to
        ``imshow`` and the keys of ``table_bbox`` give the boxes.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    img, table_bbox = image
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    for bbox in table_bbox:
        anchor = (bbox[0], bbox[1])
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        outline = patches.Rectangle(
            anchor, width, height, fill=None, edgecolor='red')
        ax.add_patch(outline)
    ax.imshow(img)
    return fig
|
|
|
|
|
|
def plot_joint(image):
    """Overlay the detected line intersections on the page image.

    The coordinates stored under every key of the bounding-box
    mapping are gathered and plotted together as red circle markers;
    the image is then shown on the same axes.

    Parameters
    ----------
    image : tuple
        Two-item tuple ``(img, table_bbox)``. ``img`` is handed to
        ``imshow``; each value of ``table_bbox`` is iterated and its
        items are indexed as ``(x, y)``.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    img, table_bbox = image
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    # Flatten the per-table coordinate collections in key order.
    points = [pt for key in table_bbox for pt in table_bbox[key]]
    joint_xs = [pt[0] for pt in points]
    joint_ys = [pt[1] for pt in points]
    ax.plot(joint_xs, joint_ys, 'ro')
    ax.imshow(img)
    return fig
|
|
|
|
|
|
def plot_line(segments):
    """Draw every detected line segment on the PDF page.

    Each segment is indexed as ``(x0, y0, x1, y1)`` and plotted as a
    line from ``(x0, y0)`` to ``(x1, y1)``. Vertical segments are
    plotted first, followed by horizontal ones.

    Parameters
    ----------
    segments : tuple
        Two-item tuple ``(vertical, horizontal)`` of segment
        collections.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    vertical, horizontal = segments
    for group in (vertical, horizontal):
        for seg in group:
            x_pair = [seg[0], seg[2]]
            y_pair = [seg[1], seg[3]]
            ax.plot(x_pair, y_pair)
    return fig
|