camelot-py/camelot/plotting.py

186 lines
4.4 KiB
Python

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import matplotlib.patches as patches
def plot(table, plot_type='text', filepath=None):
    """Plot elements found on a PDF page based on the given plot_type.

    Useful for debugging and for experimenting with different
    parameters to get the best output.

    Parameters
    ----------
    table : Table
        A Camelot Table.
    plot_type : str, optional (default: 'text')
        {'text', 'table', 'contour', 'joint', 'line'}
        The element type for which a plot should be generated.
    filepath : str, optional (default: None)
        Absolute path for saving the generated plot. Nothing is
        saved when this is None or empty.

    Returns
    -------
    fig : matplotlib.figure.Figure

    Raises
    ------
    NotImplementedError
        If plot_type is 'contour', 'joint' or 'line' and the table
        flavor is 'stream'.
    ValueError
        If plot_type is not one of the supported element types.

    """
    if table.flavor == 'stream' and plot_type in ['contour', 'joint', 'line']:
        raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
            plot_type))

    if plot_type == 'text':
        fig = plot_text(table._text)
    elif plot_type == 'table':
        fig = plot_table(table)
    elif plot_type == 'contour':
        fig = plot_contour(table._image)
    elif plot_type == 'joint':
        fig = plot_joint(table._image)
    elif plot_type == 'line':
        fig = plot_line(table._segments)
    else:
        # Without this branch `fig` would be unbound and the caller would
        # get an opaque UnboundLocalError instead of a usable message.
        raise ValueError(
            "Unknown plot_type {!r}; expected one of 'text', 'table', "
            "'contour', 'joint', 'line'".format(plot_type))

    if filepath:
        # Save the figure that was just built rather than whatever pyplot
        # currently considers the active figure.
        fig.savefig(filepath)
    return fig
def plot_text(text):
    """Generate a plot of all text elements present on the PDF page.

    Every element is drawn as a rectangle anchored at (t[0], t[1]) with
    width t[2] - t[0] and height t[3] - t[1]. The axis limits are set to
    the extent of all drawn rectangles plus a 10-unit margin per side.

    Parameters
    ----------
    text : list
        Sequence of elements indexable at positions 0 to 3.
        May be empty, in which case an empty plot with matplotlib's
        default axis limits is returned.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    xs, ys = [], []
    for t in text:
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1]
            )
        )
    # min()/max() raise ValueError on an empty sequence, so only fit the
    # view to the data when at least one element was drawn.
    if xs:
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)
    return fig
def plot_table(table):
    """Generate a plot of the cell borders of a detected table.

    For every cell in ``table.cells`` the left, right, top and bottom
    edges are drawn, in that order, but only for the edges whose flag
    on the cell is truthy.

    Parameters
    ----------
    table : camelot.core.Table

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')

    def draw_edge(start, end):
        # One straight segment between two corner points.
        ax.plot([start[0], end[0]], [start[1], end[1]])

    # Walk the cells row by row, preserving the original drawing order.
    for cell in (c for cells_in_row in table.cells for c in cells_in_row):
        if cell.left:
            draw_edge(cell.lb, cell.lt)
        if cell.right:
            draw_edge(cell.rb, cell.rt)
        if cell.top:
            draw_edge(cell.lt, cell.rt)
        if cell.bottom:
            draw_edge(cell.lb, cell.rb)
    return fig
def plot_contour(image):
    """Generate a plot of all table boundaries present on the PDF page.

    Each key of the bounding-box mapping is drawn as an unfilled
    rectangle with a red edge, and the page image is then shown on the
    same axes.

    Parameters
    ----------
    image : tuple
        Two-item tuple ``(img, table_bbox)``; ``img`` is passed to
        ``imshow`` and the keys of ``table_bbox`` are indexed at
        positions 0 to 3 to obtain the rectangle geometry.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    img, table_bbox = image
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    for bbox in table_bbox.keys():
        anchor = (bbox[0], bbox[1])
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]
        outline = patches.Rectangle(
            anchor, width, height, fill=None, edgecolor='red')
        ax.add_patch(outline)
    ax.imshow(img)
    return fig
def plot_joint(image):
    """Generate a plot of all line intersections present on the PDF page.

    The coordinates stored under every key of the bounding-box mapping
    are collected and drawn as red circle markers, and the page image is
    then shown on the same axes.

    Parameters
    ----------
    image : tuple
        Two-item tuple ``(img, table_bbox)``; ``img`` is passed to
        ``imshow`` and each value of ``table_bbox`` is iterated for
        coordinates whose items 0 and 1 are used as x and y.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    img, table_bbox = image
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    # Flatten the per-table coordinate collections into a single list.
    joints = [joint for bbox in table_bbox.keys() for joint in table_bbox[bbox]]
    ax.plot(
        [joint[0] for joint in joints],
        [joint[1] for joint in joints],
        'ro',
    )
    ax.imshow(img)
    return fig
def plot_line(segments):
    """Generate a plot of all line segments present on the PDF page.

    Every segment is drawn as a straight line from (s[0], s[1]) to
    (s[2], s[3]); vertical segments are drawn before horizontal ones.

    Parameters
    ----------
    segments : tuple
        Two-item tuple ``(vertical, horizontal)`` of segment
        collections.

    Returns
    -------
    fig : matplotlib.figure.Figure

    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    vertical, horizontal = segments
    for group in (vertical, horizontal):
        for seg in group:
            x_points = [seg[0], seg[2]]
            y_points = [seg[1], seg[3]]
            ax.plot(x_points, y_points)
    return fig