# camelot-py/camelot/imgproc.py
#
# 272 lines
# 8.3 KiB
# Python

from itertools import groupby
from operator import itemgetter
import cv2
import numpy as np
from .utils import merge_tuples
def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
    """Thresholds an image using OpenCV's adaptiveThreshold.

    Parameters
    ----------
    imagename : string
        Path to image file.
    invert : bool
        Selects the polarity that is thresholded. When False, the
        grayscale image is inverted (np.invert) before thresholding;
        when True, the grayscale image is thresholded as is. Useful
        when pdfs have tables with lines in background.
        (optional, default: False)
    blocksize : int
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.
        (optional, default: 15)
    c : float
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.
        (optional, default: -2)

    Returns
    -------
    img : object
        numpy.ndarray representing the original image.
    threshold : object
        numpy.ndarray representing the thresholded image.

    Raises
    ------
    ValueError
        If the image at `imagename` could not be read.
    """
    img = cv2.imread(imagename)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None
        # instead of raising; fail here with a message that names the path
        # rather than letting cvtColor fail on an empty input.
        raise ValueError(
            "Could not read image file: {}".format(imagename))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Only the input polarity differs between the two modes; every other
    # adaptiveThreshold argument is the same.
    source = gray if invert else np.invert(gray)
    threshold = cv2.adaptiveThreshold(
        source, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, blocksize, c)
    return img, threshold
def find_lines(threshold, direction='horizontal', scale=15, iterations=0):
    """Finds horizontal and vertical lines by applying morphological
    transformations on an image.

    Parameters
    ----------
    threshold : object
        numpy.ndarray representing the thresholded image.
    direction : string
        Specifies whether to find vertical or horizontal lines.
        Must be either 'vertical' or 'horizontal'.
        (optional, default: 'horizontal')
    scale : int
        Used to divide the height/width to get a structuring element
        for morph transform.
        (optional, default: 15)
    iterations : int
        Number of iterations for dilation.
        (optional, default: 0)

    Returns
    -------
    dmask : object
        numpy.ndarray representing pixels where vertical/horizontal
        lines lie.
    lines : list
        List of tuples representing vertical/horizontal lines with
        coordinates relative to a left-top origin in
        OpenCV's coordinate space.

    Raises
    ------
    ValueError
        If `direction` is neither 'vertical' nor 'horizontal'.
    """
    if direction == 'vertical':
        # structuring element is one pixel wide, height // scale tall
        size = threshold.shape[0] // scale
        el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
    elif direction == 'horizontal':
        # structuring element is width // scale wide, one pixel tall
        size = threshold.shape[1] // scale
        el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
    else:
        # Reject every unsupported value (not only None); otherwise `el`
        # would be unbound below and surface as a confusing NameError.
        raise ValueError("Specify direction as either 'vertical' or"
                         " 'horizontal'")

    # erode followed by dilate with the same element
    threshold = cv2.erode(threshold, el)
    threshold = cv2.dilate(threshold, el)
    dmask = cv2.dilate(threshold, el, iterations=iterations)

    # Try the three-value return form of findContours first and fall back
    # to the two-value form when the unpacking fails.
    try:
        _, contours, _ = cv2.findContours(
            threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    except ValueError:
        contours, _ = cv2.findContours(
            threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    lines = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        x1, x2 = x, x + w
        y1, y2 = y, y + h
        if direction == 'vertical':
            # collapse the bounding box onto its vertical centre line
            lines.append(((x1 + x2) / 2, y2, (x1 + x2) / 2, y1))
        else:
            # collapse the bounding box onto its horizontal centre line
            lines.append((x1, (y1 + y2) / 2, x2, (y1 + y2) / 2))

    return dmask, lines
def find_table_contours(vertical, horizontal):
    """Finds table boundaries using OpenCV's findContours.

    Parameters
    ----------
    vertical : object
        numpy.ndarray representing pixels where vertical lines lie.
    horizontal : object
        numpy.ndarray representing pixels where horizontal lines lie.

    Returns
    -------
    cont : list
        List of tuples representing table boundaries. Each tuple is of
        the form (x, y, w, h) where (x, y) -> left-top, w -> width and
        h -> height in OpenCV's coordinate space. At most ten
        boundaries are returned, largest contour area first.
    """
    # external contours are searched on the sum of both line masks
    combined = vertical + horizontal

    # Try the three-value return form of findContours first and fall back
    # to the two-value form when the unpacking fails.
    try:
        __, outlines, __ = cv2.findContours(
            combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    except ValueError:
        outlines, __ = cv2.findContours(
            combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # keep only the ten contours with the largest area
    largest = sorted(outlines, key=cv2.contourArea, reverse=True)[:10]

    # bounding rectangle of each contour's polygonal approximation
    return [
        tuple(cv2.boundingRect(cv2.approxPolyDP(outline, 3, True)))
        for outline in largest
    ]
def find_table_joints(contours, vertical, horizontal):
    """Finds joints/intersections present inside each table boundary.

    Parameters
    ----------
    contours : list
        List of tuples representing table boundaries. Each tuple is of
        the form (x, y, w, h) where (x, y) -> left-top, w -> width and
        h -> height in OpenCV's coordinate space.
    vertical : object
        numpy.ndarray representing pixels where vertical lines lie.
    horizontal : object
        numpy.ndarray representing pixels where horizontal lines lie.

    Returns
    -------
    tables : dict
        Dict with table boundaries as keys and list of intersections
        in that boundary as their value.
        Keys are of the form (x1, y1, x2, y2) where (x1, y1) -> lb
        and (x2, y2) -> rt in OpenCV's coordinate space.
        Boundaries with four or fewer joints are left out.
    """
    # a joint is any pixel set in both the vertical and horizontal mask
    intersections = np.bitwise_and(vertical, horizontal)

    tables = {}
    for x, y, w, h in contours:
        region = intersections[y:y + h, x:x + w]

        # Try the three-value return form of findContours first and fall
        # back to the two-value form when the unpacking fails.
        try:
            __, blobs, __ = cv2.findContours(
                region, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        except ValueError:
            blobs, __ = cv2.findContours(
                region, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

        # only boundaries with more than four joints are kept
        if len(blobs) > 4:
            # centre of each joint's bounding box, shifted from region
            # coordinates back to image coordinates
            tables[(x, y + h, x + w, y)] = [
                (x + (2 * jx + jw) / 2, y + (2 * jy + jh) / 2)
                for jx, jy, jw, jh in map(cv2.boundingRect, blobs)
            ]

    return tables
def remove_lines(threshold, line_scale=15):
    """Removes lines from a thresholded image.

    Parameters
    ----------
    threshold : object
        numpy.ndarray representing the thresholded image.
    line_scale : int
        Line scaling factor.
        (optional, default: 15)

    Returns
    -------
    threshold : object
        numpy.ndarray representing the thresholded image
        with horizontal and vertical lines removed.
    """
    # Both erosion elements take their length from the image height.
    length = threshold.shape[0] // line_scale
    dilate_el = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))

    cleaned = threshold
    # vertical element first, then horizontal
    for ksize in ((1, length), (length, 1)):
        erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, ksize)
        # the line mask is always derived from the untouched input image
        line_mask = cv2.dilate(cv2.erode(threshold, erode_el), dilate_el)
        # clear every pixel covered by the line mask
        cleaned = np.bitwise_and(cleaned, np.invert(line_mask))

    return cleaned
def find_cuts(threshold, char_scale=200):
    """Finds cuts made by text projections on y-axis.

    Parameters
    ----------
    threshold : object
        numpy.ndarray representing the thresholded image.
    char_scale : int
        Char scaling factor.
        (optional, default: 200)

    Returns
    -------
    y_cuts : list
        List of cuts on y-axis, sorted in descending order.
    """
    # one-pixel-wide element whose height is image height // char_scale
    el_height = threshold.shape[0] // char_scale
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, el_height))
    opened = cv2.dilate(cv2.erode(threshold, kernel), kernel)

    # Try the three-value return form of findContours first and fall back
    # to the two-value form when the unpacking fails.
    try:
        __, contours, __ = cv2.findContours(opened, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_SIMPLE)
    except ValueError:
        contours, __ = cv2.findContours(opened, cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_SIMPLE)

    # (top, bottom) extent of every contour's bounding box
    spans = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        spans.append((box[1], box[1] + box[3]))

    merged = list(merge_tuples(sorted(spans)))

    # a cut is halfway between the end of one merged span and the start
    # of the next one
    midpoints = [
        (following[0] + previous[1]) / 2
        for previous, following in zip(merged, merged[1:])
    ]
    return sorted(midpoints, reverse=True)