commit d86630e70b (parent 0bb6ce0bf9)
@@ -0,0 +1,98 @@
+import cv2
+import numpy as np
+
+
+def adaptive_threshold(imagename, invert=False):
+    img = cv2.imread(imagename)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    if invert:
+        threshold = cv2.adaptiveThreshold(
+            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,
+            15, -0.2)
+    else:
+        threshold = cv2.adaptiveThreshold(
+            np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY,
+            15, -0.2)
+    return img, threshold
+
+
+def find_lines(threshold, direction=None, scale=15):
+    lines = []
+
+    if direction == 'vertical':
+        size = threshold.shape[0] // scale
+        el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
+    elif direction == 'horizontal':
+        size = threshold.shape[1] // scale
+        el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
+    elif direction is None:
+        raise ValueError("Specify direction as either 'vertical' or"
+                         " 'horizontal'")
+
+    threshold = cv2.erode(threshold, el, (-1, -1))
+    threshold = cv2.dilate(threshold, el, (-1, -1))
+
+    dmask = threshold  # findContours modifies source image
+
+    try:
+        _, contours, _ = cv2.findContours(
+            threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    except ValueError:
+        contours, _ = cv2.findContours(
+            threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    for c in contours:
+        x, y, w, h = cv2.boundingRect(c)
+        x1, x2 = x, x + w
+        y1, y2 = y, y + h
+        if direction == 'vertical':
+            lines.append(((x1 + x2) / 2, y2, (x1 + x2) / 2, y1))
+        elif direction == 'horizontal':
+            lines.append((x1, (y1 + y2) / 2, x2, (y1 + y2) / 2))
+
+    return dmask, lines
+
+
+def find_table_contours(vertical, horizontal):
+    mask = vertical + horizontal
+
+    try:
+        __, contours, __ = cv2.findContours(
+            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    except ValueError:
+        contours, __ = cv2.findContours(
+            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
+
+    cont = []
+    for c in contours:
+        c_poly = cv2.approxPolyDP(c, 3, True)
+        x, y, w, h = cv2.boundingRect(c_poly)
+        cont.append((x, y, w, h))
+    return cont
+
+
+def find_table_joints(contours, vertical, horizontal):
+    joints = np.bitwise_and(vertical, horizontal)
+    tables = {}
+    for c in contours:
+        x, y, w, h = c
+        roi = joints[y : y + h, x : x + w]
+        try:
+            __, jc, __ = cv2.findContours(
+                roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
+        except ValueError:
+            jc, __ = cv2.findContours(
+                roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
+        if len(jc) <= 4:  # remove contours with 4 or fewer joints
+            continue
+        joint_coords = []
+        for j in jc:
+            jx, jy, jw, jh = cv2.boundingRect(j)
+            c1, c2 = x + (2 * jx + jw) / 2, y + (2 * jy + jh) / 2
+            joint_coords.append((c1, c2))
+        tables[(x, y + h, x + w, y)] = joint_coords
+
+    return tables
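Reviewer note (not part of the diff): a minimal sketch of how these new imgproc helpers are meant to compose, mirroring what the reworked Lattice.get_tables below does. The PNG path is hypothetical; everything else comes from this hunk.

    img, threshold = adaptive_threshold('page-1.png', invert=False)   # hypothetical image path
    vmask, v_segments = find_lines(threshold, direction='vertical', scale=15)
    hmask, h_segments = find_lines(threshold, direction='horizontal', scale=15)
    contours = find_table_contours(vmask, hmask)             # candidate table rectangles
    table_bbox = find_table_joints(contours, vmask, hmask)   # {table bbox: joint coordinates}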
@@ -4,15 +4,15 @@ import types
 import copy_reg
 import logging

-import cv2
-import numpy as np
-
 from wand.image import Image

+from .imgproc import (adaptive_threshold, find_lines, find_table_contours,
+                      find_table_joints)
 from .table import Table
-from .utils import (transform, segments_bbox, text_bbox, detect_vertical, merge_close_values,
-                    get_row_index, get_column_index, get_score, reduce_index,
-                    outline, fill_spanning, count_empty, encode_list, pdf_to_text)
+from .utils import (scale_to_pdf, scale_to_image, segments_bbox, text_bbox,
+                    detect_vertical, merge_close_values, get_row_index,
+                    get_column_index, get_score, reduce_index, outline,
+                    fill_spanning, count_empty, encode_list, pdf_to_text)


 __all__ = ['Lattice']
@@ -26,128 +26,6 @@ def _reduce_method(m):
 copy_reg.pickle(types.MethodType, _reduce_method)


-def _morph_transform(imagename, scale=15, invert=False):
-    """Morphological Transformation
-
-    Applies a series of morphological operations on the image
-    to find table contours and line segments.
-    http://answers.opencv.org/question/63847/how-to-extract-tables-from-an-image/
-
-    Empirical result for adaptiveThreshold's blockSize=5 and C=-0.2
-    taken from http://pequan.lip6.fr/~bereziat/pima/2012/seuillage/sezgin04.pdf
-
-    Parameters
-    ----------
-    imagename : Path to image.
-
-    scale : int
-        Scaling factor. Large scaling factor leads to smaller lines
-        being detected. (optional, default: 15)
-
-    invert : bool
-        Invert pdf image to make sure that lines are in foreground.
-        (optional, default: False)
-
-    Returns
-    -------
-    img : ndarray
-
-    tables : dict
-        Dictionary with table bounding box as key and list of
-        joints found in the table as value.
-
-    v_segments : list
-        List of vertical line segments found in the image.
-
-    h_segments : list
-        List of horizontal line segments found in the image.
-    """
-    img = cv2.imread(imagename)
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-    if invert:
-        threshold = cv2.adaptiveThreshold(
-            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,
-            15, -0.2)
-    else:
-        threshold = cv2.adaptiveThreshold(
-            np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY,
-            15, -0.2)
-
-    vertical = threshold
-    horizontal = threshold
-
-    verticalsize = vertical.shape[0] // scale
-    horizontalsize = horizontal.shape[1] // scale
-
-    ver = cv2.getStructuringElement(cv2.MORPH_RECT, (1, verticalsize))
-    hor = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontalsize, 1))
-
-    vertical = cv2.erode(vertical, ver, (-1, -1))
-    vertical = cv2.dilate(vertical, ver, (-1, -1))
-
-    horizontal = cv2.erode(horizontal, hor, (-1, -1))
-    horizontal = cv2.dilate(horizontal, hor, (-1, -1))
-
-    mask = vertical + horizontal
-    joints = np.bitwise_and(vertical, horizontal)
-    try:
-        __, contours, __ = cv2.findContours(
-            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    except ValueError:
-        contours, __ = cv2.findContours(
-            mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
-
-    tables = {}
-    for c in contours:
-        c_poly = cv2.approxPolyDP(c, 3, True)
-        x, y, w, h = cv2.boundingRect(c_poly)
-        roi = joints[y : y + h, x : x + w]
-        try:
-            __, jc, __ = cv2.findContours(
-                roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
-        except ValueError:
-            jc, __ = cv2.findContours(
-                roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
-        if len(jc) <= 4:  # remove contours with 4 or fewer joints
-            continue
-        joint_coords = []
-        for j in jc:
-            jx, jy, jw, jh = cv2.boundingRect(j)
-            c1, c2 = x + (2 * jx + jw) / 2, y + (2 * jy + jh) / 2
-            joint_coords.append((c1, c2))
-        tables[(x, y + h, x + w, y)] = joint_coords
-
-    v_segments, h_segments = [], []
-    try:
-        _, vcontours, _ = cv2.findContours(
-            vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    except ValueError:
-        vcontours, _ = cv2.findContours(
-            vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    for vc in vcontours:
-        x, y, w, h = cv2.boundingRect(vc)
-        x1, x2 = x, x + w
-        y1, y2 = y, y + h
-        v_segments.append(((x1 + x2) / 2, y2, (x1 + x2) / 2, y1))
-
-    try:
-        _, hcontours, _ = cv2.findContours(
-            horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    except ValueError:
-        hcontours, _ = cv2.findContours(
-            horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    for hc in hcontours:
-        x, y, w, h = cv2.boundingRect(hc)
-        x1, x2 = x, x + w
-        y1, y2 = y, y + h
-        h_segments.append((x1, (y1 + y2) / 2, x2, (y1 + y2) / 2))
-
-    return img, tables, v_segments, h_segments
-
-
 class Lattice:
     """Lattice algorithm

@@ -188,17 +66,17 @@ class Lattice:
         Dictionary with page number as key and list of tables on that
         page as value.
     """
-    def __init__(self, fill=None, scale=15, jtol=2, mtol=2,
-                 invert=False, pdf_margin=(2.0, 0.5, 0.1), debug=None):
+    def __init__(self, table_area=None, fill=None, jtol=[2], mtol=[2], scale=15,
+                 invert=False, margins=(2.0, 0.5, 0.1), debug=None):

         self.method = 'lattice'
+        self.table_area = table_area
         self.fill = fill
-        self.scale = scale
         self.jtol = jtol
         self.mtol = mtol
+        self.scale = scale
         self.invert = invert
-        self.char_margin, self.line_margin, self.word_margin = pdf_margin
+        self.char_margin, self.line_margin, self.word_margin = margins
         self.debug = debug

     def get_tables(self, pdfname):
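Reviewer note: jtol and mtol are now one-element lists that get_tables broadcasts to every table area, while table_area and fill are indexed per table. A hypothetical call against the new signature (file name and coordinates made up; the area string is assumed to be left,top,right,bottom in PDF points, which is what the scaling code below expects):

    parser = Lattice(table_area=['10,720,550,380', '10,360,550,50'],  # two made-up areas
                     fill=['h', 'v'],      # one fill mode per table area
                     jtol=[2], mtol=[2],   # single value is broadcast to both areas
                     scale=15)
    tables = parser.get_tables('file.pdf')  # hypothetical file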
@@ -217,48 +95,79 @@ class Lattice:
             logging.warning("{0}: PDF has no text. It may be an image.".format(
                 os.path.basename(bname)))
             return None

         imagename = ''.join([bname, '.png'])
         with Image(filename=pdfname, depth=8, resolution=300) as png:
             png.save(filename=imagename)

+        img, threshold = adaptive_threshold(imagename, invert=self.invert)
         pdf_x = width
         pdf_y = height
-        img, table_bbox, v_segments, h_segments = _morph_transform(
-            imagename, scale=self.scale, invert=self.invert)
         img_x = img.shape[1]
         img_y = img.shape[0]
-        scaling_factor_x = pdf_x / float(img_x)
-        scaling_factor_y = pdf_y / float(img_y)
+        sc_x_image = img_x / float(pdf_x)
+        sc_y_image = img_y / float(pdf_y)
+        sc_x_pdf = pdf_x / float(img_x)
+        sc_y_pdf = pdf_y / float(img_y)
+        factors_image = (sc_x_image, sc_y_image, pdf_y)
+        factors_pdf = (sc_x_pdf, sc_y_pdf, img_y)
+
+        vmask, v_segments = find_lines(threshold, direction='vertical',
+                                       scale=self.scale)
+        hmask, h_segments = find_lines(threshold, direction='horizontal',
+                                       scale=self.scale)
+
+        if self.table_area:
+            if self.fill:
+                if len(self.table_area) != len(self.fill):
+                    raise ValueError("message")
+            if len(self.jtol) == 1 and self.jtol[0] == 2:
+                self.jtol = self.jtol * len(self.table_area)
+            if len(self.mtol) == 1 and self.mtol[0] == 2:
+                self.mtol = self.mtol * len(self.table_area)
+            areas = []
+            for area in self.table_area:
+                x1, y1, x2, y2 = area.split(",")
+                x1 = int(x1)
+                y1 = int(y1)
+                x2 = int(x2)
+                y2 = int(y2)
+                x1, y1, x2, y2 = scale_to_image((x1, y1, x2, y2), factors_image)
+                areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1)))
+            table_bbox = find_table_joints(areas, vmask, hmask)
+        else:
+            contours = find_table_contours(vmask, hmask)
+            table_bbox = find_table_joints(contours, vmask, hmask)

         if self.debug:
             self.debug_images = (img, table_bbox)

-        factors = (scaling_factor_x, scaling_factor_y, img_y)
-        table_bbox, v_segments, h_segments = transform(table_bbox, v_segments,
-                                                       h_segments, factors)
+        table_bbox, v_segments, h_segments = scale_to_pdf(table_bbox, v_segments,
+                                                          h_segments, factors_pdf)

         if self.debug:
             self.debug_segments = (v_segments, h_segments)
             self.debug_tables = []

-        pdf_page = {}
-        page_tables = {}
-        table_no = 1
+        page = {}
+        tables = {}
+        table_no = 0
         # sort tables based on y-coord
         for k in sorted(table_bbox.keys(), key=lambda x: x[1], reverse=True):
-            # select edges which lie within table_bbox
-            table_info = {}
+            # select elements which lie within table_bbox
+            table_data = {}
             v_s, h_s = segments_bbox(k, v_segments, h_segments)
             t_bbox = text_bbox(k, text)
-            table_info['text_p'] = 100 * (1 - (len(t_bbox) / len(text)))
+            table_data['text_p'] = 100 * (1 - (len(t_bbox) / len(text)))
             table_rotation = detect_vertical(t_bbox)
             cols, rows = zip(*table_bbox[k])
             cols, rows = list(cols), list(rows)
             cols.extend([k[0], k[2]])
             rows.extend([k[1], k[3]])
             # sort horizontal and vertical segments
-            cols = merge_close_values(sorted(cols), mtol=self.mtol)
+            cols = merge_close_values(sorted(cols), mtol=self.mtol[table_no])
             rows = merge_close_values(
-                sorted(rows, reverse=True), mtol=self.mtol)
+                sorted(rows, reverse=True), mtol=self.mtol[table_no])
             # make grid using x and y coord of shortlisted rows and cols
             cols = [(cols[i], cols[i + 1])
                     for i in range(0, len(cols) - 1)]
@@ -266,9 +175,9 @@ class Lattice:
                     for i in range(0, len(rows) - 1)]
             table = Table(cols, rows)
             # set table edges to True using ver+hor lines
-            table = table.set_edges(v_s, h_s, jtol=self.jtol)
+            table = table.set_edges(v_s, h_s, jtol=self.jtol[table_no])
             nouse = table.nocont_ / (len(v_s) + len(h_s))
-            table_info['line_p'] = 100 * (1 - nouse)
+            table_data['line_p'] = 100 * (1 - nouse)
             # set spanning cells to True
             table = table.set_spanning()
             # set table border edges to True
@@ -314,10 +223,10 @@ class Lattice:
                                 for t in t_bbox]))

             score = get_score([[50, rerror], [50, cerror]])
-            table_info['score'] = score
+            table_data['score'] = score

-            if self.fill is not None:
-                table = fill_spanning(table, fill=self.fill)
+            if self.fill:
+                table = fill_spanning(table, fill=self.fill[table_no])
             ar = table.get_list()
             if table_rotation == 'left':
                 ar = zip(*ar[::-1])
@@ -325,18 +234,18 @@ class Lattice:
                 ar = zip(*ar[::1])
                 ar.reverse()
             ar = encode_list(ar)
-            table_info['data'] = ar
+            table_data['data'] = ar
             empty_p, r_nempty_cells, c_nempty_cells = count_empty(ar)
-            table_info['empty_p'] = empty_p
-            table_info['r_nempty_cells'] = r_nempty_cells
-            table_info['c_nempty_cells'] = c_nempty_cells
-            table_info['nrows'] = len(ar)
-            table_info['ncols'] = len(ar[0])
-            page_tables['table_{0}'.format(table_no)] = table_info
+            table_data['empty_p'] = empty_p
+            table_data['r_nempty_cells'] = r_nempty_cells
+            table_data['c_nempty_cells'] = c_nempty_cells
+            table_data['nrows'] = len(ar)
+            table_data['ncols'] = len(ar[0])
+            tables['table-{0}'.format(table_no + 1)] = table_data
             table_no += 1
-        pdf_page[os.path.basename(bname)] = page_tables
+        page[os.path.basename(bname)] = tables

         if self.debug:
             return None

-        return pdf_page
+        return page
@@ -7,7 +7,8 @@ import logging
 import numpy as np

 from .table import Table
-from .utils import get_row_index, get_score, count_empty, encode_list, pdf_to_text
+from .utils import (get_row_index, get_score, count_empty, encode_list,
+                    pdf_to_text, text_bbox)


 __all__ = ['Stream']
@@ -133,6 +134,17 @@ def _get_column_index(t, columns):
     return c_idx, error


+def _join_rows(rows_grouped, text_y_max, text_y_min):
+    row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r)
+                if len(r) > 0 else 0 for r in rows_grouped]
+    rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
+    rows.insert(0, text_y_max)
+    rows.append(text_y_min)
+    rows = [(rows[i], rows[i + 1])
+            for i in range(0, len(rows) - 1)]
+    return rows
+
+
 def _add_columns(cols, text, ytolerance):
     if text:
         text = _group_rows(text, ytol=ytolerance)
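Reviewer note: the new _join_rows turns the average midline of each group of text into row boundaries by taking midpoints between adjacent row centres and closing the ends with the table's top and bottom. A tiny numeric check with invented boxes whose midlines come out at 700, 650 and 600:

    class Box(object):  # stand-in for a pdfminer horizontal text line
        def __init__(self, y0, y1):
            self.y0, self.y1 = y0, y1

    rows_grouped = [[Box(695.0, 705.0)], [Box(645.0, 655.0)], [Box(595.0, 605.0)]]
    print(_join_rows(rows_grouped, 720, 580))
    # -> [(720, 675.0), (675.0, 625.0), (625.0, 580)]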
@@ -143,14 +155,6 @@ def _add_columns(cols, text, ytolerance):
     return cols


-def _get_table_bounds(rows):
-    x0 = min([t.x0 for r in rows for t in r])
-    x1 = max([t.x1 for r in rows for t in r])
-    y0 = min([t.y0 for t in rows[-1]])
-    y1 = max([t.y1 for t in rows[0]])
-    return x0, x1, y0, y1
-
-
 def _join_columns(cols, text_x_min, text_x_max):
     cols = sorted(cols)
     cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))]
@@ -194,16 +198,16 @@ class Stream:
         Dictionary with page number as key and list of tables on that
         page as value.
     """
-    def __init__(self, ncolumns=0, columns=None, ytol=2, mtol=2,
-                 pdf_margin=(2.0, 0.5, 0.1), debug=False):
+    def __init__(self, table_area=None, columns=None, ncolumns=None, ytol=[2],
+                 mtol=[2], margins=(2.0, 0.5, 0.1), debug=False):

         self.method = 'stream'
-        self.ncolumns = ncolumns
+        self.table_area = table_area
         self.columns = columns
+        self.ncolumns = ncolumns
         self.ytol = ytol
         self.mtol = mtol
-        self.char_margin, self.line_margin, self.word_margin = pdf_margin
+        self.char_margin, self.line_margin, self.word_margin = margins
         self.debug = debug

     def get_tables(self, pdfname):
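Reviewer note: a hypothetical call against the new Stream signature (coordinates and column x-values made up). Per the code below, an empty string in columns or a -1 in ncolumns makes that table area fall back to guessing:

    parser = Stream(table_area=['0,792,612,400', '0,380,612,20'],
                    columns=['72,150,290,430', ''],  # explicit columns for the first area only
                    ncolumns=[-1, 4],                # column-count hint for the second area
                    ytol=[2], mtol=[2])
    tables = parser.get_tables('file.pdf')  # hypothetical file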
@@ -222,44 +226,68 @@ class Stream:
             logging.warning("{0}: PDF has no text. It may be an image.".format(
                 os.path.basename(bname)))
             return None
-        text.sort(key=lambda x: (-x.y0, x.x0))

         if self.debug:
             self.debug_text = [(t.x0, t.y0, t.x1, t.y1) for t in text]
             return None

-        rows_grouped = _group_rows(text, ytol=self.ytol)
+        if self.table_area:
+            if self.columns:
+                if len(self.table_area) != len(self.columns):
+                    raise ValueError("message")
+            if self.ncolumns:
+                if len(self.table_area) != len(self.ncolumns):
+                    raise ValueError("message")
+            if len(self.ytol) == 1 and self.ytol[0] == 2:
+                self.ytol = self.ytol * len(self.table_area)
+            if len(self.mtol) == 1 and self.mtol[0] == 2:
+                self.mtol = self.mtol * len(self.table_area)
+            table_bbox = {}
+            for area in self.table_area:
+                x1, y1, x2, y2 = area.split(",")
+                x1 = int(x1)
+                y1 = int(y1)
+                x2 = int(x2)
+                y2 = int(y2)
+                table_bbox[(x1, y2, x2, y1)] = None
+        else:
+            table_bbox = {(0, height, width, 0): None}
+
+        page = {}
+        tables = {}
+        table_no = 0
+        # sort tables based on y-coord
+        for k in sorted(table_bbox.keys(), key=lambda x: x[1], reverse=True):
+            # select elements which lie within table_bbox
+            table_data = {}
+            t_bbox = text_bbox(k, text)
+            t_bbox.sort(key=lambda x: (-x.y0, x.x0))
+
+            rows_grouped = _group_rows(t_bbox, ytol=self.ytol[table_no])
+            rows = _join_rows(rows_grouped, k[3], k[1])
             elements = [len(r) for r in rows_grouped]
-        row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r)
-                    if len(r) > 0 else 0 for r in rows_grouped]
-        rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
-        bounds = _get_table_bounds(rows_grouped)
-        rows.insert(0, bounds[3])
-        rows.append(bounds[2])
-        rows = [(rows[i], rows[i + 1])
-                for i in range(0, len(rows) - 1)]

             guess = False
-        if self.columns:
+            if self.columns and self.columns[table_no] != "":
                 # user has to input boundary columns too
                 # take (0, width) by default
                 # similar to else condition
                 # len can't be 1
-            cols = self.columns.split(',')
+                cols = self.columns[table_no].split(',')
                 cols = [(float(cols[i]), float(cols[i + 1]))
                         for i in range(0, len(cols) - 1)]
             else:
-            if self.ncolumns:
-                ncols = self.ncolumns
+                if self.ncolumns and self.ncolumns[table_no] != -1:
+                    ncols = self.ncolumns[table_no]
                     cols = [(t.x0, t.x1)
                             for r in rows_grouped if len(r) == ncols for t in r]
-                cols = _merge_columns(sorted(cols), mtol=self.mtol)
-                if len(cols) != self.ncolumns:
+                    cols = _merge_columns(sorted(cols), mtol=self.mtol[table_no])
+                    if len(cols) != self.ncolumns[table_no]:
                         logging.warning("{}: The number of columns after merge"
                                         " isn't the same as what you specified."
                                         " Change the value of mtol.".format(
                                             os.path.basename(bname)))
-                cols = _join_columns(cols, bounds[0], bounds[1])
+                    cols = _join_columns(cols, k[0], k[2])
                 else:
                     guess = True
                     ncols = max(set(elements), key=elements.count)
@@ -272,7 +300,7 @@ class Stream:
                                         os.path.basename(bname)))
                     cols = [(t.x0, t.x1)
                             for r in rows_grouped if len(r) == ncols for t in r]
-                cols = _merge_columns(sorted(cols), mtol=self.mtol)
+                    cols = _merge_columns(sorted(cols), mtol=self.mtol[table_no])
                     inner_text = []
                     for i in range(1, len(cols)):
                         left = cols[i - 1][1]
@@ -280,12 +308,9 @@ class Stream:
                         inner_text.extend([t for t in text if t.x0 > left and t.x1 < right])
                     outer_text = [t for t in text if t.x0 > cols[-1][1] or t.x1 < cols[0][0]]
                     inner_text.extend(outer_text)
-                cols = _add_columns(cols, inner_text, self.ytol)
-                cols = _join_columns(cols, bounds[0], bounds[1])
+                    cols = _add_columns(cols, inner_text, self.ytol[table_no])
+                    cols = _join_columns(cols, k[0], k[2])

-        pdf_page = {}
-        page_tables = {}
-        table_info = {}
             table = Table(cols, rows)
             rerror = []
             cerror = []
@@ -295,13 +320,11 @@ class Stream:
                     r_idx, rass_error = get_row_index(t, rows)
                 except ValueError as e:
                     # couldn't assign LTTextLH to any cell
-                    vprint(e.message)
                     continue
                 try:
                     c_idx, cass_error = _get_column_index(t, cols)
                 except ValueError as e:
                     # couldn't assign LTTextLH to any cell
-                    vprint(e.message)
                     continue
                 rerror.append(rass_error)
                 cerror.append(cass_error)
@@ -311,17 +334,18 @@ class Stream:
                 score = get_score([[33, rerror], [33, cerror], [34, [len_non_mode / len(elements)]]])
             else:
                 score = get_score([[50, rerror], [50, cerror]])
-        table_info['score'] = score
-        ar = table.get_list()
-        ar = encode_list(ar)
-        table_info['data'] = ar
-        empty_p, r_nempty_cells, c_nempty_cells = count_empty(ar)
-        table_info['empty_p'] = empty_p
-        table_info['r_nempty_cells'] = r_nempty_cells
-        table_info['c_nempty_cells'] = c_nempty_cells
-        table_info['nrows'] = len(ar)
-        table_info['ncols'] = len(ar[0])
-        page_tables['table_1'] = table_info
-        pdf_page[os.path.basename(bname)] = page_tables
-
-        return pdf_page
+            table_data['score'] = score
+
+            ar = encode_list(table.get_list())
+            table_data['data'] = ar
+            empty_p, r_nempty_cells, c_nempty_cells = count_empty(ar)
+            table_data['empty_p'] = empty_p
+            table_data['r_nempty_cells'] = r_nempty_cells
+            table_data['c_nempty_cells'] = c_nempty_cells
+            table_data['nrows'] = len(ar)
+            table_data['ncols'] = len(ar[0])
+            tables['table-{0}'.format(table_no + 1)] = table_data
+            table_no += 1
+        page[os.path.basename(bname)] = tables
+
+        return page
@@ -81,7 +81,17 @@ def rotate(x1, y1, x2, y2, angle):
     return xnew, ynew


-def transform(tables, v_segments, h_segments, factors):
+def scale_to_image(k, factors):
+    x1, y1, x2, y2 = k
+    scaling_factor_x, scaling_factor_y, pdf_y = factors
+    x1 = scale(x1, scaling_factor_x)
+    y1 = scale(abs(translate(-pdf_y, y1)), scaling_factor_y)
+    x2 = scale(x2, scaling_factor_x)
+    y2 = scale(abs(translate(-pdf_y, y2)), scaling_factor_y)
+    return int(x1), int(y1), int(x2), int(y2)
+
+
+def scale_to_pdf(tables, v_segments, h_segments, factors):
     """Translates and scales OpenCV coordinates to PDFMiner coordinate
     space.

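Reviewer note: scale_to_image is the inverse direction of scale_to_pdf; it maps a PDF-space rectangle into image pixels, flipping the y-axis because PDFMiner's origin is bottom-left while OpenCV's is top-left. The sketch below assumes the utils helpers behave as translate(t, v) == t + v and scale(v, f) == v * f, which this diff does not show. For a 612x792 pt page rendered to a 2550x3300 px image:

    factors_image = (2550 / 612.0, 3300 / 792.0, 792)   # (sc_x_image, sc_y_image, pdf_y)
    x1, y1, x2, y2 = scale_to_image((36, 720, 576, 396), factors_image)
    # x1 == int(36 * 2550 / 612.0) == 150
    # y1 == int(abs(-792 + 720) * 3300 / 792.0) == 300 pixels down from the image top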
@@ -40,9 +40,9 @@ options:
 -W, --wmargin <wmargin>     Word margin. Insert blank spaces between chars
                             if distance between words is greater than word
                             margin. [default: 0.1]
--S, --save-info             Save parsing info for each page to a file.
+-S, --print-stats           List stats on the parsing process.
+-T, --save-stats            Save stats to a file.
 -X, --plot <dist>           Plot distributions. (page,all,rc)
--Z, --summary               Summarize metrics.

 camelot methods:
   lattice                   Looks for lines between data.
@@ -55,19 +55,21 @@ lattice_doc = """
 Lattice method looks for lines between text to form a table.

 usage:
- camelot lattice [options] [--] <file>
+ camelot lattice [-t <tarea>...] [-F <fill>...] [-j <jtol>...]
+                 [-m <mtol>...] [options] [--] <file>

 options:
+ -t, --tarea <tarea>     Specific table areas to analyze.
 -F, --fill <fill>       Fill data in horizontal and/or vertical spanning
                         cells. Example: -F h, -F v, -F hv
- -s, --scale <scale>     Scaling factor. Large scaling factor leads to
-                         smaller lines being detected. [default: 15]
- -i, --invert            Invert pdf image to make sure that lines are
-                         in foreground.
 -j, --jtol <jtol>       Tolerance to account for when comparing joint
                         and line coordinates. [default: 2]
 -m, --mtol <mtol>       Tolerance to account for when merging lines
                         which are very close. [default: 2]
+ -s, --scale <scale>     Scaling factor. Large scaling factor leads to
+                         smaller lines being detected. [default: 15]
+ -i, --invert            Invert pdf image to make sure that lines are
+                         in foreground.
 -d, --debug <debug>     Debug by visualizing pdf geometry.
                         (contour,line,joint,table) Example: -d table
 """
@@ -76,12 +78,14 @@ stream_doc = """
 Stream method looks for whitespaces between text to form a table.

 usage:
- camelot stream [options] [--] <file>
+ camelot stream [-t <tarea>...] [-c <columns>...] [-n <ncols>...] [-y <ytol>...]
+                [-m <mtol>...] [options] [--] <file>

 options:
--n, --ncols <ncols>       Number of columns. [default: 0]
+-t, --tarea <tarea>       Specific table areas to analyze.
 -c, --columns <columns>   Comma-separated list of column x-coordinates.
                           Example: -c 10.1,20.2,30.3
+-n, --ncols <ncols>       Number of columns. [default: -1]
 -y, --ytol <ytol>         Tolerance to account for when grouping rows
                           together. [default: 2]
 -m, --mtol <mtol>         Tolerance to account for when merging columns
@@ -166,7 +170,7 @@ def plot_rc_piechart(data, output):
     plt.savefig(''.join([output, '_rc.png']), dpi=300)


-def summary(data, p_time):
+def print_stats(data, p_time):
     from operator import itemgetter
     from itertools import groupby

@@ -331,17 +335,18 @@ if __name__ == '__main__':
         else:
             p.append({'start': int(r), 'end': int(r)})

-    margin_tuple = (float(args['--cmargin']), float(args['--lmargin']),
+    margins = (float(args['--cmargin']), float(args['--lmargin']),
                float(args['--wmargin']))
     if args['<method>'] == 'lattice':
         try:
             manager = Pdf(Lattice(
+                table_area=args['--tarea'],
                 fill=args['--fill'],
+                jtol=[int(j) for j in args['--jtol']],
+                mtol=[int(m) for m in args['--mtol']],
                 scale=int(args['--scale']),
                 invert=args['--invert'],
-                jtol=int(args['--jtol']),
-                mtol=int(args['--mtol']),
-                pdf_margin=margin_tuple,
+                margins=margins,
                 debug=args['--debug']),
                 filename,
                 pagenos=p,
@@ -374,10 +379,10 @@ if __name__ == '__main__':
             if 'rc' in plot_type:
                 plot_rc_piechart(data, pngname)

-        if args['--summary']:
-            summary(data, processing_time)
+        if args['--print-stats']:
+            print_stats(data, processing_time)

-        if args['--save-info']:
+        if args['--save-stats']:
             if args['--output']:
                 scorename = os.path.join(args['--output'], os.path.basename(scorename))
             with open(scorename, 'w') as score_file:
|
||||||
elif args['<method>'] == 'stream':
|
elif args['<method>'] == 'stream':
|
||||||
try:
|
try:
|
||||||
manager = Pdf(Stream(
|
manager = Pdf(Stream(
|
||||||
ncolumns=int(args['--ncols']),
|
table_area=args['--tarea'],
|
||||||
columns=args['--columns'],
|
columns=args['--columns'],
|
||||||
ytol=int(args['--ytol']),
|
ncolumns=[int(nc) for nc in args['--ncols']],
|
||||||
mtol=int(args['--mtol']),
|
ytol=[int(y) for y in args['--ytol']],
|
||||||
pdf_margin=margin_tuple,
|
mtol=[int(m) for m in args['--mtol']],
|
||||||
|
margins=margins,
|
||||||
debug=args['--debug']),
|
debug=args['--debug']),
|
||||||
filename,
|
filename,
|
||||||
pagenos=p,
|
pagenos=p,
|
||||||
|
|
@@ -439,10 +445,10 @@ if __name__ == '__main__':
             if 'rc' in plot_type:
                 plot_rc_piechart(data, pngname)

-        if args['--summary']:
-            summary(data, processing_time)
+        if args['--print-stats']:
+            print_stats(data, processing_time)

-        if args['--save-info']:
+        if args['--save-stats']:
             if args['--output']:
                 scorename = os.path.join(args['--output'], os.path.basename(scorename))
             with open(scorename, 'w') as score_file: