pull/153/head
Frh 2020-04-29 13:52:58 -07:00
parent 8a63e8e794
commit f3aded5b17
4 changed files with 54 additions and 55 deletions

View File

@ -171,16 +171,16 @@ class TextAlignments():
idx_insert = None
if idx_closest is None:
idx_insert = 0
else:
coord_closest = alignment_array[idx_closest].coord
# Note: np.isclose is slow!
elif coord - 0.5 < \
alignment_array[idx_closest].coord < \
coord + 0.5:
if coord - 0.5 < coord_closest < coord + 0.5:
self._update_alignment(
alignment_array[idx_closest],
coord,
textline
)
elif alignment_array[idx_closest].coord < coord:
elif coord_closest < coord:
idx_insert = idx_closest + 1
else:
idx_insert = idx_closest
@ -212,8 +212,8 @@ class TextEdges(TextAlignments):
te = self._create_new_text_alignment(coord, textline, align)
self._text_alignments[align].append(te)
def _update_alignment(self, edge, coord, textline):
edge.update_coords(coord, textline, self.edge_tol)
def _update_alignment(self, alignment, coord, textline):
alignment.update_coords(coord, textline, self.edge_tol)
def generate(self, textlines):
"""Generates the text edges dict based on horizontal text
@ -469,8 +469,7 @@ class Table():
if self.page == other.page:
if self.order < other.order:
return True
if self.page < other.page:
return True
return self.page < other.page
@property
def data(self):
@ -674,7 +673,7 @@ class Table():
bottom = cell.bottom
if cell.bound == 4:
continue
elif cell.bound == 3:
if cell.bound == 3:
if not left and (right and top and bottom):
cell.hspan = True
elif not right and (left and top and bottom):

View File

@ -44,9 +44,9 @@ def draw_labeled_bbox(
)
vlabel, hlabel = label_pos.split(",")
if (vlabel == "top"):
if vlabel == "top":
y = max(bbox[1], bbox[3])
elif (vlabel == "bottom"):
elif vlabel == "bottom":
y = min(bbox[1], bbox[3])
else:
y = 0.5 * (bbox[1] + bbox[3])
@ -58,9 +58,9 @@ def draw_labeled_bbox(
"center": "center"
}
vlabel_out_of_box = label_align_swap[vlabel]
if (hlabel == "right"):
if hlabel == "right":
x = max(bbox[0], bbox[2])
elif (hlabel == "left"):
elif hlabel == "left":
x = min(bbox[0], bbox[2])
else:
x = 0.5 * (bbox[0] + bbox[2])
@ -203,7 +203,7 @@ class PlotMethods():
raise NotImplementedError(
"Lattice flavor does not support kind='{}'".format(kind)
)
elif table.flavor in ["stream", "hybrid"] and kind in ["line"]:
if table.flavor in ["stream", "hybrid"] and kind in ["line"]:
raise NotImplementedError(
"Stream flavor does not support kind='{}'".format(kind)
)

View File

@ -157,12 +157,15 @@ def remove_extra(kwargs, flavor="lattice"):
# https://stackoverflow.com/a/22726782
# and https://stackoverflow.com/questions/10965479
class TemporaryDirectory():
def __init__(self):
self.dir_path = None
def __enter__(self):
self.name = tempfile.mkdtemp()
self.dir_path = tempfile.mkdtemp()
# Only delete the temporary directory upon
# program exit.
atexit.register(shutil.rmtree, self.name)
return self.name
atexit.register(shutil.rmtree, self.dir_path)
return self.dir_path
def __exit__(self, exc_type, exc_value, traceback):
pass
@ -343,8 +346,8 @@ def get_rotation(chars, horizontal_text, vertical_text):
hlen = len([t for t in horizontal_text if t.get_text().strip()])
vlen = len([t for t in vertical_text if t.get_text().strip()])
if hlen < vlen:
clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
clockwise = sum(t.matrix[1] < 0 < t.matrix[2] for t in chars)
anticlockwise = sum(t.matrix[1] > 0 > t.matrix[2] for t in chars)
rotation = "anticlockwise" if clockwise < anticlockwise \
else "clockwise"
return rotation
@ -753,7 +756,7 @@ def flag_font_size(textline, direction, strip_text=""):
flist.append("".join(fchars))
fstring = "".join(flist)
else:
fstring = "".join([t.get_text() for t in textline])
fstring = "".join(t.get_text() for t in textline)
return text_strip(fstring, strip_text)
@ -815,7 +818,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
):
cut_text.append((r, cut[0], obj))
break
else:
# TODO: add test
if cut == x_cuts[-1]:
cut_text.append((r, cut[0] + 1, obj))
@ -848,7 +850,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
and (obj.y0 + obj.y1) / 2 >= cut[1]:
cut_text.append((cut[0], c, obj))
break
else:
# TODO: add test
if cut == y_cuts[-1]:
cut_text.append((cut[0] - 1, c, obj))
@ -964,7 +965,6 @@ def get_table_index(
),
error,
)
else:
if flag_size:
return (
[
@ -978,7 +978,6 @@ def get_table_index(
],
error,
)
else:
return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], \
error
@ -1002,7 +1001,7 @@ def compute_accuracy(error_weights):
SCORE_VAL = 100
try:
score = 0
if sum([ew[0] for ew in error_weights]) != SCORE_VAL:
if sum(ew[0] for ew in error_weights) != SCORE_VAL:
raise ValueError("Sum of weights should be equal to 100.")
for ew in error_weights:
weight = ew[0] / len(ew[1])

View File

@ -7,8 +7,9 @@ from pandas.testing import assert_frame_equal
import camelot
from camelot.core import Table, TableList
from camelot.utils import compare_tables
from camelot.__version__ import generate_version
# compare_tables used in console mode while debugging
from camelot.utils import compare_tables # noqa
from .data import *