Rename parser keyword arguments
parent
6948f16f13
commit
04383920b4
|
|
@ -139,14 +139,14 @@ class Table(object):
|
||||||
cell.left = cell.right = cell.top = cell.bottom = True
|
cell.left = cell.right = cell.top = cell.bottom = True
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def set_edges(self, vertical, horizontal, jtol=2):
|
def set_edges(self, vertical, horizontal, joint_close_tol=2):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
vertical
|
vertical
|
||||||
horizontal
|
horizontal
|
||||||
jtol
|
joint_close_tol
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
@ -156,11 +156,11 @@ class Table(object):
|
||||||
# find closest x coord
|
# find closest x coord
|
||||||
# iterate over y coords and find closest start and end points
|
# iterate over y coords and find closest start and end points
|
||||||
i = [i for i, t in enumerate(self.cols)
|
i = [i for i, t in enumerate(self.cols)
|
||||||
if np.isclose(v[0], t[0], atol=jtol)]
|
if np.isclose(v[0], t[0], atol=joint_close_tol)]
|
||||||
j = [j for j, t in enumerate(self.rows)
|
j = [j for j, t in enumerate(self.rows)
|
||||||
if np.isclose(v[3], t[0], atol=jtol)]
|
if np.isclose(v[3], t[0], atol=joint_close_tol)]
|
||||||
k = [k for k, t in enumerate(self.rows)
|
k = [k for k, t in enumerate(self.rows)
|
||||||
if np.isclose(v[1], t[0], atol=jtol)]
|
if np.isclose(v[1], t[0], atol=joint_close_tol)]
|
||||||
if not j:
|
if not j:
|
||||||
continue
|
continue
|
||||||
J = j[0]
|
J = j[0]
|
||||||
|
|
@ -207,11 +207,11 @@ class Table(object):
|
||||||
# find closest y coord
|
# find closest y coord
|
||||||
# iterate over x coords and find closest start and end points
|
# iterate over x coords and find closest start and end points
|
||||||
i = [i for i, t in enumerate(self.rows)
|
i = [i for i, t in enumerate(self.rows)
|
||||||
if np.isclose(h[1], t[0], atol=jtol)]
|
if np.isclose(h[1], t[0], atol=joint_close_tol)]
|
||||||
j = [j for j, t in enumerate(self.cols)
|
j = [j for j, t in enumerate(self.cols)
|
||||||
if np.isclose(h[0], t[0], atol=jtol)]
|
if np.isclose(h[0], t[0], atol=joint_close_tol)]
|
||||||
k = [k for k, t in enumerate(self.cols)
|
k = [k for k, t in enumerate(self.cols)
|
||||||
if np.isclose(h[2], t[0], atol=jtol)]
|
if np.isclose(h[2], t[0], atol=joint_close_tol)]
|
||||||
if not j:
|
if not j:
|
||||||
continue
|
continue
|
||||||
J = j[0]
|
J = j[0]
|
||||||
|
|
|
||||||
|
|
@ -7,13 +7,13 @@ import numpy as np
|
||||||
from .utils import merge_tuples
|
from .utils import merge_tuples
|
||||||
|
|
||||||
|
|
||||||
def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
|
def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
imagename
|
imagename
|
||||||
invert
|
process_background
|
||||||
blocksize
|
blocksize
|
||||||
c
|
c
|
||||||
|
|
||||||
|
|
@ -24,7 +24,7 @@ def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
|
||||||
img = cv2.imread(imagename)
|
img = cv2.imread(imagename)
|
||||||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
if invert:
|
if process_background:
|
||||||
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||||
cv2.THRESH_BINARY, blocksize, c)
|
cv2.THRESH_BINARY, blocksize, c)
|
||||||
else:
|
else:
|
||||||
|
|
@ -33,14 +33,14 @@ def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
|
||||||
return img, threshold
|
return img, threshold
|
||||||
|
|
||||||
|
|
||||||
def find_lines(threshold, direction='horizontal', scale=15, iterations=0):
|
def find_lines(threshold, direction='horizontal', line_size_scaling=15, iterations=0):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
threshold
|
threshold
|
||||||
direction
|
direction
|
||||||
scale
|
line_size_scaling
|
||||||
iterations
|
iterations
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
|
@ -50,10 +50,10 @@ def find_lines(threshold, direction='horizontal', scale=15, iterations=0):
|
||||||
lines = []
|
lines = []
|
||||||
|
|
||||||
if direction == 'vertical':
|
if direction == 'vertical':
|
||||||
size = threshold.shape[0] // scale
|
size = threshold.shape[0] // line_size_scaling
|
||||||
el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
||||||
elif direction == 'horizontal':
|
elif direction == 'horizontal':
|
||||||
size = threshold.shape[1] // scale
|
size = threshold.shape[1] // line_size_scaling
|
||||||
el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
|
el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
|
||||||
elif direction is None:
|
elif direction is None:
|
||||||
raise ValueError("Specify direction as either 'vertical' or"
|
raise ValueError("Specify direction as either 'vertical' or"
|
||||||
|
|
@ -148,19 +148,19 @@ def find_table_joints(contours, vertical, horizontal):
|
||||||
return tables
|
return tables
|
||||||
|
|
||||||
|
|
||||||
def remove_lines(threshold, line_scale=15):
|
def remove_lines(threshold, line_size_scaling=15):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
threshold
|
threshold
|
||||||
line_scale
|
line_size_scaling
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
||||||
"""
|
"""
|
||||||
size = threshold.shape[0] // line_scale
|
size = threshold.shape[0] // line_size_scaling
|
||||||
vertical_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
vertical_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
||||||
horizontal_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
|
horizontal_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
|
||||||
dilate_el = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
|
dilate_el = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
|
||||||
|
|
@ -176,19 +176,19 @@ def remove_lines(threshold, line_scale=15):
|
||||||
return threshold
|
return threshold
|
||||||
|
|
||||||
|
|
||||||
def find_cuts(threshold, char_scale=200):
|
def find_cuts(threshold, char_size_scaling=200):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
threshold
|
threshold
|
||||||
char_scale
|
char_size_scaling
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
||||||
"""
|
"""
|
||||||
size = threshold.shape[0] // char_scale
|
size = threshold.shape[0] // char_size_scaling
|
||||||
char_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
char_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
|
||||||
|
|
||||||
threshold = cv2.erode(threshold, char_el)
|
threshold = cv2.erode(threshold, char_el)
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import pandas as pd
|
||||||
from .base import BaseParser
|
from .base import BaseParser
|
||||||
from ..core import Table
|
from ..core import Table
|
||||||
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
||||||
merge_close_values, get_table_index, compute_accuracy,
|
merge_close_lines, get_table_index, compute_accuracy,
|
||||||
count_empty_strings, encode_, setup_logging)
|
count_empty_strings, encode_, setup_logging)
|
||||||
from ..image_processing import (adaptive_threshold, find_lines,
|
from ..image_processing import (adaptive_threshold, find_lines,
|
||||||
find_table_contours, find_table_joints)
|
find_table_contours, find_table_joints)
|
||||||
|
|
@ -23,23 +23,24 @@ class Lattice(BaseParser):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, table_area=None, fill=None, mtol=2, jtol=2,
|
def __init__(self, table_area=None, process_background=False,
|
||||||
blocksize=15, threshold_constant=-2, scale=15, iterations=0,
|
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
||||||
invert=False, margins=(1.0, 0.5, 0.1), split_text=False,
|
split_text=False, flag_size=True, line_close_tol=2,
|
||||||
flag_size=True, shift_text=['l', 't'], debug=None):
|
joint_close_tol=2, blocksize=15, threshold_constant=-2,
|
||||||
|
iterations=0, margins=(1.0, 0.5, 0.1), debug=None):
|
||||||
self.table_area = table_area
|
self.table_area = table_area
|
||||||
self.fill = fill
|
self.process_background = process_background
|
||||||
self.mtol = mtol
|
self.line_size_scaling = line_size_scaling
|
||||||
self.jtol = jtol
|
self.copy_text = copy_text
|
||||||
self.blocksize = blocksize
|
self.shift_text = shift_text
|
||||||
self.threshold_constant = threshold_constant
|
|
||||||
self.scale = scale
|
|
||||||
self.iterations = iterations
|
|
||||||
self.invert = invert
|
|
||||||
self.char_margin, self.line_margin, self.word_margin = margins
|
|
||||||
self.split_text = split_text
|
self.split_text = split_text
|
||||||
self.flag_size = flag_size
|
self.flag_size = flag_size
|
||||||
self.shift_text = shift_text
|
self.line_close_tol = line_close_tol
|
||||||
|
self.joint_close_tol = joint_close_tol
|
||||||
|
self.blocksize = blocksize
|
||||||
|
self.threshold_constant = threshold_constant
|
||||||
|
self.iterations = iterations
|
||||||
|
self.char_margin, self.line_margin, self.word_margin = margins
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -67,8 +68,8 @@ class Lattice(BaseParser):
|
||||||
return indices
|
return indices
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fill_spanning(t, fill=None):
|
def _copy_spanning_text(t, copy_text=None):
|
||||||
for f in fill:
|
for f in copy_text:
|
||||||
if f == "h":
|
if f == "h":
|
||||||
for i in range(len(t.cells)):
|
for i in range(len(t.cells)):
|
||||||
for j in range(len(t.cells[i])):
|
for j in range(len(t.cells[i])):
|
||||||
|
|
@ -96,7 +97,7 @@ class Lattice(BaseParser):
|
||||||
stderr=subprocess.STDOUT)
|
stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
def _generate_table_bbox(self):
|
def _generate_table_bbox(self):
|
||||||
self.image, self.threshold = adaptive_threshold(self.imagename, invert=self.invert,
|
self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
|
||||||
blocksize=self.blocksize, c=self.threshold_constant)
|
blocksize=self.blocksize, c=self.threshold_constant)
|
||||||
image_width = self.image.shape[1]
|
image_width = self.image.shape[1]
|
||||||
image_height = self.image.shape[0]
|
image_height = self.image.shape[0]
|
||||||
|
|
@ -107,10 +108,12 @@ class Lattice(BaseParser):
|
||||||
image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
|
image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
|
||||||
pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
|
pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
|
||||||
|
|
||||||
vertical_mask, vertical_segments = find_lines(self.threshold,
|
vertical_mask, vertical_segments = find_lines(
|
||||||
direction='vertical', scale=self.scale, iterations=self.iterations)
|
self.threshold, direction='vertical',
|
||||||
horizontal_mask, horizontal_segments = find_lines(self.threshold,
|
line_size_scaling=self.line_size_scaling, iterations=self.iterations)
|
||||||
direction='horizontal', scale=self.scale, iterations=self.iterations)
|
horizontal_mask, horizontal_segments = find_lines(
|
||||||
|
self.threshold, direction='horizontal',
|
||||||
|
line_size_scaling=self.line_size_scaling, iterations=self.iterations)
|
||||||
|
|
||||||
if self.table_area is not None:
|
if self.table_area is not None:
|
||||||
areas = []
|
areas = []
|
||||||
|
|
@ -149,8 +152,10 @@ class Lattice(BaseParser):
|
||||||
cols.extend([tk[0], tk[2]])
|
cols.extend([tk[0], tk[2]])
|
||||||
rows.extend([tk[1], tk[3]])
|
rows.extend([tk[1], tk[3]])
|
||||||
# sort horizontal and vertical segments
|
# sort horizontal and vertical segments
|
||||||
cols = merge_close_values(sorted(cols), mtol=self.mtol)
|
cols = merge_close_lines(
|
||||||
rows = merge_close_values(sorted(rows, reverse=True), mtol=self.mtol)
|
sorted(cols), line_close_tol=self.line_close_tol)
|
||||||
|
rows = merge_close_lines(
|
||||||
|
sorted(rows, reverse=True), line_close_tol=self.line_close_tol)
|
||||||
# make grid using x and y coord of shortlisted rows and cols
|
# make grid using x and y coord of shortlisted rows and cols
|
||||||
cols = [(cols[i], cols[i + 1])
|
cols = [(cols[i], cols[i + 1])
|
||||||
for i in range(0, len(cols) - 1)]
|
for i in range(0, len(cols) - 1)]
|
||||||
|
|
@ -167,7 +172,7 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
table = Table(cols, rows)
|
table = Table(cols, rows)
|
||||||
# set table edges to True using ver+hor lines
|
# set table edges to True using ver+hor lines
|
||||||
table = table.set_edges(v_s, h_s, jtol=self.jtol)
|
table = table.set_edges(v_s, h_s, joint_close_tol=self.joint_close_tol)
|
||||||
# set spanning cells to True
|
# set spanning cells to True
|
||||||
table = table.set_span()
|
table = table.set_span()
|
||||||
# set table border edges to True
|
# set table border edges to True
|
||||||
|
|
@ -186,8 +191,8 @@ class Lattice(BaseParser):
|
||||||
table.cells[r_idx][c_idx].text = text
|
table.cells[r_idx][c_idx].text = text
|
||||||
accuracy = compute_accuracy([[100, pos_errors]])
|
accuracy = compute_accuracy([[100, pos_errors]])
|
||||||
|
|
||||||
if self.fill is not None:
|
if self.copy_text is not None:
|
||||||
table = Lattice._fill_spanning(table, fill=self.fill)
|
table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
|
||||||
|
|
||||||
data = table.data
|
data = table.data
|
||||||
data = encode_(data)
|
data = encode_(data)
|
||||||
|
|
|
||||||
|
|
@ -18,17 +18,17 @@ class Stream(BaseParser):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, table_area=None, columns=None, ytol=2, mtol=0,
|
def __init__(self, table_area=None, columns=None, split_text=False,
|
||||||
margins=(1.0, 0.5, 0.1), split_text=False, flag_size=True,
|
flag_size=True, row_close_tol=2, col_close_tol=0,
|
||||||
debug=None):
|
margins=(1.0, 0.5, 0.1), debug=None):
|
||||||
self.table_area = table_area
|
self.table_area = table_area
|
||||||
self.columns = columns
|
self.columns = columns
|
||||||
self._validate_columns()
|
self._validate_columns()
|
||||||
self.ytol = ytol
|
|
||||||
self.mtol = mtol
|
|
||||||
self.char_margin, self.line_margin, self.word_margin = margins
|
|
||||||
self.split_text = split_text
|
self.split_text = split_text
|
||||||
self.flag_size = flag_size
|
self.flag_size = flag_size
|
||||||
|
self.row_close_tol = row_close_tol
|
||||||
|
self.col_close_tol = col_close_tol
|
||||||
|
self.char_margin, self.line_margin, self.word_margin = margins
|
||||||
self.debug = debug
|
self.debug = debug
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -41,7 +41,7 @@ class Stream(BaseParser):
|
||||||
return text_bbox
|
return text_bbox
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _group_rows(text, ytol=2):
|
def _group_rows(text, row_close_tol=2):
|
||||||
row_y = 0
|
row_y = 0
|
||||||
rows = []
|
rows = []
|
||||||
temp = []
|
temp = []
|
||||||
|
|
@ -50,7 +50,7 @@ class Stream(BaseParser):
|
||||||
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
||||||
# type(obj) is LTChar]):
|
# type(obj) is LTChar]):
|
||||||
if t.get_text().strip():
|
if t.get_text().strip():
|
||||||
if not np.isclose(row_y, t.y0, atol=ytol):
|
if not np.isclose(row_y, t.y0, atol=row_close_tol):
|
||||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||||
temp = []
|
temp = []
|
||||||
row_y = t.y0
|
row_y = t.y0
|
||||||
|
|
@ -60,24 +60,24 @@ class Stream(BaseParser):
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _merge_columns(l, mtol=0):
|
def _merge_columns(l, col_close_tol=0):
|
||||||
merged = []
|
merged = []
|
||||||
for higher in l:
|
for higher in l:
|
||||||
if not merged:
|
if not merged:
|
||||||
merged.append(higher)
|
merged.append(higher)
|
||||||
else:
|
else:
|
||||||
lower = merged[-1]
|
lower = merged[-1]
|
||||||
if mtol >= 0:
|
if col_close_tol >= 0:
|
||||||
if (higher[0] <= lower[1] or
|
if (higher[0] <= lower[1] or
|
||||||
np.isclose(higher[0], lower[1], atol=mtol)):
|
np.isclose(higher[0], lower[1], atol=col_close_tol)):
|
||||||
upper_bound = max(lower[1], higher[1])
|
upper_bound = max(lower[1], higher[1])
|
||||||
lower_bound = min(lower[0], higher[0])
|
lower_bound = min(lower[0], higher[0])
|
||||||
merged[-1] = (lower_bound, upper_bound)
|
merged[-1] = (lower_bound, upper_bound)
|
||||||
else:
|
else:
|
||||||
merged.append(higher)
|
merged.append(higher)
|
||||||
elif mtol < 0:
|
elif col_close_tol < 0:
|
||||||
if higher[0] <= lower[1]:
|
if higher[0] <= lower[1]:
|
||||||
if np.isclose(higher[0], lower[1], atol=abs(mtol)):
|
if np.isclose(higher[0], lower[1], atol=abs(col_close_tol)):
|
||||||
merged.append(higher)
|
merged.append(higher)
|
||||||
else:
|
else:
|
||||||
upper_bound = max(lower[1], higher[1])
|
upper_bound = max(lower[1], higher[1])
|
||||||
|
|
@ -99,9 +99,9 @@ class Stream(BaseParser):
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _add_columns(cols, text, ytol):
|
def _add_columns(cols, text, row_close_tol):
|
||||||
if text:
|
if text:
|
||||||
text = Stream._group_rows(text, ytol=ytol)
|
text = Stream._group_rows(text, row_close_tol=row_close_tol)
|
||||||
elements = [len(r) for r in text]
|
elements = [len(r) for r in text]
|
||||||
new_cols = [(t.x0, t.x1)
|
new_cols = [(t.x0, t.x1)
|
||||||
for r in text if len(r) == max(elements) for t in r]
|
for r in text if len(r) == max(elements) for t in r]
|
||||||
|
|
@ -149,7 +149,7 @@ class Stream(BaseParser):
|
||||||
self.t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0))
|
self.t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0))
|
||||||
|
|
||||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||||
rows_grouped = self._group_rows(self.t_bbox['horizontal'], ytol=self.ytol)
|
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
|
||||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||||
elements = [len(r) for r in rows_grouped]
|
elements = [len(r) for r in rows_grouped]
|
||||||
|
|
||||||
|
|
@ -170,7 +170,7 @@ class Stream(BaseParser):
|
||||||
os.path.basename(self.rootname)))
|
os.path.basename(self.rootname)))
|
||||||
cols = [(t.x0, t.x1)
|
cols = [(t.x0, t.x1)
|
||||||
for r in rows_grouped if len(r) == ncols for t in r]
|
for r in rows_grouped if len(r) == ncols for t in r]
|
||||||
cols = self._merge_columns(sorted(cols), mtol=self.mtol)
|
cols = self._merge_columns(sorted(cols), col_close_tol=self.col_close_tol)
|
||||||
inner_text = []
|
inner_text = []
|
||||||
for i in range(1, len(cols)):
|
for i in range(1, len(cols)):
|
||||||
left = cols[i - 1][1]
|
left = cols[i - 1][1]
|
||||||
|
|
@ -182,7 +182,7 @@ class Stream(BaseParser):
|
||||||
for t in self.t_bbox[direction]
|
for t in self.t_bbox[direction]
|
||||||
if t.x0 > cols[-1][1] or t.x1 < cols[0][0]]
|
if t.x0 > cols[-1][1] or t.x1 < cols[0][0]]
|
||||||
inner_text.extend(outer_text)
|
inner_text.extend(outer_text)
|
||||||
cols = self._add_columns(cols, inner_text, self.ytol)
|
cols = self._add_columns(cols, inner_text, self.row_close_tol)
|
||||||
cols = self._join_columns(cols, text_x_min, text_x_max)
|
cols = self._join_columns(cols, text_x_min, text_x_max)
|
||||||
|
|
||||||
return cols, rows
|
return cols, rows
|
||||||
|
|
|
||||||
|
|
@ -236,13 +236,13 @@ def text_in_bbox(bbox, text):
|
||||||
return t_bbox
|
return t_bbox
|
||||||
|
|
||||||
|
|
||||||
def remove_close_values(ar, mtol=2):
|
def remove_close_lines(ar, line_close_tol=2):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
ar
|
ar
|
||||||
mtol
|
line_close_tol
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
@ -254,20 +254,20 @@ def remove_close_values(ar, mtol=2):
|
||||||
ret.append(a)
|
ret.append(a)
|
||||||
else:
|
else:
|
||||||
temp = ret[-1]
|
temp = ret[-1]
|
||||||
if np.isclose(temp, a, atol=mtol):
|
if np.isclose(temp, a, atol=line_close_tol):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
ret.append(a)
|
ret.append(a)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def merge_close_values(ar, mtol=2):
|
def merge_close_lines(ar, line_close_tol=2):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
ar
|
ar
|
||||||
mtol
|
line_close_tol
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
@ -279,7 +279,7 @@ def merge_close_values(ar, mtol=2):
|
||||||
ret.append(a)
|
ret.append(a)
|
||||||
else:
|
else:
|
||||||
temp = ret[-1]
|
temp = ret[-1]
|
||||||
if np.isclose(temp, a, atol=mtol):
|
if np.isclose(temp, a, atol=line_close_tol):
|
||||||
temp = (temp + a) / 2.0
|
temp = (temp + a) / 2.0
|
||||||
ret[-1] = temp
|
ret[-1] = temp
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue