Change utils function names
parent
20acda2259
commit
b3f840bba9
|
|
@ -11,7 +11,7 @@ from .base import BaseParser
|
|||
from ..core import Table
|
||||
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
||||
merge_close_values, get_table_index, compute_accuracy,
|
||||
count_empty, encode_, setup_logging)
|
||||
count_empty_strings, encode_, setup_logging)
|
||||
from ..image_processing import (adaptive_threshold, find_lines,
|
||||
find_table_contours, find_table_joints)
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ class Lattice(BaseParser):
|
|||
table.df = pd.DataFrame(data)
|
||||
table.shape = table.df.shape
|
||||
|
||||
whitespace, __, __ = count_empty(data)
|
||||
whitespace, __, __ = count_empty_strings(data)
|
||||
table.accuracy = accuracy
|
||||
table.whitespace = whitespace
|
||||
table.order = table_idx + 1
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import pandas as pd
|
|||
from .base import BaseParser
|
||||
from ..core import Table
|
||||
from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
||||
count_empty, encode_)
|
||||
count_empty_strings, encode_)
|
||||
|
||||
|
||||
logger = setup_logging(__name__)
|
||||
|
|
@ -207,7 +207,7 @@ class Stream(BaseParser):
|
|||
table.df = pd.DataFrame(data)
|
||||
table.shape = table.df.shape
|
||||
|
||||
whitespace, __, __ = count_empty(data)
|
||||
whitespace, __, __ = count_empty_strings(data)
|
||||
table.accuracy = accuracy
|
||||
table.whitespace = whitespace
|
||||
table.order = table_idx + 1
|
||||
|
|
|
|||
|
|
@ -287,7 +287,7 @@ def merge_close_values(ar, mtol=2):
|
|||
return ret
|
||||
|
||||
|
||||
def flag_on_size(textline, direction):
|
||||
def flag_font_size(textline, direction):
|
||||
"""
|
||||
|
||||
Parameters
|
||||
|
|
@ -381,7 +381,7 @@ def split_textline(table, textline, direction, flag_size=True):
|
|||
grouped_chars = []
|
||||
for key, chars in groupby(cut_text, itemgetter(0, 1)):
|
||||
if flag_size:
|
||||
grouped_chars.append((key[0], key[1], flag_on_size([t[2] for t in chars], direction)))
|
||||
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
|
||||
else:
|
||||
gchars = [t[2].get_text() for t in chars]
|
||||
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
|
||||
|
|
@ -444,7 +444,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
|
|||
return split_textline(table, t, direction, flag_size=flag_size), error
|
||||
else:
|
||||
if flag_size:
|
||||
return [(r_idx, c_idx, flag_on_size(t._objs, direction))], error
|
||||
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
|
||||
else:
|
||||
return [(r_idx, c_idx, t.get_text().strip('\n'))], error
|
||||
|
||||
|
|
@ -474,27 +474,7 @@ def compute_accuracy(error_weights):
|
|||
return score
|
||||
|
||||
|
||||
def remove_empty(d):
    """Drop rows made up only of empty strings and columns with no truthy cell.

    Parameters
    ----------
    d : list
        Table data as a list of rows, each row a sequence of cell values.
        The input is left unmodified.

    Returns
    -------
    zip
        Result of ``zip(*columns)`` over the surviving columns, i.e. the
        remaining rows as tuples (a list on Python 2, a one-shot iterator
        on Python 3).

    """
    # Build a filtered copy instead of popping from ``d`` while enumerating
    # it: popping shifts the next row into the current index, so the loop
    # skipped it and every second row of a run of empty rows survived.
    rows = [row for row in d if not all(cell == '' for cell in row)]
    # Transpose so columns can be filtered; a column is kept when at least
    # one of its cells is truthy.
    columns = [list(col) for col in zip(*rows) if any(col)]
    # Transpose back to row-major order.
    return zip(*columns)
|
||||
|
||||
|
||||
def count_empty(d):
|
||||
def count_empty_strings(d):
|
||||
"""
|
||||
|
||||
Parameters
|
||||
|
|
@ -529,6 +509,26 @@ def count_empty(d):
|
|||
return empty_p, r_nempty_cells, c_nempty_cells
|
||||
|
||||
|
||||
def remove_empty_strings(d):
    """Drop rows made up only of empty strings and columns with no truthy cell.

    Parameters
    ----------
    d : list
        Table data as a list of rows, each row a sequence of cell values.
        The input is left unmodified.

    Returns
    -------
    zip
        Result of ``zip(*columns)`` over the surviving columns, i.e. the
        remaining rows as tuples (a list on Python 2, a one-shot iterator
        on Python 3).

    """
    # Build a filtered copy instead of popping from ``d`` while enumerating
    # it: popping shifts the next row into the current index, so the loop
    # skipped it and every second row of a run of empty rows survived.
    rows = [row for row in d if not all(cell == '' for cell in row)]
    # Transpose so columns can be filtered; a column is kept when at least
    # one of its cells is truthy.
    columns = [list(col) for col in zip(*rows) if any(col)]
    # Transpose back to row-major order.
    return zip(*columns)
|
||||
|
||||
|
||||
def encode_(ar):
|
||||
"""
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue