Change utils function names
parent
20acda2259
commit
b3f840bba9
|
|
@ -11,7 +11,7 @@ from .base import BaseParser
|
||||||
from ..core import Table
|
from ..core import Table
|
||||||
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
||||||
merge_close_values, get_table_index, compute_accuracy,
|
merge_close_values, get_table_index, compute_accuracy,
|
||||||
count_empty, encode_, setup_logging)
|
count_empty_strings, encode_, setup_logging)
|
||||||
from ..image_processing import (adaptive_threshold, find_lines,
|
from ..image_processing import (adaptive_threshold, find_lines,
|
||||||
find_table_contours, find_table_joints)
|
find_table_contours, find_table_joints)
|
||||||
|
|
||||||
|
|
@ -194,7 +194,7 @@ class Lattice(BaseParser):
|
||||||
table.df = pd.DataFrame(data)
|
table.df = pd.DataFrame(data)
|
||||||
table.shape = table.df.shape
|
table.shape = table.df.shape
|
||||||
|
|
||||||
whitespace, __, __ = count_empty(data)
|
whitespace, __, __ = count_empty_strings(data)
|
||||||
table.accuracy = accuracy
|
table.accuracy = accuracy
|
||||||
table.whitespace = whitespace
|
table.whitespace = whitespace
|
||||||
table.order = table_idx + 1
|
table.order = table_idx + 1
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import pandas as pd
|
||||||
from .base import BaseParser
|
from .base import BaseParser
|
||||||
from ..core import Table
|
from ..core import Table
|
||||||
from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
||||||
count_empty, encode_)
|
count_empty_strings, encode_)
|
||||||
|
|
||||||
|
|
||||||
logger = setup_logging(__name__)
|
logger = setup_logging(__name__)
|
||||||
|
|
@ -207,7 +207,7 @@ class Stream(BaseParser):
|
||||||
table.df = pd.DataFrame(data)
|
table.df = pd.DataFrame(data)
|
||||||
table.shape = table.df.shape
|
table.shape = table.df.shape
|
||||||
|
|
||||||
whitespace, __, __ = count_empty(data)
|
whitespace, __, __ = count_empty_strings(data)
|
||||||
table.accuracy = accuracy
|
table.accuracy = accuracy
|
||||||
table.whitespace = whitespace
|
table.whitespace = whitespace
|
||||||
table.order = table_idx + 1
|
table.order = table_idx + 1
|
||||||
|
|
|
||||||
|
|
@ -287,7 +287,7 @@ def merge_close_values(ar, mtol=2):
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def flag_on_size(textline, direction):
|
def flag_font_size(textline, direction):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
|
|
@ -381,7 +381,7 @@ def split_textline(table, textline, direction, flag_size=True):
|
||||||
grouped_chars = []
|
grouped_chars = []
|
||||||
for key, chars in groupby(cut_text, itemgetter(0, 1)):
|
for key, chars in groupby(cut_text, itemgetter(0, 1)):
|
||||||
if flag_size:
|
if flag_size:
|
||||||
grouped_chars.append((key[0], key[1], flag_on_size([t[2] for t in chars], direction)))
|
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
|
||||||
else:
|
else:
|
||||||
gchars = [t[2].get_text() for t in chars]
|
gchars = [t[2].get_text() for t in chars]
|
||||||
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
|
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
|
||||||
|
|
@ -444,7 +444,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
|
||||||
return split_textline(table, t, direction, flag_size=flag_size), error
|
return split_textline(table, t, direction, flag_size=flag_size), error
|
||||||
else:
|
else:
|
||||||
if flag_size:
|
if flag_size:
|
||||||
return [(r_idx, c_idx, flag_on_size(t._objs, direction))], error
|
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
|
||||||
else:
|
else:
|
||||||
return [(r_idx, c_idx, t.get_text().strip('\n'))], error
|
return [(r_idx, c_idx, t.get_text().strip('\n'))], error
|
||||||
|
|
||||||
|
|
@ -474,27 +474,7 @@ def compute_accuracy(error_weights):
|
||||||
return score
|
return score
|
||||||
|
|
||||||
|
|
||||||
def remove_empty(d):
|
def count_empty_strings(d):
|
||||||
"""
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
d
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
|
|
||||||
"""
|
|
||||||
for i, row in enumerate(d):
|
|
||||||
if row == [''] * len(row):
|
|
||||||
d.pop(i)
|
|
||||||
d = zip(*d)
|
|
||||||
d = [list(row) for row in d if any(row)]
|
|
||||||
d = zip(*d)
|
|
||||||
return d
|
|
||||||
|
|
||||||
|
|
||||||
def count_empty(d):
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
|
|
@ -529,6 +509,26 @@ def count_empty(d):
|
||||||
return empty_p, r_nempty_cells, c_nempty_cells
|
return empty_p, r_nempty_cells, c_nempty_cells
|
||||||
|
|
||||||
|
|
||||||
|
def remove_empty_strings(d):
    """Drop rows made up solely of empty strings and columns with no
    truthy cell.

    Parameters
    ----------
    d : list
        Table data as a list of rows, each row being a list of cell
        values. The input list is not modified.

    Returns
    -------
    d : list
        Remaining rows as a list of tuples, with empty rows and empty
        columns removed. An empty list is returned when nothing is left.

    """
    # Build a new list instead of popping while iterating: popping inside
    # the loop shifts the remaining items and skips the row that follows
    # each removed one, so runs of consecutive empty rows were only
    # partially removed.
    kept_rows = [row for row in d if row != [''] * len(row)]
    # Transpose so that columns can be filtered the same way as rows.
    kept_cols = [list(col) for col in zip(*kept_rows) if any(col)]
    # Transpose back; materialise so the result is a list on Python 3 too
    # (zip already returns a list on Python 2).
    return list(zip(*kept_cols))
|
||||||
|
|
||||||
|
|
||||||
def encode_(ar):
|
def encode_(ar):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue