Change utils function names

pull/2/head
Vinayak Mehta 2018-09-07 06:04:45 +05:30
parent 20acda2259
commit b3f840bba9
3 changed files with 28 additions and 28 deletions

View File

@ -11,7 +11,7 @@ from .base import BaseParser
from ..core import Table from ..core import Table
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox, from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
merge_close_values, get_table_index, compute_accuracy, merge_close_values, get_table_index, compute_accuracy,
count_empty, encode_, setup_logging) count_empty_strings, encode_, setup_logging)
from ..image_processing import (adaptive_threshold, find_lines, from ..image_processing import (adaptive_threshold, find_lines,
find_table_contours, find_table_joints) find_table_contours, find_table_joints)
@ -194,7 +194,7 @@ class Lattice(BaseParser):
table.df = pd.DataFrame(data) table.df = pd.DataFrame(data)
table.shape = table.df.shape table.shape = table.df.shape
whitespace, __, __ = count_empty(data) whitespace, __, __ = count_empty_strings(data)
table.accuracy = accuracy table.accuracy = accuracy
table.whitespace = whitespace table.whitespace = whitespace
table.order = table_idx + 1 table.order = table_idx + 1

View File

@ -8,7 +8,7 @@ import pandas as pd
from .base import BaseParser from .base import BaseParser
from ..core import Table from ..core import Table
from ..utils import (text_in_bbox, get_table_index, compute_accuracy, from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
count_empty, encode_) count_empty_strings, encode_)
logger = setup_logging(__name__) logger = setup_logging(__name__)
@ -207,7 +207,7 @@ class Stream(BaseParser):
table.df = pd.DataFrame(data) table.df = pd.DataFrame(data)
table.shape = table.df.shape table.shape = table.df.shape
whitespace, __, __ = count_empty(data) whitespace, __, __ = count_empty_strings(data)
table.accuracy = accuracy table.accuracy = accuracy
table.whitespace = whitespace table.whitespace = whitespace
table.order = table_idx + 1 table.order = table_idx + 1

View File

@ -287,7 +287,7 @@ def merge_close_values(ar, mtol=2):
return ret return ret
def flag_on_size(textline, direction): def flag_font_size(textline, direction):
""" """
Parameters Parameters
@ -381,7 +381,7 @@ def split_textline(table, textline, direction, flag_size=True):
grouped_chars = [] grouped_chars = []
for key, chars in groupby(cut_text, itemgetter(0, 1)): for key, chars in groupby(cut_text, itemgetter(0, 1)):
if flag_size: if flag_size:
grouped_chars.append((key[0], key[1], flag_on_size([t[2] for t in chars], direction))) grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
else: else:
gchars = [t[2].get_text() for t in chars] gchars = [t[2].get_text() for t in chars]
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n'))) grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
@ -444,7 +444,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
return split_textline(table, t, direction, flag_size=flag_size), error return split_textline(table, t, direction, flag_size=flag_size), error
else: else:
if flag_size: if flag_size:
return [(r_idx, c_idx, flag_on_size(t._objs, direction))], error return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
else: else:
return [(r_idx, c_idx, t.get_text().strip('\n'))], error return [(r_idx, c_idx, t.get_text().strip('\n'))], error
@ -474,27 +474,7 @@ def compute_accuracy(error_weights):
return score return score
def remove_empty(d): def count_empty_strings(d):
"""
Parameters
----------
d
Returns
-------
"""
for i, row in enumerate(d):
if row == [''] * len(row):
d.pop(i)
d = zip(*d)
d = [list(row) for row in d if any(row)]
d = zip(*d)
return d
def count_empty(d):
""" """
Parameters Parameters
@ -529,6 +509,26 @@ def count_empty(d):
return empty_p, r_nempty_cells, c_nempty_cells return empty_p, r_nempty_cells, c_nempty_cells
def remove_empty_strings(d):
    """
    Drop the rows and columns of a table that hold no data.

    Rows are filtered first, then columns are filtered on what remains.

    Parameters
    ----------
    d : list
        Table as a list of rows, each row a sequence of cell values.
        The input is not modified.

    Returns
    -------
    list
        Remaining rows as a list of tuples. A row is dropped when every
        cell equals ''. A column is dropped when none of its cells is
        truthy. Rows of unequal length are truncated to the shortest
        row, because the table is transposed with ``zip``.
    """
    # Build a new list rather than popping from `d` while iterating it:
    # popping shifts the following row into the current index, so the
    # row right after a removed one was never checked.
    rows = [row for row in d if not all(cell == '' for cell in row)]
    # Transpose so each tuple is one column, then keep the non-empty ones.
    columns = [col for col in zip(*rows) if any(col)]
    # Transpose back; materialise so callers get a reusable sequence
    # instead of a single-pass iterator.
    return list(zip(*columns))
def encode_(ar): def encode_(ar):
""" """