pull/2/head
Vinayak Mehta 2018-12-13 12:50:30 +05:30
parent 33cea45346
commit 5e71f0b0e6
3 changed files with 12 additions and 10 deletions

View File

@ -271,10 +271,11 @@ class Lattice(BaseParser):
tk, self.vertical_segments, self.horizontal_segments) tk, self.vertical_segments, self.horizontal_segments)
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
self.t_bbox = t_bbox
for direction in self.t_bbox: t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
self.t_bbox = t_bbox
cols, rows = zip(*self.table_bbox[tk]) cols, rows = zip(*self.table_bbox[tk])
cols, rows = list(cols), list(rows) cols, rows = list(cols), list(rows)

View File

@ -293,10 +293,11 @@ class Stream(BaseParser):
t_bbox = {} t_bbox = {}
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
self.t_bbox = t_bbox
for direction in self.t_bbox: t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
self.t_bbox = t_bbox
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol) rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)

View File

@ -344,9 +344,9 @@ def flag_font_size(textline, direction):
fchars = [t[0] for t in chars] fchars = [t[0] for t in chars]
if ''.join(fchars).strip(): if ''.join(fchars).strip():
flist.append(''.join(fchars)) flist.append(''.join(fchars))
fstring = ''.join(flist).strip('\n') fstring = ''.join(flist)
else: else:
fstring = ''.join([t.get_text() for t in textline]).strip('\n') fstring = ''.join([t.get_text() for t in textline])
return fstring return fstring
@ -419,7 +419,7 @@ def split_textline(table, textline, direction, flag_size=False):
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction))) grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
else: else:
gchars = [t[2].get_text() for t in chars] gchars = [t[2].get_text() for t in chars]
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n'))) grouped_chars.append((key[0], key[1], ''.join(gchars)))
return grouped_chars return grouped_chars
@ -500,7 +500,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False):
if flag_size: if flag_size:
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
else: else:
return [(r_idx, c_idx, t.get_text().strip('\n'))], error return [(r_idx, c_idx, t.get_text())], error
def compute_accuracy(error_weights): def compute_accuracy(error_weights):