Fix #192
parent
33cea45346
commit
5e71f0b0e6
|
|
@ -271,10 +271,11 @@ class Lattice(BaseParser):
|
||||||
tk, self.vertical_segments, self.horizontal_segments)
|
tk, self.vertical_segments, self.horizontal_segments)
|
||||||
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
||||||
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
||||||
self.t_bbox = t_bbox
|
|
||||||
|
|
||||||
for direction in self.t_bbox:
|
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
|
||||||
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0))
|
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
|
||||||
|
|
||||||
|
self.t_bbox = t_bbox
|
||||||
|
|
||||||
cols, rows = zip(*self.table_bbox[tk])
|
cols, rows = zip(*self.table_bbox[tk])
|
||||||
cols, rows = list(cols), list(rows)
|
cols, rows = list(cols), list(rows)
|
||||||
|
|
|
||||||
|
|
@ -293,10 +293,11 @@ class Stream(BaseParser):
|
||||||
t_bbox = {}
|
t_bbox = {}
|
||||||
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
||||||
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
||||||
self.t_bbox = t_bbox
|
|
||||||
|
|
||||||
for direction in self.t_bbox:
|
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
|
||||||
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0))
|
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
|
||||||
|
|
||||||
|
self.t_bbox = t_bbox
|
||||||
|
|
||||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||||
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
|
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
|
||||||
|
|
|
||||||
|
|
@ -344,9 +344,9 @@ def flag_font_size(textline, direction):
|
||||||
fchars = [t[0] for t in chars]
|
fchars = [t[0] for t in chars]
|
||||||
if ''.join(fchars).strip():
|
if ''.join(fchars).strip():
|
||||||
flist.append(''.join(fchars))
|
flist.append(''.join(fchars))
|
||||||
fstring = ''.join(flist).strip('\n')
|
fstring = ''.join(flist)
|
||||||
else:
|
else:
|
||||||
fstring = ''.join([t.get_text() for t in textline]).strip('\n')
|
fstring = ''.join([t.get_text() for t in textline])
|
||||||
return fstring
|
return fstring
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -419,7 +419,7 @@ def split_textline(table, textline, direction, flag_size=False):
|
||||||
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
|
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
|
||||||
else:
|
else:
|
||||||
gchars = [t[2].get_text() for t in chars]
|
gchars = [t[2].get_text() for t in chars]
|
||||||
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
|
grouped_chars.append((key[0], key[1], ''.join(gchars)))
|
||||||
return grouped_chars
|
return grouped_chars
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -500,7 +500,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False):
|
||||||
if flag_size:
|
if flag_size:
|
||||||
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
|
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
|
||||||
else:
|
else:
|
||||||
return [(r_idx, c_idx, t.get_text().strip('\n'))], error
|
return [(r_idx, c_idx, t.get_text())], error
|
||||||
|
|
||||||
|
|
||||||
def compute_accuracy(error_weights):
|
def compute_accuracy(error_weights):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue