Fix #192
parent
33cea45346
commit
5e71f0b0e6
|
|
@ -271,10 +271,11 @@ class Lattice(BaseParser):
|
|||
tk, self.vertical_segments, self.horizontal_segments)
|
||||
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
||||
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
||||
self.t_bbox = t_bbox
|
||||
|
||||
for direction in self.t_bbox:
|
||||
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0))
|
||||
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
|
||||
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
|
||||
|
||||
self.t_bbox = t_bbox
|
||||
|
||||
cols, rows = zip(*self.table_bbox[tk])
|
||||
cols, rows = list(cols), list(rows)
|
||||
|
|
|
|||
|
|
@ -293,10 +293,11 @@ class Stream(BaseParser):
|
|||
t_bbox = {}
|
||||
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text)
|
||||
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text)
|
||||
self.t_bbox = t_bbox
|
||||
|
||||
for direction in self.t_bbox:
|
||||
self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0))
|
||||
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0))
|
||||
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0))
|
||||
|
||||
self.t_bbox = t_bbox
|
||||
|
||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
|
||||
|
|
|
|||
|
|
@ -344,9 +344,9 @@ def flag_font_size(textline, direction):
|
|||
fchars = [t[0] for t in chars]
|
||||
if ''.join(fchars).strip():
|
||||
flist.append(''.join(fchars))
|
||||
fstring = ''.join(flist).strip('\n')
|
||||
fstring = ''.join(flist)
|
||||
else:
|
||||
fstring = ''.join([t.get_text() for t in textline]).strip('\n')
|
||||
fstring = ''.join([t.get_text() for t in textline])
|
||||
return fstring
|
||||
|
||||
|
||||
|
|
@ -419,7 +419,7 @@ def split_textline(table, textline, direction, flag_size=False):
|
|||
grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction)))
|
||||
else:
|
||||
gchars = [t[2].get_text() for t in chars]
|
||||
grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
|
||||
grouped_chars.append((key[0], key[1], ''.join(gchars)))
|
||||
return grouped_chars
|
||||
|
||||
|
||||
|
|
@ -500,7 +500,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False):
|
|||
if flag_size:
|
||||
return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error
|
||||
else:
|
||||
return [(r_idx, c_idx, t.get_text().strip('\n'))], error
|
||||
return [(r_idx, c_idx, t.get_text())], error
|
||||
|
||||
|
||||
def compute_accuracy(error_weights):
|
||||
|
|
|
|||
Loading…
Reference in New Issue