From 5e71f0b0e6a3aec75bf4d2b17010a8707feccbc7 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Thu, 13 Dec 2018 12:50:30 +0530 Subject: [PATCH] Fix #192 --- camelot/parsers/lattice.py | 7 ++++--- camelot/parsers/stream.py | 7 ++++--- camelot/utils.py | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 14430c8..8a85d1d 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -271,10 +271,11 @@ class Lattice(BaseParser): tk, self.vertical_segments, self.horizontal_segments) t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) - self.t_bbox = t_bbox - for direction in self.t_bbox: - self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) + t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox cols, rows = zip(*self.table_bbox[tk]) cols, rows = list(cols), list(rows) diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index d9481cc..eab8276 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -293,10 +293,11 @@ class Stream(BaseParser): t_bbox = {} t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) - self.t_bbox = t_bbox - for direction in self.t_bbox: - self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) + t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol) diff --git a/camelot/utils.py b/camelot/utils.py index 2d735c8..cd55e4e 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -344,9 +344,9 @@ def flag_font_size(textline, direction): fchars = [t[0] for t in chars] if ''.join(fchars).strip(): flist.append(''.join(fchars)) - fstring = ''.join(flist).strip('\n') + fstring = ''.join(flist) else: - fstring = ''.join([t.get_text() for t in textline]).strip('\n') + fstring = ''.join([t.get_text() for t in textline]) return fstring @@ -419,7 +419,7 @@ def split_textline(table, textline, direction, flag_size=False): grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction))) else: gchars = [t[2].get_text() for t in chars] - grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n'))) + grouped_chars.append((key[0], key[1], ''.join(gchars))) return grouped_chars @@ -500,7 +500,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False): if flag_size: return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error else: - return [(r_idx, c_idx, t.get_text().strip('\n'))], error + return [(r_idx, c_idx, t.get_text())], error def compute_accuracy(error_weights):