From ce727d955877186f871751c4adf33508f3208753 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Fri, 22 Mar 2019 02:28:29 +0530 Subject: [PATCH 1/3] Fix split text bug --- camelot/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/camelot/utils.py b/camelot/utils.py index 7b22307..48e39af 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -479,6 +479,10 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=''): (obj.x0 + obj.x1) / 2 <= cut[1]): cut_text.append((r, cut[0], obj)) break + else: + # TODO: add test + if cut == x_cuts[-1]: + cut_text.append((r, cut[0] + 1, obj)) elif isinstance(obj, LTAnno): cut_text.append((r, cut[0], obj)) elif direction == 'vertical' and not textline.is_empty(): @@ -496,6 +500,10 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=''): (obj.y0 + obj.y1) / 2 >= cut[1]): cut_text.append((cut[0], c, obj)) break + else: + # TODO: add test + if cut == y_cuts[-1]: + cut_text.append((cut[0] - 1, c, obj)) elif isinstance(obj, LTAnno): cut_text.append((cut[0], c, obj)) except IndexError: From 3071548898bef381253f3b8d9517d6a247bf33e9 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sat, 20 Apr 2019 21:04:09 +0530 Subject: [PATCH 2/3] Update HISTORY.md --- HISTORY.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 2dd00f5..aee5203 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,12 +4,17 @@ Release History master ------ +**Bugfixes** + +* Fix split text bug. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta. + * When a text string spans multiple columns, the last split chunk is completely ignored. + 0.7.2 (2019-01-10) ------------------ - **Bugfixes** +**Bugfixes** - * [#245](https://github.com/socialcopsdev/camelot/issues/245) Fix AttributeError for encrypted files. [#251](https://github.com/socialcopsdev/camelot/pull/251) by Yatin Taluja. +* [#245](https://github.com/socialcopsdev/camelot/issues/245) Fix AttributeError for encrypted files. [#251](https://github.com/socialcopsdev/camelot/pull/251) by Yatin Taluja. 0.7.1 (2019-01-06) From 7ff8b5b89c4bc100bed65eec06a22ed25273a919 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sat, 20 Apr 2019 21:05:05 +0530 Subject: [PATCH 3/3] Update HISTORY.md --- HISTORY.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index aee5203..aa5a1c1 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,8 +6,7 @@ master **Bugfixes** -* Fix split text bug. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta. - * When a text string spans multiple columns, the last split chunk is completely ignored. +* [#293](https://github.com/socialcopsdev/camelot/issues/293) Split text ignores all text to the right of last cut. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta. 0.7.2 (2019-01-10) ------------------