From c019e582bfb4daf01cfb908c0d0e0bd6fb75259a Mon Sep 17 00:00:00 2001 From: Sym Roe Date: Mon, 25 Feb 2019 09:20:09 +0000 Subject: [PATCH 1/4] Add __lt__ to Table to allow sorting Refs #277 --- camelot/core.py | 7 +++++++ tests/test_common.py | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/camelot/core.py b/camelot/core.py index e82a11f..63425cc 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -329,6 +329,13 @@ class Table(object): def __repr__(self): return '<{} shape={}>'.format(self.__class__.__name__, self.shape) + def __lt__(self, other): + if self.page == other.page: + if self.order < other.order: + return True + if self.page < other.page: + return True + @property def data(self): """Returns two-dimensional list of strings in table. diff --git a/tests/test_common.py b/tests/test_common.py index 2335060..fb68bc2 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -5,6 +5,7 @@ import os import pandas as pd import camelot +from camelot.core import Table, TableList from .data import * @@ -247,3 +248,28 @@ def test_arabic(): filename = os.path.join(testdir, "tabula/arabic.pdf") tables = camelot.read_pdf(filename) assert df.equals(tables[0].df) + + +def test_table_order(): + def _mk_table(page, order): + t = Table([], []) + t.page = page + t.order = order + return t + + table_list = TableList( + [_mk_table(2, 1), _mk_table(1, 1), _mk_table(3, 4), _mk_table(1, 2)] + ) + + assert [(t.page, t.order) for t in sorted(table_list)] == [ + (1, 1), + (1, 2), + (2, 1), + (3, 4), + ] + assert [(t.page, t.order) for t in sorted(table_list, reverse=True)] == [ + (3, 4), + (2, 1), + (1, 2), + (1, 1), + ] From 8446271aa44ee4a4737b4042e4ecf23eef7007ae Mon Sep 17 00:00:00 2001 From: Sym Roe Date: Mon, 25 Feb 2019 09:22:25 +0000 Subject: [PATCH 2/4] Always sort TableList after reading PDF --- camelot/handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/camelot/handlers.py b/camelot/handlers.py index bd4c031..d773e4a 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -161,4 +161,4 @@ class PDFHandler(object): t = parser.extract_tables(p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs) tables.extend(t) - return TableList(tables) + return TableList(sorted(tables)) From 88466b8c4ef47c2170769111523cff3a9c7262eb Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Fri, 8 Mar 2019 21:04:34 +0530 Subject: [PATCH 3/4] Rename _mk_table to _make_table --- tests/test_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_common.py b/tests/test_common.py index fb68bc2..c04a151 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -251,14 +251,14 @@ def test_arabic(): def test_table_order(): - def _mk_table(page, order): + def _make_table(page, order): t = Table([], []) t.page = page t.order = order return t table_list = TableList( - [_mk_table(2, 1), _mk_table(1, 1), _mk_table(3, 4), _mk_table(1, 2)] + [_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)] ) assert [(t.page, t.order) for t in sorted(table_list)] == [ From ecf6febaa72f93fed58807c8f6751358b3e3691d Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Fri, 8 Mar 2019 21:20:43 +0530 Subject: [PATCH 4/4] Update HISTORY.md --- HISTORY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 2dd00f5..88bb661 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,8 @@ Release History master ------ +* [#277](https://github.com/socialcopsdev/camelot/issues/277) Sort TableList by order of tables in PDF. [#283](https://github.com/socialcopsdev/camelot/pull/283) by [Sym Roe](https://github.com/symroe). + 0.7.2 (2019-01-10) ------------------