diff --git a/camelot/core.py b/camelot/core.py
index e82a11f..63425cc 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -329,6 +329,13 @@ class Table(object):
     def __repr__(self):
         return '<{} shape={}>'.format(self.__class__.__name__, self.shape)
 
+    def __lt__(self, other):
+        """Order tables by page, then by their order within the page.
+
+        Always returns a bool; the comparison never falls through to None.
+        """
+        return (self.page, self.order) < (other.page, other.order)
+
     @property
     def data(self):
         """Returns two-dimensional list of strings in table.
diff --git a/camelot/handlers.py b/camelot/handlers.py
index bd4c031..d773e4a 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -161,4 +161,4 @@ class PDFHandler(object):
                 t = parser.extract_tables(p, suppress_stdout=suppress_stdout,
                                           layout_kwargs=layout_kwargs)
                 tables.extend(t)
-        return TableList(tables)
+        return TableList(sorted(tables))
diff --git a/tests/test_common.py b/tests/test_common.py
index 2335060..c04a151 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -5,6 +5,7 @@ import os
 import pandas as pd
 
 import camelot
+from camelot.core import Table, TableList
 
 from .data import *
 
@@ -247,3 +248,28 @@ def test_arabic():
     filename = os.path.join(testdir, "tabula/arabic.pdf")
     tables = camelot.read_pdf(filename)
     assert df.equals(tables[0].df)
+
+
+def test_table_order():
+    def _make_table(page, order):
+        t = Table([], [])
+        t.page = page
+        t.order = order
+        return t
+
+    table_list = TableList(
+        [_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
+    )
+
+    assert [(t.page, t.order) for t in sorted(table_list)] == [
+        (1, 1),
+        (1, 2),
+        (2, 1),
+        (3, 4),
+    ]
+    assert [(t.page, t.order) for t in sorted(table_list, reverse=True)] == [
+        (3, 4),
+        (2, 1),
+        (1, 2),
+        (1, 1),
+    ]