Merge pull request #283 from symroe/277_table_sorting

[MRG] Sort TableList by order of tables in PDF
pull/2/head
Vinayak Mehta 2019-03-08 21:18:51 +05:30 committed by GitHub
commit a5343dcc25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 1 deletions

View File

@ -329,6 +329,13 @@ class Table(object):
def __repr__(self):
    """Return a short debug representation of the object.

    The string contains the name of the instance's class and the
    value of its ``shape`` attribute.

    Returns
    -------
    str
        Text of the form ``<ClassName shape=SHAPE>``.

    """
    class_name = self.__class__.__name__
    template = '<{} shape={}>'
    return template.format(class_name, self.shape)
def __lt__(self, other):
    """Order objects by ``page`` first, then by ``order``.

    Defining this comparison lets ``sorted()`` and ``list.sort()``
    arrange instances without an explicit ``key`` function.

    Parameters
    ----------
    other : object
        Object to compare against; it must expose ``page`` and
        ``order`` attributes.

    Returns
    -------
    bool
        True if ``self`` has a smaller ``page`` than ``other``, or
        the same ``page`` and a smaller ``order``; False otherwise.

    """
    # Tuples compare element by element, so ``order`` is consulted only
    # when both pages are equal. A boolean is always returned; no path
    # falls through to an implicit None.
    return (self.page, self.order) < (other.page, other.order)
@property
def data(self):
"""Returns two-dimensional list of strings in table.

View File

@ -161,4 +161,4 @@ class PDFHandler(object):
t = parser.extract_tables(p, suppress_stdout=suppress_stdout,
layout_kwargs=layout_kwargs)
tables.extend(t)
return TableList(tables)
return TableList(sorted(tables))

View File

@ -5,6 +5,7 @@ import os
import pandas as pd
import camelot
from camelot.core import Table, TableList
from .data import *
@ -247,3 +248,28 @@ def test_arabic():
filename = os.path.join(testdir, "tabula/arabic.pdf")
tables = camelot.read_pdf(filename)
assert df.equals(tables[0].df)
def test_table_order():
    """Check that tables sort by page and then by order within a page.

    Builds a ``TableList`` from tables in shuffled (page, order)
    positions and verifies both the ascending and the reversed result
    of ``sorted()``.
    """
    def _make_table(page, order):
        # Only the two attributes inspected by the assertions are set.
        table = Table([], [])
        table.page, table.order = page, order
        return table

    shuffled_positions = [(2, 1), (1, 1), (3, 4), (1, 2)]
    table_list = TableList(
        [_make_table(page, order) for page, order in shuffled_positions]
    )

    ascending = [(t.page, t.order) for t in sorted(table_list)]
    assert ascending == [(1, 1), (1, 2), (2, 1), (3, 4)]

    descending = [(t.page, t.order) for t in sorted(table_list, reverse=True)]
    assert descending == [(3, 4), (2, 1), (1, 2), (1, 1)]