Merge pull request #283 from symroe/277_table_sorting
[MRG] Sort TableList by order of tables in PDF
pull/2/head
commit
a5343dcc25
|
|
@ -329,6 +329,13 @@ class Table(object):
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return '<{} shape={}>'.format(self.__class__.__name__, self.shape)
|
return '<{} shape={}>'.format(self.__class__.__name__, self.shape)
|
||||||
|
|
||||||
|
def __lt__(self, other):
    """Return True if this table sorts before ``other``.

    Tables are compared by ``page`` first and, when both are on the same
    page, by ``order``. Comparing the two attributes as a tuple gives
    exactly that lexicographic ordering and always yields a real ``bool``
    (the previous nested-``if`` form fell through and returned ``None``
    whenever the answer was "not less than").

    ``other`` must expose ``page`` and ``order`` attributes.
    """
    return (self.page, self.order) < (other.page, other.order)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def data(self):
|
def data(self):
|
||||||
"""Returns two-dimensional list of strings in table.
|
"""Returns two-dimensional list of strings in table.
|
||||||
|
|
|
||||||
|
|
@ -161,4 +161,4 @@ class PDFHandler(object):
|
||||||
t = parser.extract_tables(p, suppress_stdout=suppress_stdout,
|
t = parser.extract_tables(p, suppress_stdout=suppress_stdout,
|
||||||
layout_kwargs=layout_kwargs)
|
layout_kwargs=layout_kwargs)
|
||||||
tables.extend(t)
|
tables.extend(t)
|
||||||
return TableList(tables)
|
return TableList(sorted(tables))
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
import camelot
|
import camelot
|
||||||
|
from camelot.core import Table, TableList
|
||||||
|
|
||||||
from .data import *
|
from .data import *
|
||||||
|
|
||||||
|
|
@ -247,3 +248,28 @@ def test_arabic():
|
||||||
filename = os.path.join(testdir, "tabula/arabic.pdf")
|
filename = os.path.join(testdir, "tabula/arabic.pdf")
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
assert df.equals(tables[0].df)
|
assert df.equals(tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
def test_table_order():
    """Sorting tables orders them by page, then by order within a page."""

    def _make_table(page, order):
        # Bare table carrying only the two attributes the ordering reads.
        table = Table([], [])
        table.page = page
        table.order = order
        return table

    # Deliberately shuffled (page, order) positions.
    positions = [(2, 1), (1, 1), (3, 4), (1, 2)]
    table_list = TableList([_make_table(page, order) for page, order in positions])

    expected = [(1, 1), (1, 2), (2, 1), (3, 4)]

    ascending = [(t.page, t.order) for t in sorted(table_list)]
    assert ascending == expected

    # Reverse sorting must give the exact mirror image.
    descending = [(t.page, t.order) for t in sorted(table_list, reverse=True)]
    assert descending == expected[::-1]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue