Merge branch 'master' into fix-split-bug

pull/2/head
Vinayak Mehta 2019-04-20 21:06:47 +05:30 committed by GitHub
commit 355ae818a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 1 deletions

View File

@ -7,6 +7,7 @@ master
**Bugfixes** **Bugfixes**
* [#293](https://github.com/socialcopsdev/camelot/issues/293) Split text ignores all text to the right of last cut. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta. * [#293](https://github.com/socialcopsdev/camelot/issues/293) Split text ignores all text to the right of last cut. [#294](https://github.com/socialcopsdev/camelot/pull/294) by Vinayak Mehta.
* [#277](https://github.com/socialcopsdev/camelot/issues/277) Sort TableList by order of tables in PDF. [#283](https://github.com/socialcopsdev/camelot/pull/283) by [Sym Roe](https://github.com/symroe).
0.7.2 (2019-01-10) 0.7.2 (2019-01-10)
------------------ ------------------

View File

@ -329,6 +329,13 @@ class Table(object):
def __repr__(self): def __repr__(self):
return '<{} shape={}>'.format(self.__class__.__name__, self.shape) return '<{} shape={}>'.format(self.__class__.__name__, self.shape)
def __lt__(self, other):
    """Return True when this table sorts before ``other``.

    Tables are compared by ``page`` first; when both are on the same
    page, ``order`` breaks the tie.

    Parameters
    ----------
    other : object
        Object exposing ``page`` and ``order`` attributes.

    Returns
    -------
    bool
        True if ``(self.page, self.order)`` is strictly less than
        ``(other.page, other.order)``, False otherwise.

    """
    # Lexicographic tuple comparison gives the page-then-order rule and
    # always yields a real bool (never an implicit None).
    return (self.page, self.order) < (other.page, other.order)
@property @property
def data(self): def data(self):
"""Returns two-dimensional list of strings in table. """Returns two-dimensional list of strings in table.

View File

@ -161,4 +161,4 @@ class PDFHandler(object):
t = parser.extract_tables(p, suppress_stdout=suppress_stdout, t = parser.extract_tables(p, suppress_stdout=suppress_stdout,
layout_kwargs=layout_kwargs) layout_kwargs=layout_kwargs)
tables.extend(t) tables.extend(t)
return TableList(tables) return TableList(sorted(tables))

View File

@ -5,6 +5,7 @@ import os
import pandas as pd import pandas as pd
import camelot import camelot
from camelot.core import Table, TableList
from .data import * from .data import *
@ -247,3 +248,28 @@ def test_arabic():
filename = os.path.join(testdir, "tabula/arabic.pdf") filename = os.path.join(testdir, "tabula/arabic.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
assert df.equals(tables[0].df) assert df.equals(tables[0].df)
def test_table_order():
    """Sorting tables orders them by page, then by order within a page."""

    def _make_table(page, order):
        # Bare table carrying only the attributes the ordering reads.
        table = Table([], [])
        table.page, table.order = page, order
        return table

    positions = [(2, 1), (1, 1), (3, 4), (1, 2)]
    table_list = TableList(
        [_make_table(page, order) for page, order in positions]
    )

    ascending = sorted(table_list)
    assert [(t.page, t.order) for t in ascending] == [
        (1, 1),
        (1, 2),
        (2, 1),
        (3, 4),
    ]

    descending = sorted(table_list, reverse=True)
    assert [(t.page, t.order) for t in descending] == [
        (3, 4),
        (2, 1),
        (1, 2),
        (1, 1),
    ]