Merge pull request #189 from camelot-dev/fix-179
[MRG] Prevent taking max of an empty set
pull/196/head
commit
937185412a
|
|
@ -121,6 +121,7 @@ class Stream(BaseParser):
|
|||
row_y = 0
|
||||
rows = []
|
||||
temp = []
|
||||
|
||||
for t in text:
|
||||
# is checking for upright necessary?
|
||||
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
||||
|
|
@ -131,7 +132,9 @@ class Stream(BaseParser):
|
|||
temp = []
|
||||
row_y = t.y0
|
||||
temp.append(t)
|
||||
|
||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||
if len(rows) > 1:
|
||||
__ = rows.pop(0) # TODO: hacky
|
||||
return rows
|
||||
|
||||
|
|
@ -345,6 +348,9 @@ class Stream(BaseParser):
|
|||
else:
|
||||
# calculate mode of the list of number of elements in
|
||||
# each row to guess the number of columns
|
||||
if not len(elements):
|
||||
cols = [(text_x_min, text_x_max)]
|
||||
else:
|
||||
ncols = max(set(elements), key=elements.count)
|
||||
if ncols == 1:
|
||||
# if mode is 1, the page usually contains not tables
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -160,8 +160,8 @@ def test_cli_output_format():
|
|||
|
||||
def test_cli_quiet():
|
||||
with TemporaryDirectory() as tempdir:
|
||||
infile = os.path.join(testdir, "blank.pdf")
|
||||
outfile = os.path.join(tempdir, "blank.csv")
|
||||
infile = os.path.join(testdir, "empty.pdf")
|
||||
outfile = os.path.join(tempdir, "empty.csv")
|
||||
runner = CliRunner()
|
||||
|
||||
result = runner.invoke(
|
||||
|
|
|
|||
|
|
@ -55,15 +55,33 @@ def test_image_warning():
|
|||
)
|
||||
|
||||
|
||||
def test_no_tables_found():
|
||||
filename = os.path.join(testdir, "blank.pdf")
|
||||
def test_lattice_no_tables_on_page():
|
||||
filename = os.path.join(testdir, "empty.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename)
|
||||
tables = camelot.read_pdf(filename, flavor="lattice")
|
||||
assert str(e.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_stream_no_tables_on_page():
|
||||
filename = os.path.join(testdir, "empty.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename, flavor="stream")
|
||||
assert str(e.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_stream_no_tables_in_area():
|
||||
filename = os.path.join(testdir, "only_page_number.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename, flavor="stream")
|
||||
assert str(e.value) == "No tables found in table area 1"
|
||||
|
||||
|
||||
def test_no_tables_found_logs_suppressed():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
with warnings.catch_warnings():
|
||||
|
|
@ -77,7 +95,7 @@ def test_no_tables_found_logs_suppressed():
|
|||
|
||||
|
||||
def test_no_tables_found_warnings_suppressed():
|
||||
filename = os.path.join(testdir, "blank.pdf")
|
||||
filename = os.path.join(testdir, "empty.pdf")
|
||||
with warnings.catch_warnings():
|
||||
# the test should fail if any warning is thrown
|
||||
warnings.simplefilter("error")
|
||||
|
|
|
|||
Loading…
Reference in New Issue