[MRG + 1] Add suppress_warnings flag (#155)
* Add suppress_warnings flag
* Add --quiet flag to cli (to suppress warnings)
* Remove TODO and update comment
pull/2/head
parent
1d064adc3e
commit
3def4a5aea
|
|
@ -38,6 +38,7 @@ pass_config = click.make_pass_decorator(Config)
|
||||||
' font size. Useful to detect super/subscripts.')
|
' font size. Useful to detect super/subscripts.')
|
||||||
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
|
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
|
||||||
help='PDFMiner char_margin, line_margin and word_margin.')
|
help='PDFMiner char_margin, line_margin and word_margin.')
|
||||||
|
@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.')
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def cli(ctx, *args, **kwargs):
|
def cli(ctx, *args, **kwargs):
|
||||||
"""Camelot: PDF Table Extraction for Humans"""
|
"""Camelot: PDF Table Extraction for Humans"""
|
||||||
|
|
@ -89,6 +90,7 @@ def lattice(c, *args, **kwargs):
|
||||||
output = conf.pop('output')
|
output = conf.pop('output')
|
||||||
f = conf.pop('format')
|
f = conf.pop('format')
|
||||||
compress = conf.pop('zip')
|
compress = conf.pop('zip')
|
||||||
|
suppress_warnings = conf.pop('quiet')
|
||||||
plot_type = kwargs.pop('plot_type')
|
plot_type = kwargs.pop('plot_type')
|
||||||
filepath = kwargs.pop('filepath')
|
filepath = kwargs.pop('filepath')
|
||||||
kwargs.update(conf)
|
kwargs.update(conf)
|
||||||
|
|
@ -99,7 +101,8 @@ def lattice(c, *args, **kwargs):
|
||||||
kwargs['copy_text'] = None if not copy_text else copy_text
|
kwargs['copy_text'] = None if not copy_text else copy_text
|
||||||
kwargs['shift_text'] = list(kwargs['shift_text'])
|
kwargs['shift_text'] = list(kwargs['shift_text'])
|
||||||
|
|
||||||
tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
|
tables = read_pdf(filepath, pages=pages, flavor='lattice',
|
||||||
|
suppress_warnings=suppress_warnings, **kwargs)
|
||||||
click.echo('Found {} tables'.format(tables.n))
|
click.echo('Found {} tables'.format(tables.n))
|
||||||
if plot_type is not None:
|
if plot_type is not None:
|
||||||
for table in tables:
|
for table in tables:
|
||||||
|
|
@ -134,6 +137,7 @@ def stream(c, *args, **kwargs):
|
||||||
output = conf.pop('output')
|
output = conf.pop('output')
|
||||||
f = conf.pop('format')
|
f = conf.pop('format')
|
||||||
compress = conf.pop('zip')
|
compress = conf.pop('zip')
|
||||||
|
suppress_warnings = conf.pop('quiet')
|
||||||
plot_type = kwargs.pop('plot_type')
|
plot_type = kwargs.pop('plot_type')
|
||||||
filepath = kwargs.pop('filepath')
|
filepath = kwargs.pop('filepath')
|
||||||
kwargs.update(conf)
|
kwargs.update(conf)
|
||||||
|
|
@ -143,7 +147,8 @@ def stream(c, *args, **kwargs):
|
||||||
columns = list(kwargs['columns'])
|
columns = list(kwargs['columns'])
|
||||||
kwargs['columns'] = None if not columns else columns
|
kwargs['columns'] = None if not columns else columns
|
||||||
|
|
||||||
tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
|
tables = read_pdf(filepath, pages=pages, flavor='stream',
|
||||||
|
suppress_warnings=suppress_warnings, **kwargs)
|
||||||
click.echo('Found {} tables'.format(tables.n))
|
click.echo('Found {} tables'.format(tables.n))
|
||||||
if plot_type is not None:
|
if plot_type is not None:
|
||||||
for table in tables:
|
for table in tables:
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,12 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
import warnings
|
||||||
|
|
||||||
from .handlers import PDFHandler
|
from .handlers import PDFHandler
|
||||||
from .utils import validate_input, remove_extra
|
from .utils import validate_input, remove_extra
|
||||||
|
|
||||||
|
|
||||||
def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
|
def read_pdf(filepath, pages='1', flavor='lattice', suppress_warnings=False,
|
||||||
|
**kwargs):
|
||||||
"""Read PDF and return extracted tables.
|
"""Read PDF and return extracted tables.
|
||||||
|
|
||||||
Note: kwargs annotated with ^ can only be used with flavor='stream'
|
Note: kwargs annotated with ^ can only be used with flavor='stream'
|
||||||
|
|
@ -20,6 +22,8 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
|
||||||
flavor : str (default: 'lattice')
|
flavor : str (default: 'lattice')
|
||||||
The parsing method to use ('lattice' or 'stream').
|
The parsing method to use ('lattice' or 'stream').
|
||||||
Lattice is used by default.
|
Lattice is used by default.
|
||||||
|
suppress_warnings : bool, optional (default: False)
|
||||||
|
Prevent warnings from being emitted by Camelot.
|
||||||
table_area : list, optional (default: None)
|
table_area : list, optional (default: None)
|
||||||
List of table area strings of the form x1,y1,x2,y2
|
List of table area strings of the form x1,y1,x2,y2
|
||||||
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
|
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
|
||||||
|
|
@ -85,6 +89,10 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
|
||||||
raise NotImplementedError("Unknown flavor specified."
|
raise NotImplementedError("Unknown flavor specified."
|
||||||
" Use either 'lattice' or 'stream'")
|
" Use either 'lattice' or 'stream'")
|
||||||
|
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
if suppress_warnings:
|
||||||
|
warnings.simplefilter("ignore")
|
||||||
|
|
||||||
validate_input(kwargs, flavor=flavor)
|
validate_input(kwargs, flavor=flavor)
|
||||||
p = PDFHandler(filepath, pages)
|
p = PDFHandler(filepath, pages)
|
||||||
kwargs = remove_extra(kwargs, flavor=flavor)
|
kwargs = remove_extra(kwargs, flavor=flavor)
|
||||||
|
|
|
||||||
|
|
@ -77,3 +77,17 @@ def test_cli_output_format():
|
||||||
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
|
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
|
||||||
'stream', infile])
|
'stream', infile])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
def test_cli_quiet_flag():
|
||||||
|
with TemporaryDirectory() as tempdir:
|
||||||
|
infile = os.path.join(testdir, 'blank.pdf')
|
||||||
|
outfile = os.path.join(tempdir, 'blank.csv')
|
||||||
|
runner = CliRunner()
|
||||||
|
|
||||||
|
result = runner.invoke(cli, ['--format', 'csv', '--output', outfile,
|
||||||
|
'stream', infile])
|
||||||
|
assert 'No tables found on page-1' in result.output
|
||||||
|
|
||||||
|
result = runner.invoke(cli, ['--quiet', '--format', 'csv',
|
||||||
|
'--output', outfile, 'stream', infile])
|
||||||
|
assert 'No tables found on page-1' not in result.output
|
||||||
|
|
|
||||||
|
|
@ -43,11 +43,20 @@ def test_stream_equal_length():
|
||||||
|
|
||||||
def test_no_tables_found():
|
def test_no_tables_found():
|
||||||
filename = os.path.join(testdir, 'blank.pdf')
|
filename = os.path.join(testdir, 'blank.pdf')
|
||||||
# TODO: use pytest.warns
|
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter('error')
|
warnings.simplefilter('error')
|
||||||
try:
|
with pytest.raises(UserWarning) as e:
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
except Exception as e:
|
assert str(e.value) == 'No tables found on page-1'
|
||||||
assert type(e).__name__ == 'UserWarning'
|
|
||||||
assert str(e) == 'No tables found on page-1'
|
|
||||||
|
def test_no_tables_found_warnings_supressed():
|
||||||
|
filename = os.path.join(testdir, 'blank.pdf')
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
# the test should fail if any warning is thrown
|
||||||
|
warnings.simplefilter('error')
|
||||||
|
try:
|
||||||
|
tables = camelot.read_pdf(filename, suppress_warnings=True)
|
||||||
|
except Warning as e:
|
||||||
|
warning_text = str(e)
|
||||||
|
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue