From 3def4a5aea1c9309b6d98614a12aa3f788271e64 Mon Sep 17 00:00:00 2001 From: Jonathan Lloyd Date: Fri, 19 Oct 2018 12:25:00 +0100 Subject: [PATCH] [MRG + 1] Add suppress_warnings flag (#155) * Add suppress_warnings flag * Add --quiet flag to cli (to suppress warnings) * Remove TODO and update comment --- camelot/cli.py | 9 +++++++-- camelot/io.py | 20 ++++++++++++++------ tests/test_cli.py | 14 ++++++++++++++ tests/test_common.py | 2 +- tests/test_errors.py | 19 ++++++++++++++----- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/camelot/cli.py b/camelot/cli.py index 6a7a08b..e400204 100644 --- a/camelot/cli.py +++ b/camelot/cli.py @@ -38,6 +38,7 @@ pass_config = click.make_pass_decorator(Config) ' font size. Useful to detect super/subscripts.') @click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1), help='PDFMiner char_margin, line_margin and word_margin.') +@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.') @click.pass_context def cli(ctx, *args, **kwargs): """Camelot: PDF Table Extraction for Humans""" @@ -89,6 +90,7 @@ def lattice(c, *args, **kwargs): output = conf.pop('output') f = conf.pop('format') compress = conf.pop('zip') + suppress_warnings = conf.pop('quiet') plot_type = kwargs.pop('plot_type') filepath = kwargs.pop('filepath') kwargs.update(conf) @@ -99,7 +101,8 @@ def lattice(c, *args, **kwargs): kwargs['copy_text'] = None if not copy_text else copy_text kwargs['shift_text'] = list(kwargs['shift_text']) - tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs) + tables = read_pdf(filepath, pages=pages, flavor='lattice', + suppress_warnings=suppress_warnings, **kwargs) click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: @@ -134,6 +137,7 @@ def stream(c, *args, **kwargs): output = conf.pop('output') f = conf.pop('format') compress = conf.pop('zip') + suppress_warnings = conf.pop('quiet') plot_type = kwargs.pop('plot_type') filepath = kwargs.pop('filepath') kwargs.update(conf) @@ -143,7 +147,8 @@ def stream(c, *args, **kwargs): columns = list(kwargs['columns']) kwargs['columns'] = None if not columns else columns - tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs) + tables = read_pdf(filepath, pages=pages, flavor='stream', + suppress_warnings=suppress_warnings, **kwargs) click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: diff --git a/camelot/io.py b/camelot/io.py index 90adc96..5cdb542 100644 --- a/camelot/io.py +++ b/camelot/io.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- +import warnings from .handlers import PDFHandler from .utils import validate_input, remove_extra -def read_pdf(filepath, pages='1', flavor='lattice', **kwargs): +def read_pdf(filepath, pages='1', flavor='lattice', suppress_warnings=False, + **kwargs): """Read PDF and return extracted tables. Note: kwargs annotated with ^ can only be used with flavor='stream' @@ -20,6 +22,8 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs): flavor : str (default: 'lattice') The parsing method to use ('lattice' or 'stream'). Lattice is used by default. + suppress_warnings : bool, optional (default: False) + Prevent warnings from being emitted by Camelot. table_area : list, optional (default: None) List of table area strings of the form x1,y1,x2,y2 where (x1, y1) -> left-top and (x2, y2) -> right-bottom @@ -85,8 +89,12 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs): raise NotImplementedError("Unknown flavor specified." " Use either 'lattice' or 'stream'") - validate_input(kwargs, flavor=flavor) - p = PDFHandler(filepath, pages) - kwargs = remove_extra(kwargs, flavor=flavor) - tables = p.parse(flavor=flavor, **kwargs) - return tables + with warnings.catch_warnings(): + if suppress_warnings: + warnings.simplefilter("ignore") + + validate_input(kwargs, flavor=flavor) + p = PDFHandler(filepath, pages) + kwargs = remove_extra(kwargs, flavor=flavor) + tables = p.parse(flavor=flavor, **kwargs) + return tables diff --git a/tests/test_cli.py b/tests/test_cli.py index b89eab3..4797eae 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -77,3 +77,17 @@ def test_cli_output_format(): result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'), 'stream', infile]) assert result.exit_code == 0 + +def test_cli_quiet_flag(): + with TemporaryDirectory() as tempdir: + infile = os.path.join(testdir, 'blank.pdf') + outfile = os.path.join(tempdir, 'blank.csv') + runner = CliRunner() + + result = runner.invoke(cli, ['--format', 'csv', '--output', outfile, + 'stream', infile]) + assert 'No tables found on page-1' in result.output + + result = runner.invoke(cli, ['--quiet', '--format', 'csv', + '--output', outfile, 'stream', infile]) + assert 'No tables found on page-1' not in result.output diff --git a/tests/test_common.py b/tests/test_common.py index e872fb8..2e7c24e 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -139,7 +139,7 @@ def test_lattice_shift_text(): tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b']) assert df_rb.equals(tables[0].df) - + def test_repr(): filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf(filename) diff --git a/tests/test_errors.py b/tests/test_errors.py index a6ac35a..86e9e5c 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -43,11 +43,20 @@ def test_stream_equal_length(): def test_no_tables_found(): filename = os.path.join(testdir, 'blank.pdf') - # TODO: use pytest.warns with warnings.catch_warnings(): warnings.simplefilter('error') - try: + with pytest.raises(UserWarning) as e: tables = camelot.read_pdf(filename) - except Exception as e: - assert type(e).__name__ == 'UserWarning' - assert str(e) == 'No tables found on page-1' + assert str(e.value) == 'No tables found on page-1' + + +def test_no_tables_found_warnings_supressed(): + filename = os.path.join(testdir, 'blank.pdf') + with warnings.catch_warnings(): + # the test should fail if any warning is thrown + warnings.simplefilter('error') + try: + tables = camelot.read_pdf(filename, suppress_warnings=True) + except Warning as e: + warning_text = str(e) + pytest.fail('Unexpected warning: {}'.format(warning_text))