diff --git a/HISTORY.md b/HISTORY.md index 878dc08..851a71d 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,7 @@ master **Improvements** * [#207](https://github.com/socialcopsdev/camelot/issues/207) Add a plot type for Stream text edges and detected table areas. [#224](https://github.com/socialcopsdev/camelot/pull/224) by Vinayak Mehta. +* [#204](https://github.com/socialcopsdev/camelot/issues/204) `suppress_warnings` is now called `suppress_stdout`. [#225](https://github.com/socialcopsdev/camelot/pull/225) by Vinayak Mehta. **Bugfixes** diff --git a/camelot/cli.py b/camelot/cli.py index 1b995aa..e978a3c 100644 --- a/camelot/cli.py +++ b/camelot/cli.py @@ -30,6 +30,7 @@ pass_config = click.make_pass_decorator(Config) @click.group() @click.version_option(version=__version__) +@click.option('-q', '--quiet', is_flag=True, help='Suppress logs and warnings.') @click.option('-p', '--pages', default='1', help='Comma-separated page numbers.' ' Example: 1,3,4 or 1,4-end.') @click.option('-pw', '--password', help='Password for decryption.') @@ -44,7 +45,6 @@ pass_config = click.make_pass_decorator(Config) ' font size. 
Useful to detect super/subscripts.') @click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1), help='PDFMiner char_margin, line_margin and word_margin.') -@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.') @click.pass_context def cli(ctx, *args, **kwargs): """Camelot: PDF Table Extraction for Humans""" @@ -96,7 +96,7 @@ def lattice(c, *args, **kwargs): output = conf.pop('output') f = conf.pop('format') compress = conf.pop('zip') - suppress_warnings = conf.pop('quiet') + quiet = conf.pop('quiet') plot_type = kwargs.pop('plot_type') filepath = kwargs.pop('filepath') kwargs.update(conf) @@ -117,7 +117,7 @@ def lattice(c, *args, **kwargs): raise click.UsageError('Please specify output file format using --format') tables = read_pdf(filepath, pages=pages, flavor='lattice', - suppress_warnings=suppress_warnings, **kwargs) + suppress_stdout=quiet, **kwargs) click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: @@ -149,7 +149,7 @@ def stream(c, *args, **kwargs): output = conf.pop('output') f = conf.pop('format') compress = conf.pop('zip') - suppress_warnings = conf.pop('quiet') + quiet = conf.pop('quiet') plot_type = kwargs.pop('plot_type') filepath = kwargs.pop('filepath') kwargs.update(conf) @@ -169,7 +169,7 @@ def stream(c, *args, **kwargs): raise click.UsageError('Please specify output file format using --format') tables = read_pdf(filepath, pages=pages, flavor='stream', - suppress_warnings=suppress_warnings, **kwargs) + suppress_stdout=quiet, **kwargs) click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: diff --git a/camelot/handlers.py b/camelot/handlers.py index 47070a1..a312131 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -125,7 +125,7 @@ class PDFHandler(object): with open(fpath, 'wb') as f: outfile.write(f) - def parse(self, flavor='lattice', **kwargs): + def parse(self, flavor='lattice', suppress_stdout=False, **kwargs): """Extracts 
tables by calling parser.get_tables on all single page PDFs. @@ -134,6 +134,8 @@ class PDFHandler(object): flavor : str (default: 'lattice') The parsing method to use ('lattice' or 'stream'). Lattice is used by default. + suppress_stdout : bool (default: False) + Suppress logs and warnings. kwargs : dict See camelot.read_pdf kwargs. @@ -151,6 +153,6 @@ class PDFHandler(object): for p in self.pages] parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs) for p in pages: - t = parser.extract_tables(p) + t = parser.extract_tables(p, suppress_stdout=suppress_stdout) tables.extend(t) return TableList(tables) diff --git a/camelot/io.py b/camelot/io.py index 3766a7b..4b436ff 100644 --- a/camelot/io.py +++ b/camelot/io.py @@ -6,7 +6,7 @@ from .utils import validate_input, remove_extra def read_pdf(filepath, pages='1', password=None, flavor='lattice', - suppress_warnings=False, **kwargs): + suppress_stdout=False, **kwargs): """Read PDF and return extracted tables. Note: kwargs annotated with ^ can only be used with flavor='stream' @@ -24,8 +24,8 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice', flavor : str (default: 'lattice') The parsing method to use ('lattice' or 'stream'). Lattice is used by default. - suppress_warnings : bool, optional (default: False) - Prevent warnings from being emitted by Camelot. + suppress_stdout : bool, optional (default: False) + Suppress logs and warnings. 
table_areas : list, optional (default: None) List of table area strings of the form x1,y1,x2,y2 where (x1, y1) -> left-top and (x2, y2) -> right-bottom @@ -92,11 +92,11 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice', " Use either 'lattice' or 'stream'") with warnings.catch_warnings(): - if suppress_warnings: + if suppress_stdout: warnings.simplefilter("ignore") validate_input(kwargs, flavor=flavor) p = PDFHandler(filepath, pages=pages, password=password) kwargs = remove_extra(kwargs, flavor=flavor) - tables = p.parse(flavor=flavor, **kwargs) + tables = p.parse(flavor=flavor, suppress_stdout=suppress_stdout, **kwargs) return tables diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 14d8f6c..cca6789 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -345,9 +345,10 @@ class Lattice(BaseParser): return table - def extract_tables(self, filename): + def extract_tables(self, filename, suppress_stdout=False): self._generate_layout(filename) - logger.info('Processing {}'.format(os.path.basename(self.rootname))) + if not suppress_stdout: + logger.info('Processing {}'.format(os.path.basename(self.rootname))) if not self.horizontal_text: warnings.warn("No tables found on {}".format( diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index b6785df..0fbae71 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -384,9 +384,10 @@ class Stream(BaseParser): return table - def extract_tables(self, filename): + def extract_tables(self, filename, suppress_stdout=False): self._generate_layout(filename) - logger.info('Processing {}'.format(os.path.basename(self.rootname))) + if not suppress_stdout: + logger.info('Processing {}'.format(os.path.basename(self.rootname))) if not self.horizontal_text: warnings.warn("No tables found on {}".format( diff --git a/docs/user/cli.rst b/docs/user/cli.rst index 384b985..81dd0bc 100644 --- a/docs/user/cli.rst +++ b/docs/user/cli.rst @@ -15,6 +15,7 
@@ You can print the help for the interface by typing ``camelot --help`` in your fa Options: --version Show the version and exit. + -q, --quiet Suppress logs and warnings. -p, --pages TEXT Comma-separated page numbers. Example: 1,3,4 or 1,4-end. -pw, --password TEXT Password for decryption. @@ -28,7 +29,6 @@ You can print the help for the interface by typing ``camelot --help`` in your fa -M, --margins ... PDFMiner char_margin, line_margin and word_margin. - -q, --quiet Suppress warnings. --help Show this message and exit. Commands: diff --git a/tests/test_errors.py b/tests/test_errors.py index a52aae4..e0dc24a 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -50,13 +50,25 @@ def test_no_tables_found(): assert str(e.value) == 'No tables found on page-1' +def test_no_tables_found_logs_suppressed(): + filename = os.path.join(testdir, 'foo.pdf') + with warnings.catch_warnings(): + # the test should fail if any warning is thrown + warnings.simplefilter('error') + try: + tables = camelot.read_pdf(filename, suppress_stdout=True) + except Warning as e: + warning_text = str(e) + pytest.fail('Unexpected warning: {}'.format(warning_text)) + + def test_no_tables_found_warnings_suppressed(): filename = os.path.join(testdir, 'blank.pdf') with warnings.catch_warnings(): # the test should fail if any warning is thrown warnings.simplefilter('error') try: - tables = camelot.read_pdf(filename, suppress_warnings=True) + tables = camelot.read_pdf(filename, suppress_stdout=True) except Warning as e: warning_text = str(e) pytest.fail('Unexpected warning: {}'.format(warning_text))