Change kwarg name
parent
de0079a711
commit
591cfd5291
|
|
@ -7,7 +7,7 @@ master
|
||||||
**Improvements**
|
**Improvements**
|
||||||
|
|
||||||
* [#207](https://github.com/socialcopsdev/camelot/issues/207) Add a plot type for Stream text edges and detected table areas. [#224](https://github.com/socialcopsdev/camelot/pull/224) by Vinayak Mehta.
|
* [#207](https://github.com/socialcopsdev/camelot/issues/207) Add a plot type for Stream text edges and detected table areas. [#224](https://github.com/socialcopsdev/camelot/pull/224) by Vinayak Mehta.
|
||||||
* [#204](https://github.com/socialcopsdev/camelot/issues/204) `suppress_warnings` is now called `verbose`. By default, all logs and warnings will be printed. [#225](https://github.com/socialcopsdev/camelot/pull/225) by Vinayak Mehta.
|
* [#204](https://github.com/socialcopsdev/camelot/issues/204) `suppress_warnings` is now called `suppress_stdout`. [#225](https://github.com/socialcopsdev/camelot/pull/225) by Vinayak Mehta.
|
||||||
|
|
||||||
**Bugfixes**
|
**Bugfixes**
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ pass_config = click.make_pass_decorator(Config)
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
@click.version_option(version=__version__)
|
@click.version_option(version=__version__)
|
||||||
@click.option('-v', '--verbose', is_flag=True, help='Verbose.')
|
@click.option('-q', '--quiet', is_flag=False, help='Suppress logs and warnings.')
|
||||||
@click.option('-p', '--pages', default='1', help='Comma-separated page numbers.'
|
@click.option('-p', '--pages', default='1', help='Comma-separated page numbers.'
|
||||||
' Example: 1,3,4 or 1,4-end.')
|
' Example: 1,3,4 or 1,4-end.')
|
||||||
@click.option('-pw', '--password', help='Password for decryption.')
|
@click.option('-pw', '--password', help='Password for decryption.')
|
||||||
|
|
@ -96,7 +96,7 @@ def lattice(c, *args, **kwargs):
|
||||||
output = conf.pop('output')
|
output = conf.pop('output')
|
||||||
f = conf.pop('format')
|
f = conf.pop('format')
|
||||||
compress = conf.pop('zip')
|
compress = conf.pop('zip')
|
||||||
verbose = conf.pop('verbose')
|
quiet = conf.pop('quiet')
|
||||||
plot_type = kwargs.pop('plot_type')
|
plot_type = kwargs.pop('plot_type')
|
||||||
filepath = kwargs.pop('filepath')
|
filepath = kwargs.pop('filepath')
|
||||||
kwargs.update(conf)
|
kwargs.update(conf)
|
||||||
|
|
@ -117,7 +117,7 @@ def lattice(c, *args, **kwargs):
|
||||||
raise click.UsageError('Please specify output file format using --format')
|
raise click.UsageError('Please specify output file format using --format')
|
||||||
|
|
||||||
tables = read_pdf(filepath, pages=pages, flavor='lattice',
|
tables = read_pdf(filepath, pages=pages, flavor='lattice',
|
||||||
verbose=verbose, **kwargs)
|
suppress_stdout=quiet, **kwargs)
|
||||||
click.echo('Found {} tables'.format(tables.n))
|
click.echo('Found {} tables'.format(tables.n))
|
||||||
if plot_type is not None:
|
if plot_type is not None:
|
||||||
for table in tables:
|
for table in tables:
|
||||||
|
|
@ -149,7 +149,7 @@ def stream(c, *args, **kwargs):
|
||||||
output = conf.pop('output')
|
output = conf.pop('output')
|
||||||
f = conf.pop('format')
|
f = conf.pop('format')
|
||||||
compress = conf.pop('zip')
|
compress = conf.pop('zip')
|
||||||
verbose = conf.pop('verbose')
|
quiet = conf.pop('quiet')
|
||||||
plot_type = kwargs.pop('plot_type')
|
plot_type = kwargs.pop('plot_type')
|
||||||
filepath = kwargs.pop('filepath')
|
filepath = kwargs.pop('filepath')
|
||||||
kwargs.update(conf)
|
kwargs.update(conf)
|
||||||
|
|
@ -169,7 +169,7 @@ def stream(c, *args, **kwargs):
|
||||||
raise click.UsageError('Please specify output file format using --format')
|
raise click.UsageError('Please specify output file format using --format')
|
||||||
|
|
||||||
tables = read_pdf(filepath, pages=pages, flavor='stream',
|
tables = read_pdf(filepath, pages=pages, flavor='stream',
|
||||||
verbose=verbose, **kwargs)
|
suppress_stdout=quiet, **kwargs)
|
||||||
click.echo('Found {} tables'.format(tables.n))
|
click.echo('Found {} tables'.format(tables.n))
|
||||||
if plot_type is not None:
|
if plot_type is not None:
|
||||||
for table in tables:
|
for table in tables:
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,7 @@ class PDFHandler(object):
|
||||||
with open(fpath, 'wb') as f:
|
with open(fpath, 'wb') as f:
|
||||||
outfile.write(f)
|
outfile.write(f)
|
||||||
|
|
||||||
def parse(self, flavor='lattice', verbose=True, **kwargs):
|
def parse(self, flavor='lattice', suppress_stdout=False, **kwargs):
|
||||||
"""Extracts tables by calling parser.get_tables on all single
|
"""Extracts tables by calling parser.get_tables on all single
|
||||||
page PDFs.
|
page PDFs.
|
||||||
|
|
||||||
|
|
@ -134,8 +134,8 @@ class PDFHandler(object):
|
||||||
flavor : str (default: 'lattice')
|
flavor : str (default: 'lattice')
|
||||||
The parsing method to use ('lattice' or 'stream').
|
The parsing method to use ('lattice' or 'stream').
|
||||||
Lattice is used by default.
|
Lattice is used by default.
|
||||||
verbose : str (default: True)
|
suppress_stdout : bool (default: False)
|
||||||
Print all logs and warnings.
|
Suppress logs and warnings.
|
||||||
kwargs : dict
|
kwargs : dict
|
||||||
See camelot.read_pdf kwargs.
|
See camelot.read_pdf kwargs.
|
||||||
|
|
||||||
|
|
@ -153,6 +153,6 @@ class PDFHandler(object):
|
||||||
for p in self.pages]
|
for p in self.pages]
|
||||||
parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
|
parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
|
||||||
for p in pages:
|
for p in pages:
|
||||||
t = parser.extract_tables(p, verbose=verbose)
|
t = parser.extract_tables(p, suppress_stdout=suppress_stdout)
|
||||||
tables.extend(t)
|
tables.extend(t)
|
||||||
return TableList(tables)
|
return TableList(tables)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from .utils import validate_input, remove_extra
|
||||||
|
|
||||||
|
|
||||||
def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
||||||
verbose=True, **kwargs):
|
suppress_stdout=False, **kwargs):
|
||||||
"""Read PDF and return extracted tables.
|
"""Read PDF and return extracted tables.
|
||||||
|
|
||||||
Note: kwargs annotated with ^ can only be used with flavor='stream'
|
Note: kwargs annotated with ^ can only be used with flavor='stream'
|
||||||
|
|
@ -24,7 +24,7 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
||||||
flavor : str (default: 'lattice')
|
flavor : str (default: 'lattice')
|
||||||
The parsing method to use ('lattice' or 'stream').
|
The parsing method to use ('lattice' or 'stream').
|
||||||
Lattice is used by default.
|
Lattice is used by default.
|
||||||
verbose : bool, optional (default: True)
|
suppress_stdout : bool, optional (default: False)
|
||||||
Print all logs and warnings.
|
Suppress logs and warnings.
|
||||||
table_areas : list, optional (default: None)
|
table_areas : list, optional (default: None)
|
||||||
List of table area strings of the form x1,y1,x2,y2
|
List of table area strings of the form x1,y1,x2,y2
|
||||||
|
|
@ -92,11 +92,11 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
||||||
" Use either 'lattice' or 'stream'")
|
" Use either 'lattice' or 'stream'")
|
||||||
|
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
if not verbose:
|
if suppress_stdout:
|
||||||
warnings.simplefilter("ignore")
|
warnings.simplefilter("ignore")
|
||||||
|
|
||||||
validate_input(kwargs, flavor=flavor)
|
validate_input(kwargs, flavor=flavor)
|
||||||
p = PDFHandler(filepath, pages=pages, password=password)
|
p = PDFHandler(filepath, pages=pages, password=password)
|
||||||
kwargs = remove_extra(kwargs, flavor=flavor)
|
kwargs = remove_extra(kwargs, flavor=flavor)
|
||||||
tables = p.parse(flavor=flavor, verbose=verbose, **kwargs)
|
tables = p.parse(flavor=flavor, suppress_stdout=suppress_stdout, **kwargs)
|
||||||
return tables
|
return tables
|
||||||
|
|
|
||||||
|
|
@ -345,9 +345,9 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
def extract_tables(self, filename, verbose=True):
|
def extract_tables(self, filename, suppress_stdout=False):
|
||||||
self._generate_layout(filename)
|
self._generate_layout(filename)
|
||||||
if verbose:
|
if not suppress_stdout:
|
||||||
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
|
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
|
||||||
|
|
||||||
if not self.horizontal_text:
|
if not self.horizontal_text:
|
||||||
|
|
|
||||||
|
|
@ -384,9 +384,9 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
def extract_tables(self, filename, verbose=True):
|
def extract_tables(self, filename, suppress_stdout=False):
|
||||||
self._generate_layout(filename)
|
self._generate_layout(filename)
|
||||||
if verbose:
|
if not suppress_stdout:
|
||||||
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
|
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
|
||||||
|
|
||||||
if not self.horizontal_text:
|
if not self.horizontal_text:
|
||||||
|
|
|
||||||
|
|
@ -50,13 +50,25 @@ def test_no_tables_found():
|
||||||
assert str(e.value) == 'No tables found on page-1'
|
assert str(e.value) == 'No tables found on page-1'
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_tables_found_logs_suppressed():
|
||||||
|
filename = os.path.join(testdir, 'foo.pdf')
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
# the test should fail if any warning is thrown
|
||||||
|
warnings.simplefilter('error')
|
||||||
|
try:
|
||||||
|
tables = camelot.read_pdf(filename, suppress_stdout=True)
|
||||||
|
except Warning as e:
|
||||||
|
warning_text = str(e)
|
||||||
|
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
||||||
|
|
||||||
|
|
||||||
def test_no_tables_found_warnings_suppressed():
|
def test_no_tables_found_warnings_suppressed():
|
||||||
filename = os.path.join(testdir, 'blank.pdf')
|
filename = os.path.join(testdir, 'blank.pdf')
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
# the test should fail if any warning is thrown
|
# the test should fail if any warning is thrown
|
||||||
warnings.simplefilter('error')
|
warnings.simplefilter('error')
|
||||||
try:
|
try:
|
||||||
tables = camelot.read_pdf(filename, verbose=False)
|
tables = camelot.read_pdf(filename, suppress_stdout=True)
|
||||||
except Warning as e:
|
except Warning as e:
|
||||||
warning_text = str(e)
|
warning_text = str(e)
|
||||||
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue