From 93b4dabcc2cc670517b0e071d2f6562f13a88ed6 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Mon, 24 Sep 2018 01:00:30 +0530 Subject: [PATCH] Update CLI --- camelot/cli.py | 162 ++++++++++++++++++++++++------------------------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/camelot/cli.py b/camelot/cli.py index 4106ad8..02e37d3 100644 --- a/camelot/cli.py +++ b/camelot/cli.py @@ -20,21 +20,21 @@ pass_config = click.make_pass_decorator(Config) @click.group() @click.version_option(version=__version__) -@click.option("-p", "--pages", default="1", help="Comma-separated page numbers" - " to parse. Example: 1,3,4 or 1,4-end") -@click.option("-o", "--output", help="Output filepath.") -@click.option("-f", "--format", - type=click.Choice(["csv", "json", "excel", "html"]), - help="Output file format.") -@click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP" - " archive.") -@click.option("-split", "--split_text", is_flag=True, help="Whether or not to" - " split text if it spans across multiple cells.") -@click.option("-flag", "--flag_size", is_flag=True, help="(inactive) Whether or" - " not to flag text which has uncommon size. (Useful to detect" - " super/subscripts)") -@click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1), - help="char_margin, line_margin, word_margin for PDFMiner.") +@click.option('-p', '--pages', default='1', help='Comma-separated page numbers' + ' to parse. Example: 1,3,4 or 1,4-end') +@click.option('-o', '--output', help='Output filepath.') +@click.option('-f', '--format', + type=click.Choice(['csv', 'json', 'excel', 'html']), + help='Output file format.') +@click.option('-z', '--zip', is_flag=True, help='Whether or not to create a ZIP' + ' archive.') +@click.option('-split', '--split_text', is_flag=True, help='Whether or not to' + ' split text if it spans across multiple cells.') +@click.option('-flag', '--flag_size', is_flag=True, help='(inactive) Whether or' + ' not to flag text which has uncommon size. (Useful to detect' + ' super/subscripts)') +@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1), + help='char_margin, line_margin, word_margin for PDFMiner.') @click.pass_context def cli(ctx, *args, **kwargs): ctx.obj = Config() @@ -43,53 +43,53 @@ def cli(ctx, *args, **kwargs): @cli.command('lattice') -@click.option("-T", "--table_area", default=[], multiple=True, - help="Table areas (x1,y1,x2,y2) to process.\n" - " x1, y1 -> left-top and x2, y2 -> right-bottom") -@click.option("-back", "--process_background", is_flag=True, - help="(with --mesh) Whether or not to process lines that are in" - " background.") -@click.option("-scale", "--line_size_scaling", default=15, - help="(with --mesh) Factor by which the page dimensions will be" - " divided to get smallest length of detected lines.") -@click.option("-copy", "--copy_text", default=[], type=click.Choice(["h", "v"]), - multiple=True, help="(with --mesh) Specify direction" - " in which text will be copied over in a spanning cell.") -@click.option("-shift", "--shift_text", default=["l", "t"], - type=click.Choice(["", "l", "r", "t", "b"]), multiple=True, - help="(with --mesh) Specify direction in which text in a spanning" - " cell should flow.") -@click.option("-l", "--line_close_tol", default=2, - help="(with --mesh) Tolerance parameter used to merge close vertical" - " lines and close horizontal lines.") -@click.option("-j", "--joint_close_tol", default=2, - help="(with --mesh) Tolerance parameter used to decide whether" - " the detected lines and points lie close to each other.") -@click.option("-block", "--threshold_blocksize", default=15, - help="(with --mesh) For adaptive thresholding, size of a pixel" - " neighborhood that is used to calculate a threshold value for" - " the pixel: 3, 5, 7, and so on.") -@click.option("-const", "--threshold_constant", default=-2, - help="(with --mesh) For adaptive thresholding, constant subtracted" - " from the mean or weighted mean.\nNormally, it is positive but" - " may be zero or negative as well.") -@click.option("-I", "--iterations", default=0, - help="(with --mesh) Number of times for erosion/dilation is" - " applied.") -@click.option("-plot", "--plot_type", - type=click.Choice(["text", "table", "contour", "joint", "line"]), - help="Plot geometry found on PDF page for debugging.") -@click.argument("filepath", type=click.Path(exists=True)) +@click.option('-T', '--table_area', default=[], multiple=True, + help='Table areas (x1,y1,x2,y2) to process.\n' + ' x1, y1 -> left-top and x2, y2 -> right-bottom') +@click.option('-back', '--process_background', is_flag=True, + help='Whether or not to process lines that are in' + ' background.') +@click.option('-scale', '--line_size_scaling', default=15, + help='Factor by which the page dimensions will be' + ' divided to get smallest length of detected lines.') +@click.option('-copy', '--copy_text', default=[], type=click.Choice(['h', 'v']), + multiple=True, help='Specify direction' + ' in which text will be copied over in a spanning cell.') +@click.option('-shift', '--shift_text', default=['l', 't'], + type=click.Choice(['', 'l', 'r', 't', 'b']), multiple=True, + help='Specify direction in which text in a spanning' + ' cell should flow.') +@click.option('-l', '--line_close_tol', default=2, + help='Tolerance parameter used to merge close vertical' + ' lines and close horizontal lines.') +@click.option('-j', '--joint_close_tol', default=2, + help='Tolerance parameter used to decide whether' + ' the detected lines and points lie close to each other.') +@click.option('-block', '--threshold_blocksize', default=15, + help='For adaptive thresholding, size of a pixel' + ' neighborhood that is used to calculate a threshold value for' + ' the pixel: 3, 5, 7, and so on.') +@click.option('-const', '--threshold_constant', default=-2, + help='For adaptive thresholding, constant subtracted' + ' from the mean or weighted mean.\nNormally, it is positive but' + ' may be zero or negative as well.') +@click.option('-I', '--iterations', default=0, + help='Number of times for erosion/dilation is' + ' applied.') +@click.option('-plot', '--plot_type', + type=click.Choice(['text', 'table', 'contour', 'joint', 'line']), + help='Plot geometry found on PDF page for debugging.') +@click.argument('filepath', type=click.Path(exists=True)) @pass_config def lattice(c, *args, **kwargs): """Use lines between text to parse table.""" conf = c.config - pages = conf.pop("pages") - output = conf.pop("output") - f = conf.pop("format") - compress = conf.pop("zip") + pages = conf.pop('pages') + output = conf.pop('output') + f = conf.pop('format') + compress = conf.pop('zip') plot_type = kwargs.pop('plot_type') - filepath = kwargs.pop("filepath") + filepath = kwargs.pop('filepath') kwargs.update(conf) table_area = list(kwargs['table_area']) @@ -99,42 +99,42 @@ def lattice(c, *args, **kwargs): kwargs['shift_text'] = list(kwargs['shift_text']) tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs) - click.echo(tables) + click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: table.plot(plot_type) else: if output is None: - raise click.UsageError("Please specify output filepath using --output") + raise click.UsageError('Please specify output filepath using --output') if f is None: - raise click.UsageError("Please specify output format using --format") + raise click.UsageError('Please specify output format using --format') tables.export(output, f=f, compress=compress) @cli.command('stream') -@click.option("-T", "--table_area", default=[], multiple=True, - help="Table areas (x1,y1,x2,y2) to process.\n" - " x1, y1 -> left-top and x2, y2 -> right-bottom") -@click.option("-C", "--columns", default=[], multiple=True, - help="x-coordinates of column separators.") -@click.option("-r", "--row_close_tol", default=2, help="Rows will be" - " formed by combining text vertically within this tolerance.") -@click.option("-c", "--col_close_tol", default=0, help="Columns will" - " be formed by combining text horizontally within this tolerance.") -@click.option("-plot", "--plot_type", - type=click.Choice(["text", "table"]), - help="Plot geometry found on PDF page for debugging.") -@click.argument("filepath", type=click.Path(exists=True)) +@click.option('-T', '--table_area', default=[], multiple=True, + help='Table areas (x1,y1,x2,y2) to process.\n' + ' x1, y1 -> left-top and x2, y2 -> right-bottom') +@click.option('-C', '--columns', default=[], multiple=True, + help='x-coordinates of column separators.') +@click.option('-r', '--row_close_tol', default=2, help='Rows will be' + ' formed by combining text vertically within this tolerance.') +@click.option('-c', '--col_close_tol', default=0, help='Columns will' + ' be formed by combining text horizontally within this tolerance.') +@click.option('-plot', '--plot_type', + type=click.Choice(['text', 'table']), + help='Plot geometry found on PDF page for debugging.') +@click.argument('filepath', type=click.Path(exists=True)) @pass_config def stream(c, *args, **kwargs): """Use spaces between text to parse table.""" conf = c.config - pages = conf.pop("pages") - output = conf.pop("output") - f = conf.pop("format") - compress = conf.pop("zip") + pages = conf.pop('pages') + output = conf.pop('output') + f = conf.pop('format') + compress = conf.pop('zip') plot_type = kwargs.pop('plot_type') - filepath = kwargs.pop("filepath") + filepath = kwargs.pop('filepath') kwargs.update(conf) table_area = list(kwargs['table_area']) @@ -143,13 +143,13 @@ def stream(c, *args, **kwargs): kwargs['columns'] = None if not columns else columns tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs) - click.echo(tables) + click.echo('Found {} tables'.format(tables.n)) if plot_type is not None: for table in tables: table.plot(plot_type) else: if output is None: - raise click.UsageError("Please specify output filepath using --output") + raise click.UsageError('Please specify output filepath using --output') if f is None: - raise click.UsageError("Please specify output format using --format") + raise click.UsageError('Please specify output format using --format') tables.export(output, f=f, compress=compress) \ No newline at end of file