Update CLI

pull/2/head
Vinayak Mehta 2018-09-24 01:00:30 +05:30
parent 75f667ccd5
commit 93b4dabcc2
1 changed file with 81 additions and 81 deletions

View File

@@ -20,21 +20,21 @@ pass_config = click.make_pass_decorator(Config)
@click.group() @click.group()
@click.version_option(version=__version__) @click.version_option(version=__version__)
@click.option("-p", "--pages", default="1", help="Comma-separated page numbers" @click.option('-p', '--pages', default='1', help='Comma-separated page numbers'
" to parse. Example: 1,3,4 or 1,4-end") ' to parse. Example: 1,3,4 or 1,4-end')
@click.option("-o", "--output", help="Output filepath.") @click.option('-o', '--output', help='Output filepath.')
@click.option("-f", "--format", @click.option('-f', '--format',
type=click.Choice(["csv", "json", "excel", "html"]), type=click.Choice(['csv', 'json', 'excel', 'html']),
help="Output file format.") help='Output file format.')
@click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP" @click.option('-z', '--zip', is_flag=True, help='Whether or not to create a ZIP'
" archive.") ' archive.')
@click.option("-split", "--split_text", is_flag=True, help="Whether or not to" @click.option('-split', '--split_text', is_flag=True, help='Whether or not to'
" split text if it spans across multiple cells.") ' split text if it spans across multiple cells.')
@click.option("-flag", "--flag_size", is_flag=True, help="(inactive) Whether or" @click.option('-flag', '--flag_size', is_flag=True, help='(inactive) Whether or'
" not to flag text which has uncommon size. (Useful to detect" ' not to flag text which has uncommon size. (Useful to detect'
" super/subscripts)") ' super/subscripts)')
@click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1), @click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
help="char_margin, line_margin, word_margin for PDFMiner.") help='char_margin, line_margin, word_margin for PDFMiner.')
@click.pass_context @click.pass_context
def cli(ctx, *args, **kwargs): def cli(ctx, *args, **kwargs):
ctx.obj = Config() ctx.obj = Config()
@@ -43,53 +43,53 @@ def cli(ctx, *args, **kwargs):
@cli.command('lattice') @cli.command('lattice')
@click.option("-T", "--table_area", default=[], multiple=True, @click.option('-T', '--table_area', default=[], multiple=True,
help="Table areas (x1,y1,x2,y2) to process.\n" help='Table areas (x1,y1,x2,y2) to process.\n'
" x1, y1 -> left-top and x2, y2 -> right-bottom") ' x1, y1 -> left-top and x2, y2 -> right-bottom')
@click.option("-back", "--process_background", is_flag=True, @click.option('-back', '--process_background', is_flag=True,
help="(with --mesh) Whether or not to process lines that are in" help='Whether or not to process lines that are in'
" background.") ' background.')
@click.option("-scale", "--line_size_scaling", default=15, @click.option('-scale', '--line_size_scaling', default=15,
help="(with --mesh) Factor by which the page dimensions will be" help='Factor by which the page dimensions will be'
" divided to get smallest length of detected lines.") ' divided to get smallest length of detected lines.')
@click.option("-copy", "--copy_text", default=[], type=click.Choice(["h", "v"]), @click.option('-copy', '--copy_text', default=[], type=click.Choice(['h', 'v']),
multiple=True, help="(with --mesh) Specify direction" multiple=True, help='Specify direction'
" in which text will be copied over in a spanning cell.") ' in which text will be copied over in a spanning cell.')
@click.option("-shift", "--shift_text", default=["l", "t"], @click.option('-shift', '--shift_text', default=['l', 't'],
type=click.Choice(["", "l", "r", "t", "b"]), multiple=True, type=click.Choice(['', 'l', 'r', 't', 'b']), multiple=True,
help="(with --mesh) Specify direction in which text in a spanning" help='Specify direction in which text in a spanning'
" cell should flow.") ' cell should flow.')
@click.option("-l", "--line_close_tol", default=2, @click.option('-l', '--line_close_tol', default=2,
help="(with --mesh) Tolerance parameter used to merge close vertical" help='Tolerance parameter used to merge close vertical'
" lines and close horizontal lines.") ' lines and close horizontal lines.')
@click.option("-j", "--joint_close_tol", default=2, @click.option('-j', '--joint_close_tol', default=2,
help="(with --mesh) Tolerance parameter used to decide whether" help='Tolerance parameter used to decide whether'
" the detected lines and points lie close to each other.") ' the detected lines and points lie close to each other.')
@click.option("-block", "--threshold_blocksize", default=15, @click.option('-block', '--threshold_blocksize', default=15,
help="(with --mesh) For adaptive thresholding, size of a pixel" help='For adaptive thresholding, size of a pixel'
" neighborhood that is used to calculate a threshold value for" ' neighborhood that is used to calculate a threshold value for'
" the pixel: 3, 5, 7, and so on.") ' the pixel: 3, 5, 7, and so on.')
@click.option("-const", "--threshold_constant", default=-2, @click.option('-const', '--threshold_constant', default=-2,
help="(with --mesh) For adaptive thresholding, constant subtracted" help='For adaptive thresholding, constant subtracted'
" from the mean or weighted mean.\nNormally, it is positive but" ' from the mean or weighted mean.\nNormally, it is positive but'
" may be zero or negative as well.") ' may be zero or negative as well.')
@click.option("-I", "--iterations", default=0, @click.option('-I', '--iterations', default=0,
help="(with --mesh) Number of times for erosion/dilation is" help='Number of times for erosion/dilation is'
" applied.") ' applied.')
@click.option("-plot", "--plot_type", @click.option('-plot', '--plot_type',
type=click.Choice(["text", "table", "contour", "joint", "line"]), type=click.Choice(['text', 'table', 'contour', 'joint', 'line']),
help="Plot geometry found on PDF page for debugging.") help='Plot geometry found on PDF page for debugging.')
@click.argument("filepath", type=click.Path(exists=True)) @click.argument('filepath', type=click.Path(exists=True))
@pass_config @pass_config
def lattice(c, *args, **kwargs): def lattice(c, *args, **kwargs):
"""Use lines between text to parse table.""" """Use lines between text to parse table."""
conf = c.config conf = c.config
pages = conf.pop("pages") pages = conf.pop('pages')
output = conf.pop("output") output = conf.pop('output')
f = conf.pop("format") f = conf.pop('format')
compress = conf.pop("zip") compress = conf.pop('zip')
plot_type = kwargs.pop('plot_type') plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop("filepath") filepath = kwargs.pop('filepath')
kwargs.update(conf) kwargs.update(conf)
table_area = list(kwargs['table_area']) table_area = list(kwargs['table_area'])
@@ -99,42 +99,42 @@ def lattice(c, *args, **kwargs):
kwargs['shift_text'] = list(kwargs['shift_text']) kwargs['shift_text'] = list(kwargs['shift_text'])
tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs) tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
click.echo(tables) click.echo('Found {} tables'.format(tables.n))
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
table.plot(plot_type) table.plot(plot_type)
else: else:
if output is None: if output is None:
raise click.UsageError("Please specify output filepath using --output") raise click.UsageError('Please specify output filepath using --output')
if f is None: if f is None:
raise click.UsageError("Please specify output format using --format") raise click.UsageError('Please specify output format using --format')
tables.export(output, f=f, compress=compress) tables.export(output, f=f, compress=compress)
@cli.command('stream') @cli.command('stream')
@click.option("-T", "--table_area", default=[], multiple=True, @click.option('-T', '--table_area', default=[], multiple=True,
help="Table areas (x1,y1,x2,y2) to process.\n" help='Table areas (x1,y1,x2,y2) to process.\n'
" x1, y1 -> left-top and x2, y2 -> right-bottom") ' x1, y1 -> left-top and x2, y2 -> right-bottom')
@click.option("-C", "--columns", default=[], multiple=True, @click.option('-C', '--columns', default=[], multiple=True,
help="x-coordinates of column separators.") help='x-coordinates of column separators.')
@click.option("-r", "--row_close_tol", default=2, help="Rows will be" @click.option('-r', '--row_close_tol', default=2, help='Rows will be'
" formed by combining text vertically within this tolerance.") ' formed by combining text vertically within this tolerance.')
@click.option("-c", "--col_close_tol", default=0, help="Columns will" @click.option('-c', '--col_close_tol', default=0, help='Columns will'
" be formed by combining text horizontally within this tolerance.") ' be formed by combining text horizontally within this tolerance.')
@click.option("-plot", "--plot_type", @click.option('-plot', '--plot_type',
type=click.Choice(["text", "table"]), type=click.Choice(['text', 'table']),
help="Plot geometry found on PDF page for debugging.") help='Plot geometry found on PDF page for debugging.')
@click.argument("filepath", type=click.Path(exists=True)) @click.argument('filepath', type=click.Path(exists=True))
@pass_config @pass_config
def stream(c, *args, **kwargs): def stream(c, *args, **kwargs):
"""Use spaces between text to parse table.""" """Use spaces between text to parse table."""
conf = c.config conf = c.config
pages = conf.pop("pages") pages = conf.pop('pages')
output = conf.pop("output") output = conf.pop('output')
f = conf.pop("format") f = conf.pop('format')
compress = conf.pop("zip") compress = conf.pop('zip')
plot_type = kwargs.pop('plot_type') plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop("filepath") filepath = kwargs.pop('filepath')
kwargs.update(conf) kwargs.update(conf)
table_area = list(kwargs['table_area']) table_area = list(kwargs['table_area'])
@@ -143,13 +143,13 @@ def stream(c, *args, **kwargs):
kwargs['columns'] = None if not columns else columns kwargs['columns'] = None if not columns else columns
tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs) tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
click.echo(tables) click.echo('Found {} tables'.format(tables.n))
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
table.plot(plot_type) table.plot(plot_type)
else: else:
if output is None: if output is None:
raise click.UsageError("Please specify output filepath using --output") raise click.UsageError('Please specify output filepath using --output')
if f is None: if f is None:
raise click.UsageError("Please specify output format using --format") raise click.UsageError('Please specify output format using --format')
tables.export(output, f=f, compress=compress) tables.export(output, f=f, compress=compress)