Add flavors

2018-09-23 10:53:32 +05:30 · 2018-09-23 10:53:32 +05:30 · 3170a9689f
parent 4a30c5a514
commit 3170a9689f
11 changed files with 207 additions and 305 deletions
--- a/camelot/init.py
+++ b/camelot/init.py
@ -1,4 +1,3 @@
 from .__version__ import __version__
-from .io import read_pdf
+from .io import read_pdf
 from .plotting import plot_geometry
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -5,18 +5,9 @@ import click
 from . import __version__
 from .io import read_pdf
 from .plotting import plot_geometry
 from .utils import validate_input, remove_extra
 class Mutex(click.Option):
    """click option that validates the parsed options before the
    regular click handling runs.
    """
    def handle_parse_result(self, ctx, opts, args):
        """Check opts with validate_input, then defer to click.Option.

        'mesh' and 'geometry_type' are read from opts and default to
        False when they are absent.
        """
        validate_input(
            opts,
            mesh=opts.get('mesh', False),
            geometry_type=opts.get('geometry_type', False))
        parent = super(Mutex, self)
        return parent.handle_parse_result(ctx, opts, args)
@click.command()
@click.version_option(version=__version__)
@click.option("-p", "--pages", default="1", help="Comma-separated page numbers"
@ -27,8 +18,6 @@ class Mutex(click.Option):
              help="Output file format.")
@click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP"
              " archive.")
@click.option("-m", "--mesh", is_flag=True, help="Whether or not to"
              " use Lattice method of parsing. Stream is used by default.")
@click.option("-T", "--table_area", default=[], multiple=True,
              help="Table areas (x1,y1,x2,y2) to process.\n"
              " x1, y1 -> left-top and x2, y2 -> right-bottom")
@ -39,12 +28,44 @@ class Mutex(click.Option):
              " super/subscripts)")
@click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1),
              help="char_margin, line_margin, word_margin for PDFMiner.")
-@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
+@click.option("-G", "--geometry_type",
-              help="x-coordinates of column separators.")
+              type=click.Choice(["text", "table", "contour", "joint", "line"]),
-@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
+              help="Plot geometry found on pdf page for debugging.\n\n"
-              " formed by combining text vertically within this tolerance.")
+              "text: Plot text objects. (Useful to get table_area and"
-@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
+              " columns coordinates)\ntable: Plot parsed table.\n"
-              " be formed by combining text horizontally within this tolerance.")
+              "contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
              " intersections.\nline (with --mesh): Plot detected lines.")
@click.argument("filepath", type=click.Path(exists=True))
 def cli(*args, **kwargs):
    """Command-line entry point: parse the PDF, echo the tables and
    export them.

    The options consumed here are removed from kwargs; whatever is
    left is filtered by remove_extra and forwarded to read_pdf.
    Raises click.UsageError when no output path or no output format
    was given (after the tables have been echoed).
    """
    # Pop the options handled by this function, in a fixed order.
    pages, output, f, compress, mesh, geometry_type, filepath = [
        kwargs.pop(name)
        for name in ("pages", "output", "format", "zip", "mesh",
                     "geometry_type", "filepath")
    ]

    # Multi-valued options: turn into lists, with "nothing given" -> None.
    for key in ('table_area', 'columns', 'copy_text'):
        kwargs[key] = list(kwargs[key]) or None
    kwargs['shift_text'] = list(kwargs['shift_text'])

    kwargs = remove_extra(kwargs, mesh=mesh)
    tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
    click.echo(tables)

    required = (
        (output, "Please specify an output filepath using --output"),
        (f, "Please specify an output format using --format"),
    )
    for value, message in required:
        if value is None:
            raise click.UsageError(message)
    tables.export(output, f=f, compress=compress)
@click.option("-T", "--table_area", default=[], multiple=True,
              help="Table areas (x1,y1,x2,y2) to process.\n"
              " x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-back", "--process_background", is_flag=True, cls=Mutex,
              help="(with --mesh) Whether or not to process lines that are in"
              " background.")
@ -75,40 +96,18 @@ class Mutex(click.Option):
@click.option("-I", "--iterations", default=0, cls=Mutex,
              help="(with --mesh) Number of times for erosion/dilation is"
              " applied.")
-@click.option("-G", "--geometry_type",
+def lattice(*args, **kwargs):
-              type=click.Choice(["text", "table", "contour", "joint", "line"]),
+    pass
              help="Plot geometry found on pdf page for debugging.\n\n"
              "text: Plot text objects. (Useful to get table_area and"
              " columns coordinates)\ntable: Plot parsed table.\n"
              "contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
              " intersections.\nline (with --mesh): Plot detected lines.")
@click.argument("filepath", type=click.Path(exists=True))
 def cli(*args, **kwargs):
    pages = kwargs.pop("pages")
    output = kwargs.pop("output")
    f = kwargs.pop("format")
    compress = kwargs.pop("zip")
    mesh = kwargs.pop("mesh")
    geometry_type = kwargs.pop("geometry_type")
    filepath = kwargs.pop("filepath")
    table_area = list(kwargs['table_area'])
    kwargs['table_area'] = None if not table_area else table_area
    columns = list(kwargs['columns'])
    kwargs['columns'] = None if not columns else columns
    copy_text = list(kwargs['copy_text'])
    kwargs['copy_text'] = None if not copy_text else copy_text
    kwargs['shift_text'] = list(kwargs['shift_text'])
-    kwargs = remove_extra(kwargs, mesh=mesh)
+@click.option("-T", "--table_area", default=[], multiple=True,
-    if geometry_type is None:
+              help="Table areas (x1,y1,x2,y2) to process.\n"
-        tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
+              " x1, y1 -> left-top and x2, y2 -> right-bottom")
-        click.echo(tables)
+@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
-        if output is None:
+              help="x-coordinates of column separators.")
-            raise click.UsageError("Please specify an output filepath using --output")
+@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
-        if f is None:
+              " formed by combining text vertically within this tolerance.")
-            raise click.UsageError("Please specify an output format using --format")
+@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
-        tables.export(output, f=f, compress=compress)
+              " be formed by combining text horizontally within this tolerance.")
-    else:
+def stream(*args, **kwargs):
-        plot_geometry(filepath, pages=pages, mesh=mesh,
+    pass
                      geometry_type=geometry_type, **kwargs)
--- a/camelot/core.py
+++ b/camelot/core.py
@ -6,6 +6,8 @@ import tempfile
 import numpy as np
 import pandas as pd
 from .plotting import *
 class Cell(object):
    """Defines a cell in a table with coordinates relative to a
@ -318,6 +320,32 @@ class Table(object):
                    cell.hspan = True
        return self
    def plot(self, geometry_type):
        """Plot geometry found on PDF page based on geometry_type
        specified, useful for debugging and playing with different
        parameters to get the best output.

        Parameters
        ----------
        geometry_type : str
            The geometry type for which a plot should be generated.
            Can be 'text', 'table', 'contour', 'joint', 'line'

        Raises
        ------
        NotImplementedError
            If geometry_type is 'contour', 'joint' or 'line' and the
            table's flavor is 'stream'.
        """
        if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
            # Fill in the placeholder so the message names the
            # geometry type that was requested.
            raise NotImplementedError(
                "{} cannot be plotted with flavor='stream'".format(
                    geometry_type))
        if geometry_type == 'text':
            plot_text(self._text)
        elif geometry_type == 'table':
            plot_table(self)
        elif geometry_type == 'contour':
            plot_contour(self._image)
        elif geometry_type == 'joint':
            plot_joint(self._image)
        elif geometry_type == 'line':
            plot_line(self._segments)
    def to_csv(self, path, **kwargs):
        """Writes Table to a comma-separated values (csv) file.
@ -488,36 +516,4 @@ class TableList(object):
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + '.zip'
                with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
-                    z.write(filepath, os.path.basename(filepath))
+                    z.write(filepath, os.path.basename(filepath))
 class Geometry(object):
    """Holder for the text, images, segments and tables attributes,
    all of which start out empty.
    """
    def __init__(self):
        # Lists for text/tables, empty tuples for images/segments.
        self.text, self.images = [], ()
        self.segments, self.tables = (), []
    def __repr__(self):
        # Report only the size of each attribute.
        sizes = [len(getattr(self, attr))
                 for attr in ('text', 'images', 'segments', 'tables')]
        template = '<{} text={} images={} segments={} tables={}>'
        return template.format(self.__class__.__name__, *sizes)
 class GeometryList(object):
    """Gathers the text, images, segments and tables attributes of
    every object in geometry into four parallel lists.
    """
    _FIELDS = ('text', 'images', 'segments', 'tables')
    def __init__(self, geometry):
        # One pass over geometry per attribute, in this order.
        for field in self._FIELDS:
            setattr(self, field, [getattr(g, field) for g in geometry])
    def __repr__(self):
        # Report only the length of each collected list.
        sizes = [len(getattr(self, field)) for field in self._FIELDS]
        template = '<{} text={} images={} segments={} tables={}>'
        return template.format(self.__class__.__name__, *sizes)
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -2,7 +2,7 @@ import os
 from PyPDF2 import PdfFileReader, PdfFileWriter
-from .core import TableList, GeometryList
+from .core import TableList
 from .parsers import Stream, Lattice
 from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
                    get_rotation)
@ -17,7 +17,7 @@ class PDFHandler(object):
    ----------
    filename : str
        Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
        Comma-separated page numbers to parse.
        Example: 1,3,4 or 1,4-end
@ -35,7 +35,7 @@ class PDFHandler(object):
        ----------
        filename : str
            Path to pdf file.
-        pages : str
+        pages : str, optional (default: '1')
            Comma-separated page numbers to parse.
            Example: 1,3,4 or 1,4-end
@ -112,15 +112,15 @@ class PDFHandler(object):
                with open(fpath, 'wb') as f:
                    outfile.write(f)
-    def parse(self, mesh=False, **kwargs):
+    def parse(self, flavor='lattice', **kwargs):
        """Extracts tables by calling parser.get_tables on all single
        page pdfs.
        Parameters
        ----------
-        mesh : bool (default: False)
+        flavor : str (default: 'lattice')
-            Whether or not to use Lattice method of parsing. Stream
+            The parsing method to use ('lattice' or 'stream').
-            is used by default.
+            Lattice is used by default.
        kwargs : dict
            See camelot.read_pdf kwargs.
@ -134,15 +134,13 @@ class PDFHandler(object):
        """
        tables = []
        geometry = []
        with TemporaryDirectory() as tempdir:
            for p in self.pages:
                self._save_page(self.filename, p, tempdir)
            pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
                     for p in self.pages]
-            parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
+            parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
            for p in pages:
-                t, g = parser.extract_tables(p)
+                t = parser.extract_tables(p)
                tables.extend(t)
-                geometry.append(g)
+        return TableList(tables)
        return TableList(tables), GeometryList(geometry)
--- a/camelot/io.py
+++ b/camelot/io.py
@ -2,22 +2,22 @@ from .handlers import PDFHandler
 from .utils import validate_input, remove_extra
-def read_pdf(filepath, pages='1', mesh=False, **kwargs):
+def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
    """Read PDF and return parsed data tables.
-    Note: kwargs annotated with ^ can only be used with mesh=False
+    Note: kwargs annotated with ^ can only be used with flavor='stream'
-    and kwargs annotated with * can only be used with mesh=True.
+    and kwargs annotated with * can only be used with flavor='lattice'.
    Parameters
    ----------
    filepath : str
        Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
        Comma-separated page numbers to parse.
        Example: 1,3,4 or 1,4-end
-    mesh : bool (default: False)
+    flavor : str (default: 'lattice')
-        Whether or not to use Lattice method of parsing. Stream
+        The parsing method to use ('lattice' or 'stream').
-        is used by default.
+        Lattice is used by default.
    table_area : list, optional (default: None)
        List of table areas to process as strings of the form
        x1,y1,x2,y2 where (x1, y1) -> left-top and
@ -85,8 +85,8 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
    tables : camelot.core.TableList
    """
-    validate_input(kwargs, mesh=mesh)
+    validate_input(kwargs, flavor=flavor)
    p = PDFHandler(filepath, pages)
-    kwargs = remove_extra(kwargs, mesh=mesh)
+    kwargs = remove_extra(kwargs, flavor=flavor)
-    tables, __ = p.parse(mesh=mesh, **kwargs)
+    tables, __ = p.parse(flavor=flavor, **kwargs)
    return tables
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -194,7 +194,8 @@ class Lattice(BaseParser):
            stderr=subprocess.STDOUT)
    def _generate_table_bbox(self):
-        self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
+        self.image, self.threshold = adaptive_threshold(
            self.imagename, process_background=self.process_background,
            blocksize=self.threshold_blocksize, c=self.threshold_constant)
        image_width = self.image.shape[1]
        image_height = self.image.shape[0]
@ -297,11 +298,20 @@ class Lattice(BaseParser):
        table.shape = table.df.shape
        whitespace = compute_whitespace(data)
        table.flavor = 'lattice'
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        table.page = int(os.path.basename(self.rootname).replace('page-', ''))
        # for plotting
        _text = []
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
        table._image = (self.image, self.table_bbox_unscaled)
        table._segments = (self.vertical_segments, self.horizontal_segments)
        return table
    def extract_tables(self, filename):
@ -311,7 +321,7 @@ class Lattice(BaseParser):
        if not self.horizontal_text:
            logger.info("No tables found on {}".format(
                os.path.basename(self.rootname)))
-            return [], self.g
+            return []
        self._generate_image()
        self._generate_table_bbox()
@ -324,13 +334,4 @@ class Lattice(BaseParser):
            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
            _tables.append(table)
-        if self.debug:
+        return _tables
            text = []
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
            self.g.text = text
            self.g.images = (self.image, self.table_bbox_unscaled)
            self.g.segments = (self.vertical_segments, self.horizontal_segments)
            self.g.tables = _tables
        return _tables, self.g
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -333,11 +333,20 @@ class Stream(BaseParser):
        table.shape = table.df.shape
        whitespace = compute_whitespace(data)
        table.flavor = 'stream'
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        table.page = int(os.path.basename(self.rootname).replace('page-', ''))
        # for plotting
        _text = []
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
        table._image = None
        table._segments = None
        return table
    def extract_tables(self, filename):
@ -347,7 +356,7 @@ class Stream(BaseParser):
        if not self.horizontal_text:
            logger.info("No tables found on {}".format(
                os.path.basename(self.rootname)))
-            return [], self.g
+            return []
        self._generate_table_bbox()
@ -359,11 +368,4 @@ class Stream(BaseParser):
            table = self._generate_table(table_idx, cols, rows)
            _tables.append(table)
-        if self.debug:
+        return _tables
            text = []
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
            self.g.text = text
            self.g.tables = _tables
        return _tables, self.g
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -2,165 +2,72 @@ import cv2
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
-from .handlers import PDFHandler
+
-from .utils import validate_input, remove_extra
def plot_text(text):
    """Draw every text bounding box as a rectangle and show the figure.

    Parameters
    ----------
    text : list
        Bounding boxes as (x0, y0, x1, y1) tuples; each one is drawn
        with its corner at (x0, y0), width x1 - x0 and height y1 - y0.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect='equal')
    xs, ys = [], []
    for t in text:
        # Keep x-extents (t[0], t[2]) and y-extents (t[1], t[3]) apart
        # so the axis limits match the rectangles that are drawn.
        xs.extend([t[0], t[2]])
        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1]
            )
        )
    # With no boxes there is nothing to fit the view to; min()/max()
    # would raise on the empty lists.
    if xs:
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)
    plt.show()
-def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs):
+def plot_table(table):
-    """Plot geometry found on pdf page based on type specified,
+    for row in table.cells:
-    useful for debugging and playing with different parameters to get
+        for cell in row:
-    the best output.
+            if cell.left:
                plt.plot([cell.lb[0], cell.lt[0]],
                            [cell.lb[1], cell.lt[1]])
            if cell.right:
                plt.plot([cell.rb[0], cell.rt[0]],
                            [cell.rb[1], cell.rt[1]])
            if cell.top:
                plt.plot([cell.lt[0], cell.rt[0]],
                            [cell.lt[1], cell.rt[1]])
            if cell.bottom:
                plt.plot([cell.lb[0], cell.rb[0]],
                            [cell.lb[1], cell.rb[1]])
    plt.show()
    Note: kwargs annotated with ^ can only be used with mesh=False
    and kwargs annotated with * can only be used with mesh=True.
-    Parameters
+def plot_contour(image):
-    ----------
+    img, table_bbox = image
-    filepath : str
+    for t in table_bbox.keys():
-        Path to pdf file.
+        cv2.rectangle(img, (t[0], t[1]),
-    pages : str
+                      (t[2], t[3]), (255, 0, 0), 20)
-        Comma-separated page numbers to parse.
+    plt.imshow(img)
-        Example: 1,3,4 or 1,4-end
+    plt.show()
    mesh : bool (default: False)
        Whether or not to use Lattice method of parsing. Stream
        is used by default.
    geometry_type : str, optional (default: None)
        * 'text' : Plot text objects found on page. (Useful to get \
                   table_area and columns coordinates)
        * 'table' : Plot parsed table.
        * 'contour'* : Plot detected rectangles.
        * 'joint'* : Plot detected line intersections.
        * 'line'* : Plot detected lines.
    table_area : list, optional (default: None)
        List of table areas to process as strings of the form
        x1,y1,x2,y2 where (x1, y1) -> left-top and
        (x2, y2) -> right-bottom in pdf coordinate space.
    columns^ : list, optional (default: None)
        List of column x-coordinates as strings where the coordinates
        are comma-separated.
    split_text : bool, optional (default: False)
        Whether or not to split a text line if it spans across
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
        if its size is different from rest of the string. (Useful for
        super and subscripts.)
    row_close_tol^ : int, optional (default: 2)
        Rows will be formed by combining text vertically
        within this tolerance.
    col_close_tol^ : int, optional (default: 0)
        Columns will be formed by combining text horizontally
        within this tolerance.
    process_background* : bool, optional (default: False)
        Whether or not to process lines that are in background.
    line_size_scaling* : int, optional (default: 15)
        Factor by which the page dimensions will be divided to get
        smallest length of lines that should be detected.
        The larger this value, smaller the detected lines. Making it
        too large will lead to text being detected as lines.
    copy_text* : list, optional (default: None)
        {'h', 'v'}
        Select one or more strings from above and pass them as a list
        to specify the direction in which text should be copied over
        when a cell spans multiple rows or columns.
    shift_text* : list, optional (default: ['l', 't'])
        {'l', 'r', 't', 'b'}
        Select one or more strings from above and pass them as a list
        to specify where the text in a spanning cell should flow.
    line_close_tol* : int, optional (default: 2)
        Tolerance parameter used to merge vertical and horizontal
        detected lines which lie close to each other.
    joint_close_tol* : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
    threshold_blocksize* : int, optional (default: 15)
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+def plot_joint(image):
-    threshold_constant* : int, optional (default: -2)
+    img, table_bbox = image
-        Constant subtracted from the mean or weighted mean.
+    x_coord = []
-        Normally, it is positive but may be zero or negative as well.
+    y_coord = []
    for k in table_bbox.keys():
        for coord in table_bbox[k]:
            x_coord.append(coord[0])
            y_coord.append(coord[1])
    max_x, max_y = max(x_coord), max(y_coord)
    plt.plot(x_coord, y_coord, 'ro')
    plt.axis([0, max_x + 100, max_y + 100, 0])
    plt.imshow(img)
    plt.show()
        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
    iterations* : int, optional (default: 0)
        Number of times for erosion/dilation is applied.
-        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
+def plot_line(segments):
-    margins : tuple
+    vertical, horizontal = segments
-        PDFMiner margins. (char_margin, line_margin, word_margin)
+    for v in vertical:
-
+        plt.plot([v[0], v[2]], [v[1], v[3]])
-        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
+    for h in horizontal:
-
+        plt.plot([h[0], h[2]], [h[1], h[3]])
-    """
+    plt.show()
    validate_input(kwargs, mesh=mesh, geometry_type=geometry_type)
    p = PDFHandler(filepath, pages)
    kwargs = remove_extra(kwargs, mesh=mesh)
    debug = True if geometry_type is not None else False
    kwargs.update({'debug': debug})
    __, geometry = p.parse(mesh=mesh, **kwargs)
    if geometry_type == 'text':
        for text in geometry.text:
            fig = plt.figure()
            ax = fig.add_subplot(111, aspect='equal')
            xs, ys = [], []
            for t in text:
                xs.extend([t[0], t[1]])
                ys.extend([t[2], t[3]])
                ax.add_patch(
                    patches.Rectangle(
                        (t[0], t[1]),
                        t[2] - t[0],
                        t[3] - t[1]
                    )
                )
            ax.set_xlim(min(xs) - 10, max(xs) + 10)
            ax.set_ylim(min(ys) - 10, max(ys) + 10)
            plt.show()
    elif geometry_type == 'table':
        for tables in geometry.tables:
            for table in tables:
                for row in table.cells:
                    for cell in row:
                        if cell.left:
                            plt.plot([cell.lb[0], cell.lt[0]],
                                     [cell.lb[1], cell.lt[1]])
                        if cell.right:
                            plt.plot([cell.rb[0], cell.rt[0]],
                                     [cell.rb[1], cell.rt[1]])
                        if cell.top:
                            plt.plot([cell.lt[0], cell.rt[0]],
                                     [cell.lt[1], cell.rt[1]])
                        if cell.bottom:
                            plt.plot([cell.lb[0], cell.rb[0]],
                                     [cell.lb[1], cell.rb[1]])
            plt.show()
    elif geometry_type == 'contour':
        for img, table_bbox in geometry.images:
            for t in table_bbox.keys():
                cv2.rectangle(img, (t[0], t[1]),
                              (t[2], t[3]), (255, 0, 0), 20)
            plt.imshow(img)
            plt.show()
    elif geometry_type == 'joint':
        for img, table_bbox in geometry.images:
            x_coord = []
            y_coord = []
            for k in table_bbox.keys():
                for coord in table_bbox[k]:
                    x_coord.append(coord[0])
                    y_coord.append(coord[1])
            max_x, max_y = max(x_coord), max(y_coord)
            plt.plot(x_coord, y_coord, 'ro')
            plt.axis([0, max_x + 100, max_y + 100, 0])
            plt.imshow(img)
            plt.show()
    elif geometry_type == 'line':
        for v_s, h_s in geometry.segments:
            for v in v_s:
                plt.plot([v[0], v[2]], [v[1], v[3]])
            for h in h_s:
                plt.plot([h[0], h[2]], [h[1], h[3]])
            plt.show()
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -38,25 +38,25 @@ lattice_kwargs = [
 ]
-def validate_input(kwargs, mesh=False, geometry_type=False):
+def validate_input(kwargs, flavor='lattice', geometry_type=False):
-    def check_intersection(parser_kwargs, input_kwargs, message_bool):
+    def check_intersection(parser_kwargs, input_kwargs):
        isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
        if isec:
-            raise ValueError("{} can not be used with mesh set to {}".format(
+            raise ValueError("{} cannot be used with flavor='{}'".format(
-                             ",".join(sorted(isec)), message_bool))
+                             ",".join(sorted(isec)), flavor))
-    if mesh:
+    if flavor == 'lattice':
-        check_intersection(stream_kwargs, kwargs, True)
+        check_intersection(stream_kwargs, kwargs)
    else:
-        check_intersection(lattice_kwargs, kwargs, False)
+        check_intersection(lattice_kwargs, kwargs)
    if geometry_type:
-        if not mesh and geometry_type in ['contour', 'joint', 'line']:
+        if flavor != 'lattice' and geometry_type in ['contour', 'joint', 'line']:
-            raise ValueError("Use geometry_type={} with mesh set to True".format(
+            raise ValueError("Use geometry_type='{}' with flavor='lattice'".format(
                             geometry_type))
-def remove_extra(kwargs, mesh=False):
+def remove_extra(kwargs, flavor='lattice'):
-    if mesh:
+    if flavor == 'lattice':
        for key in kwargs.keys():
            if key in stream_kwargs:
                kwargs.pop(key)
--- a/setup.py
+++ b/setup.py
@ -9,7 +9,7 @@ with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
    exec(f.read(), about)
 # TODO: Move these to __version__.py
-NAME = 'camelot'
+NAME = 'camelot-py'
 VERSION = about['__version__']
 DESCRIPTION = 'PDF Table Parsing for Humans'
 with open('README.md') as f:
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -18,11 +18,11 @@ def test_stream_table_rotated():
    df = pd.DataFrame(data_stream_table_rotated)
    filename = os.path.join(testdir, "clockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
    assert df.equals(tables[0].df)
    filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
    assert df.equals(tables[0].df)
@ -30,7 +30,7 @@ def test_stream_table_area():
    df = pd.DataFrame(data_stream_table_area_single)
    filename = os.path.join(testdir, "tabula/us-007.pdf")
-    tables = camelot.read_pdf(filename, table_area=["320,500,573,335"])
+    tables = camelot.read_pdf(filename, flavor="stream", table_area=["320,500,573,335"])
    assert df.equals(tables[0].df)
@ -39,7 +39,7 @@ def test_stream_columns():
    filename = os.path.join(testdir, "mexican_towns.pdf")
    tables = camelot.read_pdf(
-        filename, columns=["67,180,230,425,475"], row_close_tol=10)
+        filename, flavor="stream", columns=["67,180,230,425,475"], row_close_tol=10)
    assert df.equals(tables[0].df)
@ -48,7 +48,7 @@ def test_lattice():
    filename = os.path.join(testdir,
        "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
-    tables = camelot.read_pdf(filename, pages="2", mesh=True)
+    tables = camelot.read_pdf(filename, pages="2")
    assert df.equals(tables[0].df)
@ -56,11 +56,11 @@ def test_lattice_table_rotated():
    df = pd.DataFrame(data_lattice_table_rotated)
    filename = os.path.join(testdir, "clockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
    assert df.equals(tables[0].df)
    filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
    assert df.equals(tables[0].df)
@ -68,7 +68,7 @@ def test_lattice_process_background():
    df = pd.DataFrame(data_lattice_process_background)
    filename = os.path.join(testdir, "background_lines_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, process_background=True)
+    tables = camelot.read_pdf(filename, process_background=True)
    assert df.equals(tables[1].df)
@ -76,5 +76,5 @@ def test_lattice_copy_text():
    df = pd.DataFrame(data_lattice_copy_text)
    filename = os.path.join(testdir, "row_span_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, line_size_scaling=60, copy_text="v")
+    tables = camelot.read_pdf(filename, line_size_scaling=60, copy_text="v")
    assert df.equals(tables[0].df)