From 3170a9689f851f500be9fc3202eaad8e469449ab Mon Sep 17 00:00:00 2001
From: Vinayak Mehta <vmehta94@gmail.com>
Date: Sun, 23 Sep 2018 10:53:32 +0530
Subject: [PATCH] Add flavors

---
 camelot/__init__.py        |   3 +-
 camelot/cli.py             | 103 +++++++++---------
 camelot/core.py            |  62 +++++------
 camelot/handlers.py        |  22 ++--
 camelot/io.py              |  20 ++--
 camelot/parsers/lattice.py |  25 ++---
 camelot/parsers/stream.py  |  20 ++--
 camelot/plotting.py        | 215 +++++++++++--------------------------
 camelot/utils.py           |  22 ++--
 setup.py                   |   2 +-
 tests/test_common.py       |  18 ++--
 11 files changed, 207 insertions(+), 305 deletions(-)

diff --git a/camelot/__init__.py b/camelot/__init__.py
index 6e416e4..b762cea 100644
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@@ -1,4 +1,3 @@
 from .__version__ import __version__
 
-from .io import read_pdf
-from .plotting import plot_geometry
\ No newline at end of file
+from .io import read_pdf
\ No newline at end of file
diff --git a/camelot/cli.py b/camelot/cli.py
index 98bb681..822bd44 100644
--- a/camelot/cli.py
+++ b/camelot/cli.py
@@ -5,18 +5,9 @@ import click
 
 from . import __version__
 from .io import read_pdf
-from .plotting import plot_geometry
 from .utils import validate_input, remove_extra
 
 
-class Mutex(click.Option):
-    def handle_parse_result(self, ctx, opts, args):
-        mesh = opts.get('mesh', False)
-        geometry_type = opts.get('geometry_type', False)
-        validate_input(opts, mesh=mesh, geometry_type=geometry_type)
-        return super(Mutex, self).handle_parse_result(ctx, opts, args)
-
-
 @click.command()
 @click.version_option(version=__version__)
 @click.option("-p", "--pages", default="1", help="Comma-separated page numbers"
@@ -27,8 +18,6 @@ class Mutex(click.Option):
               help="Output file format.")
 @click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP"
               " archive.")
-@click.option("-m", "--mesh", is_flag=True, help="Whether or not to"
-              " use Lattice method of parsing. Stream is used by default.")
 @click.option("-T", "--table_area", default=[], multiple=True,
               help="Table areas (x1,y1,x2,y2) to process.\n"
               " x1, y1 -> left-top and x2, y2 -> right-bottom")
@@ -39,12 +28,44 @@ class Mutex(click.Option):
               " super/subscripts)")
 @click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1),
               help="char_margin, line_margin, word_margin for PDFMiner.")
-@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
-              help="x-coordinates of column separators.")
-@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
-              " formed by combining text vertically within this tolerance.")
-@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
-              " be formed by combining text horizontally within this tolerance.")
+@click.option("-G", "--geometry_type",
+              type=click.Choice(["text", "table", "contour", "joint", "line"]),
+              help="Plot geometry found on pdf page for debugging.\n\n"
+              "text: Plot text objects. (Useful to get table_area and"
+              " columns coordinates)\ntable: Plot parsed table.\n"
+              "contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
+              " intersections.\nline (with --mesh): Plot detected lines.")
+@click.argument("filepath", type=click.Path(exists=True))
+def cli(*args, **kwargs):
+    pages = kwargs.pop("pages")
+    output = kwargs.pop("output")
+    f = kwargs.pop("format")
+    compress = kwargs.pop("zip")
+    mesh = kwargs.pop("mesh")
+    geometry_type = kwargs.pop("geometry_type")
+    filepath = kwargs.pop("filepath")
+
+    table_area = list(kwargs['table_area'])
+    kwargs['table_area'] = None if not table_area else table_area
+    columns = list(kwargs['columns'])
+    kwargs['columns'] = None if not columns else columns
+    copy_text = list(kwargs['copy_text'])
+    kwargs['copy_text'] = None if not copy_text else copy_text
+    kwargs['shift_text'] = list(kwargs['shift_text'])
+
+    kwargs = remove_extra(kwargs, mesh=mesh)
+    tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
+    click.echo(tables)
+    if output is None:
+        raise click.UsageError("Please specify an output filepath using --output")
+    if f is None:
+        raise click.UsageError("Please specify an output format using --format")
+    tables.export(output, f=f, compress=compress)
+
+
+@click.option("-T", "--table_area", default=[], multiple=True,
+              help="Table areas (x1,y1,x2,y2) to process.\n"
+              " x1, y1 -> left-top and x2, y2 -> right-bottom")
 @click.option("-back", "--process_background", is_flag=True, cls=Mutex,
               help="(with --mesh) Whether or not to process lines that are in"
               " background.")
@@ -75,40 +96,18 @@ class Mutex(click.Option):
 @click.option("-I", "--iterations", default=0, cls=Mutex,
               help="(with --mesh) Number of times for erosion/dilation is"
               " applied.")
-@click.option("-G", "--geometry_type",
-              type=click.Choice(["text", "table", "contour", "joint", "line"]),
-              help="Plot geometry found on pdf page for debugging.\n\n"
-              "text: Plot text objects. (Useful to get table_area and"
-              " columns coordinates)\ntable: Plot parsed table.\n"
-              "contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
-              " intersections.\nline (with --mesh): Plot detected lines.")
-@click.argument("filepath", type=click.Path(exists=True))
-def cli(*args, **kwargs):
-    pages = kwargs.pop("pages")
-    output = kwargs.pop("output")
-    f = kwargs.pop("format")
-    compress = kwargs.pop("zip")
-    mesh = kwargs.pop("mesh")
-    geometry_type = kwargs.pop("geometry_type")
-    filepath = kwargs.pop("filepath")
+def lattice(*args, **kwargs):
+    pass
 
-    table_area = list(kwargs['table_area'])
-    kwargs['table_area'] = None if not table_area else table_area
-    columns = list(kwargs['columns'])
-    kwargs['columns'] = None if not columns else columns
-    copy_text = list(kwargs['copy_text'])
-    kwargs['copy_text'] = None if not copy_text else copy_text
-    kwargs['shift_text'] = list(kwargs['shift_text'])
 
-    kwargs = remove_extra(kwargs, mesh=mesh)
-    if geometry_type is None:
-        tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
-        click.echo(tables)
-        if output is None:
-            raise click.UsageError("Please specify an output filepath using --output")
-        if f is None:
-            raise click.UsageError("Please specify an output format using --format")
-        tables.export(output, f=f, compress=compress)
-    else:
-        plot_geometry(filepath, pages=pages, mesh=mesh,
-                      geometry_type=geometry_type, **kwargs)
\ No newline at end of file
+@click.option("-T", "--table_area", default=[], multiple=True,
+              help="Table areas (x1,y1,x2,y2) to process.\n"
+              " x1, y1 -> left-top and x2, y2 -> right-bottom")
+@click.option("-C", "--columns", default=[], multiple=True,
+              help="x-coordinates of column separators.")
+@click.option("-r", "--row_close_tol", default=2, help="Rows will be"
+              " formed by combining text vertically within this tolerance.")
+@click.option("-c", "--col_close_tol", default=0, help="Columns will"
+              " be formed by combining text horizontally within this tolerance.")
+def stream(*args, **kwargs):
+    pass  # placeholder body; options no longer use the removed Mutex class
\ No newline at end of file
diff --git a/camelot/core.py b/camelot/core.py
index 22b7442..3813e60 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -6,6 +6,8 @@ import tempfile
 import numpy as np
 import pandas as pd
 
+from .plotting import *
+
 
 class Cell(object):
     """Defines a cell in a table with coordinates relative to a
@@ -318,6 +320,32 @@ class Table(object):
                     cell.hspan = True
         return self
 
+    def plot(self, geometry_type):
+        """Plot geometry found on PDF page based on geometry_type
+        specified, useful for debugging and playing with different
+        parameters to get the best output.
+
+        Parameters
+        ----------
+        geometry_type : str
+            One of 'text', 'table', 'contour', 'joint', 'line'. The
+            last three raise NotImplementedError with flavor='stream'.
+
+        """
+        if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
+            raise NotImplementedError(
+                "{} cannot be plotted with flavor='stream'".format(geometry_type))
+        if geometry_type == 'text':
+            plot_text(self._text)
+        elif geometry_type == 'table':
+            plot_table(self)
+        elif geometry_type == 'contour':
+            plot_contour(self._image)
+        elif geometry_type == 'joint':
+            plot_joint(self._image)
+        elif geometry_type == 'line':
+            plot_line(self._segments)
+
     def to_csv(self, path, **kwargs):
         """Writes Table to a comma-separated values (csv) file.
 
@@ -488,36 +516,4 @@ class TableList(object):
             if compress:
                 zipname = os.path.join(os.path.dirname(path), root) + '.zip'
                 with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
-                    z.write(filepath, os.path.basename(filepath))
-
-
-class Geometry(object):
-    def __init__(self):
-        self.text = []
-        self.images = ()
-        self.segments = ()
-        self.tables = []
-
-    def __repr__(self):
-        return '<{} text={} images={} segments={} tables={}>'.format(
-            self.__class__.__name__,
-            len(self.text),
-            len(self.images),
-            len(self.segments),
-            len(self.tables))
-
-
-class GeometryList(object):
-    def __init__(self, geometry):
-        self.text = [g.text for g in geometry]
-        self.images = [g.images for g in geometry]
-        self.segments = [g.segments for g in geometry]
-        self.tables = [g.tables for g in geometry]
-
-    def __repr__(self):
-        return '<{} text={} images={} segments={} tables={}>'.format(
-            self.__class__.__name__,
-            len(self.text),
-            len(self.images),
-            len(self.segments),
-            len(self.tables))
\ No newline at end of file
+                    z.write(filepath, os.path.basename(filepath))
\ No newline at end of file
diff --git a/camelot/handlers.py b/camelot/handlers.py
index 59b31c3..0ea9785 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -2,7 +2,7 @@ import os
 
 from PyPDF2 import PdfFileReader, PdfFileWriter
 
-from .core import TableList, GeometryList
+from .core import TableList
 from .parsers import Stream, Lattice
 from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
                     get_rotation)
@@ -17,7 +17,7 @@ class PDFHandler(object):
     ----------
     filename : str
         Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
         Comma-separated page numbers to parse.
         Example: 1,3,4 or 1,4-end
 
@@ -35,7 +35,7 @@ class PDFHandler(object):
         ----------
         filename : str
             Path to pdf file.
-        pages : str
+        pages : str, optional (default: '1')
             Comma-separated page numbers to parse.
             Example: 1,3,4 or 1,4-end
 
@@ -112,15 +112,15 @@ class PDFHandler(object):
                 with open(fpath, 'wb') as f:
                     outfile.write(f)
 
-    def parse(self, mesh=False, **kwargs):
+    def parse(self, flavor='lattice', **kwargs):
         """Extracts tables by calling parser.get_tables on all single
         page pdfs.
 
         Parameters
         ----------
-        mesh : bool (default: False)
-            Whether or not to use Lattice method of parsing. Stream
-            is used by default.
+        flavor : str (default: 'lattice')
+            The parsing method to use ('lattice' or 'stream').
+            Lattice is used by default.
         kwargs : dict
             See camelot.read_pdf kwargs.
 
@@ -134,15 +134,13 @@ class PDFHandler(object):
 
         """
         tables = []
-        geometry = []
         with TemporaryDirectory() as tempdir:
             for p in self.pages:
                 self._save_page(self.filename, p, tempdir)
             pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
                      for p in self.pages]
-            parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
+            parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
             for p in pages:
-                t, g = parser.extract_tables(p)
+                t = parser.extract_tables(p)
                 tables.extend(t)
-                geometry.append(g)
-        return TableList(tables), GeometryList(geometry)
\ No newline at end of file
+        return TableList(tables)
\ No newline at end of file
diff --git a/camelot/io.py b/camelot/io.py
index 328b107..f581735 100644
--- a/camelot/io.py
+++ b/camelot/io.py
@@ -2,22 +2,22 @@ from .handlers import PDFHandler
 from .utils import validate_input, remove_extra
 
 
-def read_pdf(filepath, pages='1', mesh=False, **kwargs):
+def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
     """Read PDF and return parsed data tables.
 
-    Note: kwargs annotated with ^ can only be used with mesh=False
-    and kwargs annotated with * can only be used with mesh=True.
+    Note: kwargs annotated with ^ can only be used with flavor='stream'
+    and kwargs annotated with * can only be used with flavor='lattice'.
 
     Parameters
     ----------
     filepath : str
         Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
         Comma-separated page numbers to parse.
         Example: 1,3,4 or 1,4-end
-    mesh : bool (default: False)
-        Whether or not to use Lattice method of parsing. Stream
-        is used by default.
+    flavor : str (default: 'lattice')
+        The parsing method to use ('lattice' or 'stream').
+        Lattice is used by default.
     table_area : list, optional (default: None)
         List of table areas to process as strings of the form
         x1,y1,x2,y2 where (x1, y1) -> left-top and
@@ -85,8 +85,8 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
     tables : camelot.core.TableList
 
     """
-    validate_input(kwargs, mesh=mesh)
+    validate_input(kwargs, flavor=flavor)
     p = PDFHandler(filepath, pages)
-    kwargs = remove_extra(kwargs, mesh=mesh)
-    tables, __ = p.parse(mesh=mesh, **kwargs)
+    kwargs = remove_extra(kwargs, flavor=flavor)
+    tables = p.parse(flavor=flavor, **kwargs)
     return tables
\ No newline at end of file
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index 9e569ab..5de6faa 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -194,7 +194,8 @@ class Lattice(BaseParser):
             stderr=subprocess.STDOUT)
 
     def _generate_table_bbox(self):
-        self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
+        self.image, self.threshold = adaptive_threshold(
+            self.imagename, process_background=self.process_background,
             blocksize=self.threshold_blocksize, c=self.threshold_constant)
         image_width = self.image.shape[1]
         image_height = self.image.shape[0]
@@ -297,11 +298,20 @@ class Lattice(BaseParser):
         table.shape = table.df.shape
 
         whitespace = compute_whitespace(data)
+        table.flavor = 'lattice'
         table.accuracy = accuracy
         table.whitespace = whitespace
         table.order = table_idx + 1
         table.page = int(os.path.basename(self.rootname).replace('page-', ''))
 
+        # for plotting
+        _text = []
+        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
+        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
+        table._text = _text
+        table._image = (self.image, self.table_bbox_unscaled)
+        table._segments = (self.vertical_segments, self.horizontal_segments)
+
         return table
 
     def extract_tables(self, filename):
@@ -311,7 +321,7 @@ class Lattice(BaseParser):
         if not self.horizontal_text:
             logger.info("No tables found on {}".format(
                 os.path.basename(self.rootname)))
-            return [], self.g
+            return []
 
         self._generate_image()
         self._generate_table_bbox()
@@ -324,13 +334,4 @@ class Lattice(BaseParser):
             table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
             _tables.append(table)
 
-        if self.debug:
-            text = []
-            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
-            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
-            self.g.text = text
-            self.g.images = (self.image, self.table_bbox_unscaled)
-            self.g.segments = (self.vertical_segments, self.horizontal_segments)
-            self.g.tables = _tables
-
-        return _tables, self.g
\ No newline at end of file
+        return _tables
\ No newline at end of file
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 6d29a05..b3acf38 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -333,11 +333,20 @@ class Stream(BaseParser):
         table.shape = table.df.shape
 
         whitespace = compute_whitespace(data)
+        table.flavor = 'stream'
         table.accuracy = accuracy
         table.whitespace = whitespace
         table.order = table_idx + 1
         table.page = int(os.path.basename(self.rootname).replace('page-', ''))
 
+        # for plotting
+        _text = []
+        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
+        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
+        table._text = _text
+        table._image = None
+        table._segments = None
+
         return table
 
     def extract_tables(self, filename):
@@ -347,7 +356,7 @@ class Stream(BaseParser):
         if not self.horizontal_text:
             logger.info("No tables found on {}".format(
                 os.path.basename(self.rootname)))
-            return [], self.g
+            return []
 
         self._generate_table_bbox()
 
@@ -359,11 +368,4 @@ class Stream(BaseParser):
             table = self._generate_table(table_idx, cols, rows)
             _tables.append(table)
 
-        if self.debug:
-            text = []
-            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
-            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
-            self.g.text = text
-            self.g.tables = _tables
-
-        return _tables, self.g
\ No newline at end of file
+        return _tables
\ No newline at end of file
diff --git a/camelot/plotting.py b/camelot/plotting.py
index 7a94b53..9c06887 100644
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@@ -2,165 +2,72 @@ import cv2
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 
-from .handlers import PDFHandler
-from .utils import validate_input, remove_extra
+
+def plot_text(text):
+    """Plot the bounding boxes of text objects and show the plot.
+
+    `text` is a list of (x0, y0, x1, y1) tuples.
+    """
+    fig = plt.figure()
+    ax = fig.add_subplot(111, aspect='equal')
+    xs, ys = [], []
+    for t in text:
+        # x extents are t[0]/t[2] and y extents are t[1]/t[3]
+        xs.extend([t[0], t[2]])
+        ys.extend([t[1], t[3]])
+        ax.add_patch(
+            patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
+    ax.set_xlim(min(xs) - 10, max(xs) + 10)
+    ax.set_ylim(min(ys) - 10, max(ys) + 10)
+    plt.show()
 
 
-def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs):
-    """Plot geometry found on pdf page based on type specified,
-    useful for debugging and playing with different parameters to get
-    the best output.
+def plot_table(table):
+    """Draw the borders of every cell in `table` and show the plot.
+
+    An edge is drawn only when the matching cell flag (left, right,
+    top or bottom) is set; corners come from cell.lb/lt/rb/rt.
+    """
+    for cell in (c for row in table.cells for c in row):
+        if cell.left:
+            plt.plot([cell.lb[0], cell.lt[0]], [cell.lb[1], cell.lt[1]])
+        if cell.right:
+            plt.plot([cell.rb[0], cell.rt[0]], [cell.rb[1], cell.rt[1]])
+        if cell.top:
+            plt.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]])
+        if cell.bottom:
+            plt.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]])
+    plt.show()
 
-    Note: kwargs annotated with ^ can only be used with mesh=False
-    and kwargs annotated with * can only be used with mesh=True.
 
-    Parameters
-    ----------
-    filepath : str
-        Path to pdf file.
-    pages : str
-        Comma-separated page numbers to parse.
-        Example: 1,3,4 or 1,4-end
-    mesh : bool (default: False)
-        Whether or not to use Lattice method of parsing. Stream
-        is used by default.
-    geometry_type : str, optional (default: None)
-        * 'text' : Plot text objects found on page. (Useful to get \
-                   table_area and columns coordinates)
-        * 'table' : Plot parsed table.
-        * 'contour'* : Plot detected rectangles.
-        * 'joint'* : Plot detected line intersections.
-        * 'line'* : Plot detected lines.
-    table_area : list, optional (default: None)
-        List of table areas to process as strings of the form
-        x1,y1,x2,y2 where (x1, y1) -> left-top and
-        (x2, y2) -> right-bottom in pdf coordinate space.
-    columns^ : list, optional (default: None)
-        List of column x-coordinates as strings where the coordinates
-        are comma-separated.
-    split_text : bool, optional (default: False)
-        Whether or not to split a text line if it spans across
-        multiple cells.
-    flag_size : bool, optional (default: False)
-        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string. (Useful for
-        super and subscripts.)
-    row_close_tol^ : int, optional (default: 2)
-        Rows will be formed by combining text vertically
-        within this tolerance.
-    col_close_tol^ : int, optional (default: 0)
-        Columns will be formed by combining text horizontally
-        within this tolerance.
-    process_background* : bool, optional (default: False)
-        Whether or not to process lines that are in background.
-    line_size_scaling* : int, optional (default: 15)
-        Factor by which the page dimensions will be divided to get
-        smallest length of lines that should be detected.
+def plot_contour(image):
+    """Show a copy of the image with each table bbox outlined."""
+    img, table_bbox = image[0].copy(), image[1]  # cv2.rectangle draws in place
+    for t in table_bbox.keys():
+        cv2.rectangle(img, (t[0], t[1]), (t[2], t[3]), (255, 0, 0), 20)
+    plt.imshow(img)
+    plt.show()
 
-        The larger this value, smaller the detected lines. Making it
-        too large will lead to text being detected as lines.
-    copy_text* : list, optional (default: None)
-        {'h', 'v'}
-        Select one or more strings from above and pass them as a list
-        to specify the direction in which text should be copied over
-        when a cell spans multiple rows or columns.
-    shift_text* : list, optional (default: ['l', 't'])
-        {'l', 'r', 't', 'b'}
-        Select one or more strings from above and pass them as a list
-        to specify where the text in a spanning cell should flow.
-    line_close_tol* : int, optional (default: 2)
-        Tolerance parameter used to merge vertical and horizontal
-        detected lines which lie close to each other.
-    joint_close_tol* : int, optional (default: 2)
-        Tolerance parameter used to decide whether the detected lines
-        and points lie close to each other.
-    threshold_blocksize* : int, optional (default: 15)
-        Size of a pixel neighborhood that is used to calculate a
-        threshold value for the pixel: 3, 5, 7, and so on.
 
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    threshold_constant* : int, optional (default: -2)
-        Constant subtracted from the mean or weighted mean.
-        Normally, it is positive but may be zero or negative as well.
+def plot_joint(image):
+    """Overlay detected line intersections, as red dots, on the
+    image taken from the (img, table_bbox) pair and show the plot.
+    """
+    img, table_bbox = image
+    joints = [pt for k in table_bbox.keys() for pt in table_bbox[k]]
+    x_coord = [pt[0] for pt in joints]
+    y_coord = [pt[1] for pt in joints]
+    max_x, max_y = max(x_coord), max(y_coord)
+    plt.plot(x_coord, y_coord, 'ro')
+    plt.axis([0, max_x + 100, max_y + 100, 0])
+    plt.imshow(img)
+    plt.show()
 
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    iterations* : int, optional (default: 0)
-        Number of times for erosion/dilation is applied.
 
-        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
-    margins : tuple
-        PDFMiner margins. (char_margin, line_margin, word_margin)
-
-        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
-
-    """
-    validate_input(kwargs, mesh=mesh, geometry_type=geometry_type)
-    p = PDFHandler(filepath, pages)
-    kwargs = remove_extra(kwargs, mesh=mesh)
-    debug = True if geometry_type is not None else False
-    kwargs.update({'debug': debug})
-    __, geometry = p.parse(mesh=mesh, **kwargs)
-
-    if geometry_type == 'text':
-        for text in geometry.text:
-            fig = plt.figure()
-            ax = fig.add_subplot(111, aspect='equal')
-            xs, ys = [], []
-            for t in text:
-                xs.extend([t[0], t[1]])
-                ys.extend([t[2], t[3]])
-                ax.add_patch(
-                    patches.Rectangle(
-                        (t[0], t[1]),
-                        t[2] - t[0],
-                        t[3] - t[1]
-                    )
-                )
-            ax.set_xlim(min(xs) - 10, max(xs) + 10)
-            ax.set_ylim(min(ys) - 10, max(ys) + 10)
-            plt.show()
-    elif geometry_type == 'table':
-        for tables in geometry.tables:
-            for table in tables:
-                for row in table.cells:
-                    for cell in row:
-                        if cell.left:
-                            plt.plot([cell.lb[0], cell.lt[0]],
-                                     [cell.lb[1], cell.lt[1]])
-                        if cell.right:
-                            plt.plot([cell.rb[0], cell.rt[0]],
-                                     [cell.rb[1], cell.rt[1]])
-                        if cell.top:
-                            plt.plot([cell.lt[0], cell.rt[0]],
-                                     [cell.lt[1], cell.rt[1]])
-                        if cell.bottom:
-                            plt.plot([cell.lb[0], cell.rb[0]],
-                                     [cell.lb[1], cell.rb[1]])
-            plt.show()
-    elif geometry_type == 'contour':
-        for img, table_bbox in geometry.images:
-            for t in table_bbox.keys():
-                cv2.rectangle(img, (t[0], t[1]),
-                              (t[2], t[3]), (255, 0, 0), 20)
-            plt.imshow(img)
-            plt.show()
-    elif geometry_type == 'joint':
-        for img, table_bbox in geometry.images:
-            x_coord = []
-            y_coord = []
-            for k in table_bbox.keys():
-                for coord in table_bbox[k]:
-                    x_coord.append(coord[0])
-                    y_coord.append(coord[1])
-            max_x, max_y = max(x_coord), max(y_coord)
-            plt.plot(x_coord, y_coord, 'ro')
-            plt.axis([0, max_x + 100, max_y + 100, 0])
-            plt.imshow(img)
-            plt.show()
-    elif geometry_type == 'line':
-        for v_s, h_s in geometry.segments:
-            for v in v_s:
-                plt.plot([v[0], v[2]], [v[1], v[3]])
-            for h in h_s:
-                plt.plot([h[0], h[2]], [h[1], h[3]])
-            plt.show()
\ No newline at end of file
+def plot_line(segments):
+    """Plot the (vertical, horizontal) line segments and show the plot."""
+    vertical, horizontal = segments
+    for group in (vertical, horizontal):
+        for s in group:
+            plt.plot([s[0], s[2]], [s[1], s[3]])
+    plt.show()
\ No newline at end of file
diff --git a/camelot/utils.py b/camelot/utils.py
index c0f4a59..156373e 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -38,25 +38,25 @@ lattice_kwargs = [
 ]
 
 
-def validate_input(kwargs, mesh=False, geometry_type=False):
-    def check_intersection(parser_kwargs, input_kwargs, message_bool):
+def validate_input(kwargs, flavor='lattice', geometry_type=False):
+    def check_intersection(parser_kwargs, input_kwargs):
         isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
         if isec:
-            raise ValueError("{} can not be used with mesh set to {}".format(
-                             ",".join(sorted(isec)), message_bool))
+            raise ValueError("{} cannot be used with flavor='{}'".format(
+                             ",".join(sorted(isec)), flavor))
 
-    if mesh:
-        check_intersection(stream_kwargs, kwargs, True)
+    if flavor == 'lattice':
+        check_intersection(stream_kwargs, kwargs)
     else:
-        check_intersection(lattice_kwargs, kwargs, False)
+        check_intersection(lattice_kwargs, kwargs)
     if geometry_type:
-        if not mesh and geometry_type in ['contour', 'joint', 'line']:
-            raise ValueError("Use geometry_type={} with mesh set to True".format(
+        if flavor != 'lattice' and geometry_type in ['contour', 'joint', 'line']:
+            raise ValueError("Use geometry_type='{}' with flavor='lattice'".format(
                              geometry_type))
 
 
-def remove_extra(kwargs, mesh=False):
-    if mesh:
+def remove_extra(kwargs, flavor='lattice'):
+    if flavor == 'lattice':
         for key in kwargs.keys():
             if key in stream_kwargs:
                 kwargs.pop(key)
diff --git a/setup.py b/setup.py
index d37bcf2..00d6e8f 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
     exec(f.read(), about)
 
 # TODO: Move these to __version__.py
-NAME = 'camelot'
+NAME = 'camelot-py'
 VERSION = about['__version__']
 DESCRIPTION = 'PDF Table Parsing for Humans'
 with open('README.md') as f:
diff --git a/tests/test_common.py b/tests/test_common.py
index 52f966a..065a9e2 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -18,11 +18,11 @@ def test_stream_table_rotated():
     df = pd.DataFrame(data_stream_table_rotated)
 
     filename = os.path.join(testdir, "clockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
     assert df.equals(tables[0].df)
 
     filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
     assert df.equals(tables[0].df)
 
 
@@ -30,7 +30,7 @@ def test_stream_table_area():
     df = pd.DataFrame(data_stream_table_area_single)
 
     filename = os.path.join(testdir, "tabula/us-007.pdf")
-    tables = camelot.read_pdf(filename, table_area=["320,500,573,335"])
+    tables = camelot.read_pdf(filename, flavor="stream", table_area=["320,500,573,335"])
     assert df.equals(tables[0].df)
 
 
@@ -39,7 +39,7 @@ def test_stream_columns():
 
     filename = os.path.join(testdir, "mexican_towns.pdf")
     tables = camelot.read_pdf(
-        filename, columns=["67,180,230,425,475"], row_close_tol=10)
+        filename, flavor="stream", columns=["67,180,230,425,475"], row_close_tol=10)
     assert df.equals(tables[0].df)
 
 
@@ -48,7 +48,7 @@ def test_lattice():
 
     filename = os.path.join(testdir,
         "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
-    tables = camelot.read_pdf(filename, pages="2", mesh=True)
+    tables = camelot.read_pdf(filename, pages="2")
     assert df.equals(tables[0].df)
 
 
@@ -56,11 +56,11 @@ def test_lattice_table_rotated():
     df = pd.DataFrame(data_lattice_table_rotated)
 
     filename = os.path.join(testdir, "clockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
     assert df.equals(tables[0].df)
 
     filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
     assert df.equals(tables[0].df)
 
 
@@ -68,7 +68,7 @@ def test_lattice_process_background():
     df = pd.DataFrame(data_lattice_process_background)
 
     filename = os.path.join(testdir, "background_lines_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, process_background=True)
+    tables = camelot.read_pdf(filename, process_background=True)
     assert df.equals(tables[1].df)
 
 
@@ -76,5 +76,5 @@ def test_lattice_copy_text():
     df = pd.DataFrame(data_lattice_copy_text)
 
     filename = os.path.join(testdir, "row_span_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, line_size_scaling=60, copy_text="v")
+    tables = camelot.read_pdf(filename, line_size_scaling=60, copy_text="v")
     assert df.equals(tables[0].df)
\ No newline at end of file