Merge pull request #107 from socialcopsdev/add_flavor

Deprecate mesh and add flavors
2018-09-23 14:04:38 +05:30 · 2018-09-23 14:04:38 +05:30 · 481b62a9f6
parent 4a30c5a514 a70befe528
commit 481b62a9f6
21 changed files with 397 additions and 445 deletions
--- a/README.md
+++ b/README.md
@ -12,7 +12,7 @@
 <pre>
 >>> import camelot
->>> tables = camelot.read_pdf('foo.pdf', mesh=True)
+>>> tables = camelot.read_pdf('foo.pdf')
 >>> tables
 &lt;TableList tables=1&gt;
 >>> tables.export('foo.csv', f='csv', compress=True) # json, excel, html
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@ -1,4 +1,3 @@
 from .__version__ import __version__
-from .io import read_pdf
+from .io import read_pdf
 from .plotting import plot_geometry
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -5,19 +5,20 @@ import click
 from . import __version__
 from .io import read_pdf
 from .plotting import plot_geometry
 from .utils import validate_input, remove_extra
-class Mutex(click.Option):
+class Config(object):
-    def handle_parse_result(self, ctx, opts, args):
+    def  __init__(self):
-        mesh = opts.get('mesh', False)
+        self.config = {}
-        geometry_type = opts.get('geometry_type', False)
+
-        validate_input(opts, mesh=mesh, geometry_type=geometry_type)
+    def set_config(self, key, value):
-        return super(Mutex, self).handle_parse_result(ctx, opts, args)
+        self.config[key] = value
-@click.command()
+pass_config = click.make_pass_decorator(Config)
@click.group()
@click.version_option(version=__version__)
@click.option("-p", "--pages", default="1", help="Comma-separated page numbers"
              " to parse. Example: 1,3,4 or 1,4-end")
@ -27,11 +28,6 @@ class Mutex(click.Option):
              help="Output file format.")
@click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP"
              " archive.")
@click.option("-m", "--mesh", is_flag=True, help="Whether or not to"
              " use Lattice method of parsing. Stream is used by default.")
@click.option("-T", "--table_area", default=[], multiple=True,
              help="Table areas (x1,y1,x2,y2) to process.\n"
              " x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-split", "--split_text", is_flag=True, help="Whether or not to"
              " split text if it spans across multiple cells.")
@click.option("-flag", "--flag_size", is_flag=True, help="(inactive) Whether or"
@ -39,76 +35,121 @@ class Mutex(click.Option):
              " super/subscripts)")
@click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1),
              help="char_margin, line_margin, word_margin for PDFMiner.")
-@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
+@click.pass_context
-              help="x-coordinates of column separators.")
+def cli(ctx, *args, **kwargs):
-@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
+    ctx.obj = Config()
-              " formed by combining text vertically within this tolerance.")
+    for key, value in kwargs.iteritems():
-@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
+        ctx.obj.set_config(key, value)
-              " be formed by combining text horizontally within this tolerance.")
+
-@click.option("-back", "--process_background", is_flag=True, cls=Mutex,
+
@cli.command('lattice')
@click.option("-T", "--table_area", default=[], multiple=True,
              help="Table areas (x1,y1,x2,y2) to process.\n"
              " x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-back", "--process_background", is_flag=True,
              help="(with --mesh) Whether or not to process lines that are in"
              " background.")
-@click.option("-scale", "--line_size_scaling", default=15, cls=Mutex,
+@click.option("-scale", "--line_size_scaling", default=15,
              help="(with --mesh) Factor by which the page dimensions will be"
              " divided to get smallest length of detected lines.")
@click.option("-copy", "--copy_text", default=[], type=click.Choice(["h", "v"]),
-              multiple=True, cls=Mutex, help="(with --mesh) Specify direction"
+              multiple=True, help="(with --mesh) Specify direction"
              " in which text will be copied over in a spanning cell.")
@click.option("-shift", "--shift_text", default=["l", "t"],
-              type=click.Choice(["", "l", "r", "t", "b"]), multiple=True, cls=Mutex,
+              type=click.Choice(["", "l", "r", "t", "b"]), multiple=True,
              help="(with --mesh) Specify direction in which text in a spanning"
              " cell should flow.")
-@click.option("-l", "--line_close_tol", default=2, cls=Mutex,
+@click.option("-l", "--line_close_tol", default=2,
              help="(with --mesh) Tolerance parameter used to merge close vertical"
              " lines and close horizontal lines.")
-@click.option("-j", "--joint_close_tol", default=2, cls=Mutex,
+@click.option("-j", "--joint_close_tol", default=2,
              help="(with --mesh) Tolerance parameter used to decide whether"
              " the detected lines and points lie close to each other.")
-@click.option("-block", "--threshold_blocksize", default=15, cls=Mutex,
+@click.option("-block", "--threshold_blocksize", default=15,
              help="(with --mesh) For adaptive thresholding, size of a pixel"
              " neighborhood that is used to calculate a threshold value for"
              " the pixel: 3, 5, 7, and so on.")
-@click.option("-const", "--threshold_constant", default=-2, cls=Mutex,
+@click.option("-const", "--threshold_constant", default=-2,
              help="(with --mesh) For adaptive thresholding, constant subtracted"
              " from the mean or weighted mean.\nNormally, it is positive but"
              " may be zero or negative as well.")
-@click.option("-I", "--iterations", default=0, cls=Mutex,
+@click.option("-I", "--iterations", default=0,
              help="(with --mesh) Number of times for erosion/dilation is"
              " applied.")
-@click.option("-G", "--geometry_type",
+@click.option("-plot", "--plot_type",
              type=click.Choice(["text", "table", "contour", "joint", "line"]),
-              help="Plot geometry found on pdf page for debugging.\n\n"
+              help="Plot geometry found on PDF page for debugging.")
              "text: Plot text objects. (Useful to get table_area and"
              " columns coordinates)\ntable: Plot parsed table.\n"
              "contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
              " intersections.\nline (with --mesh): Plot detected lines.")
@click.argument("filepath", type=click.Path(exists=True))
-def cli(*args, **kwargs):
+@pass_config
-    pages = kwargs.pop("pages")
+def lattice(c, *args, **kwargs):
-    output = kwargs.pop("output")
+    """Use lines between text to parse table."""
-    f = kwargs.pop("format")
+    conf = c.config
-    compress = kwargs.pop("zip")
+    pages = conf.pop("pages")
-    mesh = kwargs.pop("mesh")
+    output = conf.pop("output")
-    geometry_type = kwargs.pop("geometry_type")
+    f = conf.pop("format")
    compress = conf.pop("zip")
    plot_type = kwargs.pop('plot_type')
    filepath = kwargs.pop("filepath")
    kwargs.update(conf)
    table_area = list(kwargs['table_area'])
    kwargs['table_area'] = None if not table_area else table_area
    copy_text = list(kwargs['copy_text'])
    kwargs['copy_text'] = None if not copy_text else copy_text
    kwargs['shift_text'] = list(kwargs['shift_text'])
    tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
    click.echo(tables)
    if plot_type is not None:
        for table in tables:
            table.plot(plot_type)
    else:
        if output is None:
            raise click.UsageError("Please specify output filepath using --output")
        if f is None:
            raise click.UsageError("Please specify output format using --format")
        tables.export(output, f=f, compress=compress)
@cli.command('stream')
@click.option("-T", "--table_area", default=[], multiple=True,
              help="Table areas (x1,y1,x2,y2) to process.\n"
              " x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-C", "--columns", default=[], multiple=True,
              help="x-coordinates of column separators.")
@click.option("-r", "--row_close_tol", default=2, help="Rows will be"
              " formed by combining text vertically within this tolerance.")
@click.option("-c", "--col_close_tol", default=0, help="Columns will"
              " be formed by combining text horizontally within this tolerance.")
@click.option("-plot", "--plot_type",
              type=click.Choice(["text", "table"]),
              help="Plot geometry found on PDF page for debugging.")
@click.argument("filepath", type=click.Path(exists=True))
@pass_config
 def stream(c, *args, **kwargs):
    """Use spaces between text to parse table."""
    conf = c.config
    pages = conf.pop("pages")
    output = conf.pop("output")
    f = conf.pop("format")
    compress = conf.pop("zip")
    plot_type = kwargs.pop('plot_type')
    filepath = kwargs.pop("filepath")
    kwargs.update(conf)
    table_area = list(kwargs['table_area'])
    kwargs['table_area'] = None if not table_area else table_area
    columns = list(kwargs['columns'])
    kwargs['columns'] = None if not columns else columns
    copy_text = list(kwargs['copy_text'])
    kwargs['copy_text'] = None if not copy_text else copy_text
    kwargs['shift_text'] = list(kwargs['shift_text'])
-    kwargs = remove_extra(kwargs, mesh=mesh)
+    tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
-    if geometry_type is None:
+    click.echo(tables)
-        tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
+    if plot_type is not None:
-        click.echo(tables)
+        for table in tables:
-        if output is None:
+            table.plot(plot_type)
            raise click.UsageError("Please specify an output filepath using --output")
        if f is None:
            raise click.UsageError("Please specify an output format using --format")
        tables.export(output, f=f, compress=compress)
    else:
-        plot_geometry(filepath, pages=pages, mesh=mesh,
+        if output is None:
-                      geometry_type=geometry_type, **kwargs)
+            raise click.UsageError("Please specify output filepath using --output")
        if f is None:
            raise click.UsageError("Please specify output format using --format")
        tables.export(output, f=f, compress=compress)
--- a/camelot/core.py
+++ b/camelot/core.py
@ -6,6 +6,8 @@ import tempfile
 import numpy as np
 import pandas as pd
 from .plotting import *
 class Cell(object):
    """Defines a cell in a table with coordinates relative to a
@ -318,6 +320,33 @@ class Table(object):
                    cell.hspan = True
        return self
    def plot(self, geometry_type):
        """Render a debugging plot of one kind of geometry for this table.

        Handy while experimenting with parsing parameters to improve
        the output.

        Parameters
        ----------
        geometry_type : str
            Which geometry to draw: 'text', 'table', 'contour',
            'joint' or 'line'. Any other value draws nothing.

        Raises
        ------
        NotImplementedError
            If this table's flavor is 'stream' and geometry_type is
            'contour', 'joint' or 'line'.

        """
        lattice_only = ['contour', 'joint', 'line']
        if self.flavor == 'stream' and geometry_type in lattice_only:
            message = "{} cannot be plotted with flavor='stream'".format(
                geometry_type)
            raise NotImplementedError(message)

        # Each entry pairs a geometry name with a deferred call, so the
        # plotting helper and the attribute it needs are only touched
        # for the name that actually matches.
        renderers = (
            ('text', lambda: plot_text(self._text)),
            ('table', lambda: plot_table(self)),
            ('contour', lambda: plot_contour(self._image)),
            ('joint', lambda: plot_joint(self._image)),
            ('line', lambda: plot_line(self._segments)),
        )
        for name, render in renderers:
            if geometry_type == name:
                render()
                break
    def to_csv(self, path, **kwargs):
        """Writes Table to a comma-separated values (csv) file.
@ -416,13 +445,25 @@ class TableList(object):
    def __getitem__(self, idx):
        return self._tables[idx]
    def __iter__(self):
        """Reset the iteration cursor and return this object as its own
        iterator.
        """
        self._n = 0
        return self

    def next(self):
        """Return the table at the cursor and advance the cursor by one.

        Raises
        ------
        StopIteration
            Once the cursor has reached ``len(self)``.

        """
        if self._n >= len(self):
            raise StopIteration
        table = self._tables[self._n]
        self._n += 1
        return table

    # Python 3 looks up ``__next__`` instead of ``next``; without this
    # alias ``for table in tables`` raises TypeError there.
    __next__ = next
    @staticmethod
    def _format_func(table, f):
        return getattr(table, 'to_{}'.format(f))
    @property
    def n(self):
-        return len(self._tables)
+        return len(self)
    def _write_file(self, f=None, **kwargs):
        dirname = kwargs.get('dirname')
@ -488,36 +529,4 @@ class TableList(object):
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + '.zip'
                with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
-                    z.write(filepath, os.path.basename(filepath))
+                    z.write(filepath, os.path.basename(filepath))
 class Geometry(object):
    """Holds four collections: text, images, segments and tables.

    Every collection starts out empty; ``repr`` reports their lengths.
    """

    def __init__(self):
        self.text, self.tables = [], []
        self.images, self.segments = (), ()

    def __repr__(self):
        counts = (len(self.text), len(self.images),
                  len(self.segments), len(self.tables))
        template = '<{} text={} images={} segments={} tables={}>'
        return template.format(self.__class__.__name__, *counts)
 class GeometryList(object):
    """Regroups the attributes of several geometry objects by name.

    For each of text, images, segments and tables, stores a list with
    that attribute taken from every item of ``geometry``, in order.
    """

    def __init__(self, geometry):
        # One pass over ``geometry`` per attribute, in this fixed order.
        for field in ('text', 'images', 'segments', 'tables'):
            collected = [getattr(item, field) for item in geometry]
            setattr(self, field, collected)

    def __repr__(self):
        counts = (len(self.text), len(self.images),
                  len(self.segments), len(self.tables))
        template = '<{} text={} images={} segments={} tables={}>'
        return template.format(self.__class__.__name__, *counts)
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -2,7 +2,7 @@ import os
 from PyPDF2 import PdfFileReader, PdfFileWriter
-from .core import TableList, GeometryList
+from .core import TableList
 from .parsers import Stream, Lattice
 from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
                    get_rotation)
@ -17,7 +17,7 @@ class PDFHandler(object):
    ----------
    filename : str
        Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
        Comma-separated page numbers to parse.
        Example: 1,3,4 or 1,4-end
@ -35,7 +35,7 @@ class PDFHandler(object):
        ----------
        filename : str
            Path to pdf file.
-        pages : str
+        pages : str, optional (default: '1')
            Comma-separated page numbers to parse.
            Example: 1,3,4 or 1,4-end
@ -112,15 +112,15 @@ class PDFHandler(object):
                with open(fpath, 'wb') as f:
                    outfile.write(f)
-    def parse(self, mesh=False, **kwargs):
+    def parse(self, flavor='lattice', **kwargs):
        """Extracts tables by calling parser.get_tables on all single
        page pdfs.
        Parameters
        ----------
-        mesh : bool (default: False)
+        flavor : str (default: 'lattice')
-            Whether or not to use Lattice method of parsing. Stream
+            The parsing method to use ('lattice' or 'stream').
-            is used by default.
+            Lattice is used by default.
        kwargs : dict
            See camelot.read_pdf kwargs.
@ -134,15 +134,13 @@ class PDFHandler(object):
        """
        tables = []
        geometry = []
        with TemporaryDirectory() as tempdir:
            for p in self.pages:
                self._save_page(self.filename, p, tempdir)
            pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
                     for p in self.pages]
-            parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
+            parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
            for p in pages:
-                t, g = parser.extract_tables(p)
+                t = parser.extract_tables(p)
                tables.extend(t)
-                geometry.append(g)
+        return TableList(tables)
        return TableList(tables), GeometryList(geometry)
--- a/camelot/io.py
+++ b/camelot/io.py
@ -2,22 +2,22 @@ from .handlers import PDFHandler
 from .utils import validate_input, remove_extra
-def read_pdf(filepath, pages='1', mesh=False, **kwargs):
+def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
    """Read PDF and return parsed data tables.
-    Note: kwargs annotated with ^ can only be used with mesh=False
+    Note: kwargs annotated with ^ can only be used with flavor='stream'
-    and kwargs annotated with * can only be used with mesh=True.
+    and kwargs annotated with * can only be used with flavor='lattice'.
    Parameters
    ----------
    filepath : str
        Path to pdf file.
-    pages : str
+    pages : str, optional (default: '1')
        Comma-separated page numbers to parse.
        Example: 1,3,4 or 1,4-end
-    mesh : bool (default: False)
+    flavor : str (default: 'lattice')
-        Whether or not to use Lattice method of parsing. Stream
+        The parsing method to use ('lattice' or 'stream').
-        is used by default.
+        Lattice is used by default.
    table_area : list, optional (default: None)
        List of table areas to process as strings of the form
        x1,y1,x2,y2 where (x1, y1) -> left-top and
@ -85,8 +85,12 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
    tables : camelot.core.TableList
    """
-    validate_input(kwargs, mesh=mesh)
+    if flavor not in ['lattice', 'stream']:
        raise NotImplementedError("Unknown flavor specified."
                                  " Use either 'lattice' or 'stream'")
    validate_input(kwargs, flavor=flavor)
    p = PDFHandler(filepath, pages)
-    kwargs = remove_extra(kwargs, mesh=mesh)
+    kwargs = remove_extra(kwargs, flavor=flavor)
-    tables, __ = p.parse(mesh=mesh, **kwargs)
+    tables = p.parse(flavor=flavor, **kwargs)
    return tables
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -1,6 +1,5 @@
 import os
 from ..core import Geometry
 from ..utils import get_page_layout, get_text_objects
@ -17,5 +16,4 @@ class BaseParser(object):
        self.horizontal_text = get_text_objects(self.layout, ltype="lh")
        self.vertical_text = get_text_objects(self.layout, ltype="lv")
        self.pdf_width, self.pdf_height = self.dimensions
-        self.rootname, __ = os.path.splitext(self.filename)
+        self.rootname, __ = os.path.splitext(self.filename)
        self.g = Geometry()
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -21,7 +21,7 @@ logger = setup_logging(__name__)
 class Lattice(BaseParser):
    """Lattice method of parsing looks for lines between text
-    to form a table.
+    to parse table.
    Parameters
    ----------
@ -77,17 +77,13 @@ class Lattice(BaseParser):
        PDFMiner margins. (char_margin, line_margin, word_margin)
        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
    debug : bool, optional (default: False)
        Whether or not to return all text objects on the page
        which can be used to generate a matplotlib plot, to get
        values for table_area(s) and debugging.
    """
    def __init__(self, table_area=None, process_background=False,
                 line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
                 split_text=False, flag_size=False, line_close_tol=2,
                 joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
-                 iterations=0, margins=(1.0, 0.5, 0.1), debug=False):
+                 iterations=0, margins=(1.0, 0.5, 0.1), **kwargs):
        self.table_area = table_area
        self.process_background = process_background
        self.line_size_scaling = line_size_scaling
@ -101,7 +97,6 @@ class Lattice(BaseParser):
        self.threshold_constant = threshold_constant
        self.iterations = iterations
        self.char_margin, self.line_margin, self.word_margin = margins
        self.debug = debug
    @staticmethod
    def _reduce_index(t, idx, shift_text):
@ -194,7 +189,8 @@ class Lattice(BaseParser):
            stderr=subprocess.STDOUT)
    def _generate_table_bbox(self):
-        self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
+        self.image, self.threshold = adaptive_threshold(
            self.imagename, process_background=self.process_background,
            blocksize=self.threshold_blocksize, c=self.threshold_constant)
        image_width = self.image.shape[1]
        image_height = self.image.shape[0]
@ -297,11 +293,20 @@ class Lattice(BaseParser):
        table.shape = table.df.shape
        whitespace = compute_whitespace(data)
        table.flavor = 'lattice'
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        table.page = int(os.path.basename(self.rootname).replace('page-', ''))
        # for plotting
        _text = []
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
        table._image = (self.image, self.table_bbox_unscaled)
        table._segments = (self.vertical_segments, self.horizontal_segments)
        return table
    def extract_tables(self, filename):
@ -311,7 +316,7 @@ class Lattice(BaseParser):
        if not self.horizontal_text:
            logger.info("No tables found on {}".format(
                os.path.basename(self.rootname)))
-            return [], self.g
+            return []
        self._generate_image()
        self._generate_table_bbox()
@ -324,13 +329,4 @@ class Lattice(BaseParser):
            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
            _tables.append(table)
-        if self.debug:
+        return _tables
            text = []
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
            self.g.text = text
            self.g.images = (self.image, self.table_bbox_unscaled)
            self.g.segments = (self.vertical_segments, self.horizontal_segments)
            self.g.tables = _tables
        return _tables, self.g
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -16,7 +16,7 @@ logger = setup_logging(__name__)
 class Stream(BaseParser):
    """Stream method of parsing looks for spaces between text
-    to form a table.
+    to parse table.
    If you want to specify columns when specifying multiple table
    areas, make sure that the length of both lists are equal.
@ -47,15 +47,11 @@ class Stream(BaseParser):
        PDFMiner margins. (char_margin, line_margin, word_margin)
        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
    debug : bool, optional (default: False)
        Whether or not to return all text objects on the page
        which can be used to generate a matplotlib plot, to get
        values for table_area(s), columns and debugging.
    """
    def __init__(self, table_area=None, columns=None, split_text=False,
                 flag_size=False, row_close_tol=2, col_close_tol=0,
-                 margins=(1.0, 0.5, 0.1), debug=False):
+                 margins=(1.0, 0.5, 0.1), **kwargs):
        self.table_area = table_area
        self.columns = columns
        self._validate_columns()
@ -64,7 +60,6 @@ class Stream(BaseParser):
        self.row_close_tol = row_close_tol
        self.col_close_tol = col_close_tol
        self.char_margin, self.line_margin, self.word_margin = margins
        self.debug = debug
    @staticmethod
    def _text_bbox(t_bbox):
@ -333,11 +328,20 @@ class Stream(BaseParser):
        table.shape = table.df.shape
        whitespace = compute_whitespace(data)
        table.flavor = 'stream'
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        table.page = int(os.path.basename(self.rootname).replace('page-', ''))
        # for plotting
        _text = []
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
        table._image = None
        table._segments = None
        return table
    def extract_tables(self, filename):
@ -347,7 +351,7 @@ class Stream(BaseParser):
        if not self.horizontal_text:
            logger.info("No tables found on {}".format(
                os.path.basename(self.rootname)))
-            return [], self.g
+            return []
        self._generate_table_bbox()
@ -359,11 +363,4 @@ class Stream(BaseParser):
            table = self._generate_table(table_idx, cols, rows)
            _tables.append(table)
-        if self.debug:
+        return _tables
            text = []
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
            text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
            self.g.text = text
            self.g.tables = _tables
        return _tables, self.g
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -2,165 +2,107 @@ import cv2
 import matplotlib.pyplot as plt
 import matplotlib.patches as patches
 from .handlers import PDFHandler
 from .utils import validate_input, remove_extra
-
+def plot_text(text):
-def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs):
+    """Generates a plot for all text present on the PDF page.
    """Plot geometry found on pdf page based on type specified,
    useful for debugging and playing with different parameters to get
    the best output.
    Note: kwargs annotated with ^ can only be used with mesh=False
    and kwargs annotated with * can only be used with mesh=True.
    Parameters
    ----------
-    filepath : str
+    text : list
        Path to pdf file.
    pages : str
        Comma-separated page numbers to parse.
        Example: 1,3,4 or 1,4-end
    mesh : bool (default: False)
        Whether or not to use Lattice method of parsing. Stream
        is used by default.
    geometry_type : str, optional (default: None)
        * 'text' : Plot text objects found on page. (Useful to get \
                   table_area and columns coordinates)
        * 'table' : Plot parsed table.
        * 'contour'* : Plot detected rectangles.
        * 'joint'* : Plot detected line intersections.
        * 'line'* : Plot detected lines.
    table_area : list, optional (default: None)
        List of table areas to process as strings of the form
        x1,y1,x2,y2 where (x1, y1) -> left-top and
        (x2, y2) -> right-bottom in pdf coordinate space.
    columns^ : list, optional (default: None)
        List of column x-coordinates as strings where the coordinates
        are comma-separated.
    split_text : bool, optional (default: False)
        Whether or not to split a text line if it spans across
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
        if its size is different from rest of the string. (Useful for
        super and subscripts.)
    row_close_tol^ : int, optional (default: 2)
        Rows will be formed by combining text vertically
        within this tolerance.
    col_close_tol^ : int, optional (default: 0)
        Columns will be formed by combining text horizontally
        within this tolerance.
    process_background* : bool, optional (default: False)
        Whether or not to process lines that are in background.
    line_size_scaling* : int, optional (default: 15)
        Factor by which the page dimensions will be divided to get
        smallest length of lines that should be detected.
        The larger this value, smaller the detected lines. Making it
        too large will lead to text being detected as lines.
    copy_text* : list, optional (default: None)
        {'h', 'v'}
        Select one or more strings from above and pass them as a list
        to specify the direction in which text should be copied over
        when a cell spans multiple rows or columns.
    shift_text* : list, optional (default: ['l', 't'])
        {'l', 'r', 't', 'b'}
        Select one or more strings from above and pass them as a list
        to specify where the text in a spanning cell should flow.
    line_close_tol* : int, optional (default: 2)
        Tolerance parameter used to merge vertical and horizontal
        detected lines which lie close to each other.
    joint_close_tol* : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
    threshold_blocksize* : int, optional (default: 15)
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.
        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
    threshold_constant* : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.
        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
    iterations* : int, optional (default: 0)
        Number of times for erosion/dilation is applied.
        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    margins : tuple
        PDFMiner margins. (char_margin, line_margin, word_margin)
        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
    """
-    validate_input(kwargs, mesh=mesh, geometry_type=geometry_type)
+    fig = plt.figure()
-    p = PDFHandler(filepath, pages)
+    ax = fig.add_subplot(111, aspect='equal')
-    kwargs = remove_extra(kwargs, mesh=mesh)
+    xs, ys = [], []
-    debug = True if geometry_type is not None else False
+    for t in text:
-    kwargs.update({'debug': debug})
+        xs.extend([t[0], t[2]])
-    __, geometry = p.parse(mesh=mesh, **kwargs)
+        ys.extend([t[1], t[3]])
        ax.add_patch(
            patches.Rectangle(
                (t[0], t[1]),
                t[2] - t[0],
                t[3] - t[1]
            )
        )
    ax.set_xlim(min(xs) - 10, max(xs) + 10)
    ax.set_ylim(min(ys) - 10, max(ys) + 10)
    plt.show()
-    if geometry_type == 'text':
+
-        for text in geometry.text:
+def plot_table(table):
-            fig = plt.figure()
+    """Generates a plot for the table.
-            ax = fig.add_subplot(111, aspect='equal')
+
-            xs, ys = [], []
+    Parameters
-            for t in text:
+    ----------
-                xs.extend([t[0], t[1]])
+    table : camelot.core.Table
-                ys.extend([t[2], t[3]])
+
-                ax.add_patch(
+    """
-                    patches.Rectangle(
+    for row in table.cells:
-                        (t[0], t[1]),
+        for cell in row:
-                        t[2] - t[0],
+            if cell.left:
-                        t[3] - t[1]
+                plt.plot([cell.lb[0], cell.lt[0]],
-                    )
+                            [cell.lb[1], cell.lt[1]])
-                )
+            if cell.right:
-            ax.set_xlim(min(xs) - 10, max(xs) + 10)
+                plt.plot([cell.rb[0], cell.rt[0]],
-            ax.set_ylim(min(ys) - 10, max(ys) + 10)
+                            [cell.rb[1], cell.rt[1]])
-            plt.show()
+            if cell.top:
-    elif geometry_type == 'table':
+                plt.plot([cell.lt[0], cell.rt[0]],
-        for tables in geometry.tables:
+                            [cell.lt[1], cell.rt[1]])
-            for table in tables:
+            if cell.bottom:
-                for row in table.cells:
+                plt.plot([cell.lb[0], cell.rb[0]],
-                    for cell in row:
+                            [cell.lb[1], cell.rb[1]])
-                        if cell.left:
+    plt.show()
-                            plt.plot([cell.lb[0], cell.lt[0]],
+
-                                     [cell.lb[1], cell.lt[1]])
+
-                        if cell.right:
+def plot_contour(image):
-                            plt.plot([cell.rb[0], cell.rt[0]],
+    """Generates a plot for all table boundaries present on the
-                                     [cell.rb[1], cell.rt[1]])
+    PDF page.
-                        if cell.top:
+
-                            plt.plot([cell.lt[0], cell.rt[0]],
+    Parameters
-                                     [cell.lt[1], cell.rt[1]])
+    ----------
-                        if cell.bottom:
+    image : tuple
-                            plt.plot([cell.lb[0], cell.rb[0]],
+
-                                     [cell.lb[1], cell.rb[1]])
+    """
-            plt.show()
+    img, table_bbox = image
-    elif geometry_type == 'contour':
+    for t in table_bbox.keys():
-        for img, table_bbox in geometry.images:
+        cv2.rectangle(img, (t[0], t[1]),
-            for t in table_bbox.keys():
+                      (t[2], t[3]), (255, 0, 0), 20)
-                cv2.rectangle(img, (t[0], t[1]),
+    plt.imshow(img)
-                              (t[2], t[3]), (255, 0, 0), 20)
+    plt.show()
-            plt.imshow(img)
+
-            plt.show()
+
-    elif geometry_type == 'joint':
+def plot_joint(image):
-        for img, table_bbox in geometry.images:
+    """Generates a plot for all line intersections present on the
-            x_coord = []
+    PDF page.
-            y_coord = []
+
-            for k in table_bbox.keys():
+    Parameters
-                for coord in table_bbox[k]:
+    ----------
-                    x_coord.append(coord[0])
+    image : tuple
-                    y_coord.append(coord[1])
+
-            max_x, max_y = max(x_coord), max(y_coord)
+    """
-            plt.plot(x_coord, y_coord, 'ro')
+    img, table_bbox = image
-            plt.axis([0, max_x + 100, max_y + 100, 0])
+    x_coord = []
-            plt.imshow(img)
+    y_coord = []
-            plt.show()
+    for k in table_bbox.keys():
-    elif geometry_type == 'line':
+        for coord in table_bbox[k]:
-        for v_s, h_s in geometry.segments:
+            x_coord.append(coord[0])
-            for v in v_s:
+            y_coord.append(coord[1])
-                plt.plot([v[0], v[2]], [v[1], v[3]])
+    plt.plot(x_coord, y_coord, 'ro')
-            for h in h_s:
+    plt.imshow(img)
-                plt.plot([h[0], h[2]], [h[1], h[3]])
+    plt.show()
-            plt.show()
+
 def plot_line(segments):
    """Draw every line segment as a line and show the figure.

    Parameters
    ----------
    segments : tuple
        A pair ``(vertical, horizontal)``. Each segment is indexed as
        ``s[0]..s[3]`` and drawn from ``(s[0], s[1])`` to
        ``(s[2], s[3])``.

    """
    vertical, horizontal = segments
    # Vertical segments are drawn first, then horizontal ones.
    for group in (vertical, horizontal):
        for seg in group:
            plt.plot([seg[0], seg[2]], [seg[1], seg[3]])
    plt.show()
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -38,25 +38,25 @@ lattice_kwargs = [
 ]
-def validate_input(kwargs, mesh=False, geometry_type=False):
+def validate_input(kwargs, flavor='lattice', geometry_type=False):
-    def check_intersection(parser_kwargs, input_kwargs, message_bool):
+    def check_intersection(parser_kwargs, input_kwargs):
        isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
        if isec:
-            raise ValueError("{} can not be used with mesh set to {}".format(
+            raise ValueError("{} cannot be used with flavor='{}'".format(
-                             ",".join(sorted(isec)), message_bool))
+                             ",".join(sorted(isec)), flavor))
-    if mesh:
+    if flavor == 'lattice':
-        check_intersection(stream_kwargs, kwargs, True)
+        check_intersection(stream_kwargs, kwargs)
    else:
-        check_intersection(lattice_kwargs, kwargs, False)
+        check_intersection(lattice_kwargs, kwargs)
    if geometry_type:
-        if not mesh and geometry_type in ['contour', 'joint', 'line']:
+        if flavor != 'lattice' and geometry_type in ['contour', 'joint', 'line']:
-            raise ValueError("Use geometry_type={} with mesh set to True".format(
+            raise ValueError("Use geometry_type='{}' with flavor='lattice'".format(
                             geometry_type))
-def remove_extra(kwargs, mesh=False):
+def remove_extra(kwargs, flavor='lattice'):
-    if mesh:
+    if flavor == 'lattice':
        for key in kwargs.keys():
            if key in stream_kwargs:
                kwargs.pop(key)
--- a/docs/api.rst
+++ b/docs/api.rst
@ -8,7 +8,6 @@ API Reference
 Main Interface
 --------------
 .. autofunction:: camelot.read_pdf
 .. autofunction:: camelot.plot_geometry
 Lower-Level Classes
 -------------------
--- a/docs/index.rst
+++ b/docs/index.rst
@ -33,7 +33,7 @@ Release v\ |version|. (:ref:`Installation <install>`)
 ::
    >>> import camelot
-    >>> tables = camelot.read_pdf('foo.pdf', mesh=True)
+    >>> tables = camelot.read_pdf('foo.pdf')
    >>> tables
    <TableList tables=1>
    >>> tables.export('foo.csv', f='csv', compress=True) # json, excel, html
--- a/docs/user/advanced.rst
+++ b/docs/user/advanced.rst
@ -3,7 +3,7 @@
 Advanced Usage
 ==============
-This page covers some of the more advanced configurations for :ref:`Stream <stream>` and :ref:`Lattice <lattice>`.
+This page covers some of the more advanced configurations for :ref:`Lattice <lattice>` and :ref:`Stream <stream>`.
 Process background lines
 ------------------------
@ -21,7 +21,7 @@ To process background lines, you can pass ``process_background=True``.
 ::
-    >>> tables = camelot.read_pdf('background_lines.pdf', mesh=True, process_background=True)
+    >>> tables = camelot.read_pdf('background_lines.pdf', process_background=True)
    >>> tables[1].df
 .. csv-table::
@ -30,9 +30,9 @@ To process background lines, you can pass ``process_background=True``.
 Plot geometry
 -------------
-You can use the :meth:`plot_geometry() <camelot.plot_geometry>` method to plot various geometries that were detected by Camelot while processing the PDF page. This can help you select table areas, column separators and debug bad table outputs, by tweaking different configuration parameters.
+You can use a :class:`table <camelot.core.Table>` object's :meth:`plot() <camelot.core.Table.plot>` method to plot various geometries that were detected by Camelot while processing the PDF page. This can help you select table areas, column separators and debug bad table outputs, by tweaking different configuration parameters.
-The following geometries are available for plotting. You can pass them to the :meth:`plot_geometry() <camelot.plot_geometry>` method with the ``geometry_type`` keyword argument, which will then generate a `matplotlib <https://matplotlib.org/>`_ plot.
+The following geometries are available for plotting. You can pass them to the :meth:`plot() <camelot.core.Table.plot>` method, which will then generate a `matplotlib <https://matplotlib.org/>`_ plot for the passed geometry.
 - 'text'
 - 'table'
@ -40,22 +40,26 @@ The following geometries are available for plotting. You can pass them to the :m
 - 'line'
 - 'joint'
-.. note:: The last three geometries can only be used with :ref:`Lattice <lattice>`, i.e. when ``mesh=True``.
+.. note:: The last three geometries can only be used with :ref:`Lattice <lattice>`, i.e. when ``flavor='lattice'``.
-Let's generate a plot for each geometry using this `PDF <../_static/pdf/foo.pdf>`__ as an example.
+Let's generate a plot for each geometry using this `PDF <../_static/pdf/foo.pdf>`__ as an example. First, let's get all the tables out.
-.. warning:: By default, :meth:`plot_geometry() <camelot.plot_geometry>` will use the first page of the PDF. Since this method is useful only for debugging, it makes sense to use it for one page at a time. If you pass a page range to this method, multiple plots will be generated one by one, a new one popping up as you close the previous one. To abort, you can use ``Ctrl + C``.
+::
    >>> tables = camelot.read_pdf('foo.pdf')
    >>> tables
    <TableList n=1>
 .. _geometry_text:
 text
 ^^^^
-Passing ``geometry_type=text`` creates a plot for all the text present on a PDF page.
+Let's plot all the text present on the table's PDF page.
 ::
-    >>> camelot.plot_geometry('foo.pdf', geometry_type='text')
+    >>> tables[0].plot('text')
 .. figure:: ../_static/png/geometry_text.png
    :height: 674
@ -64,20 +68,20 @@ Passing ``geometry_type=text`` creates a plot for all the text present on a PDF
    :alt: A plot of all text on a PDF page
    :align: left
-This, as we shall later see, is very helpful with :ref:`Stream <stream>`, for noting table areas and column separators, in case Stream cannot guess them correctly.
+This, as we shall later see, is very helpful with :ref:`Stream <stream>`, for noting table areas and column separators, in case Stream does not guess them correctly.
-.. note:: As you can see in the image above, the *x-y* coordinates change as you move your mouse cursor, which can help you note coordinates.
+.. note:: The *x-y* coordinates shown above change as you move your mouse cursor on the image, which can help you note coordinates.
 .. _geometry_table:
 table
 ^^^^^
-Passing ``geometry_type=table`` creates a plot for tables detected on a PDF page. This geometry, along with contour, line and joint is useful for debugging and improving the parsing output, as we shall see later.
+Let's plot the table (to see if it was detected correctly or not). This geometry type, along with contour, line and joint is useful for debugging and improving the parsing output, in case the table wasn't detected correctly. More on that later.
 ::
-    >>> camelot.plot_geometry('foo.pdf', mesh=True, geometry_type='table')
+    >>> tables[0].plot('table')
 .. figure:: ../_static/png/geometry_table.png
    :height: 674
@ -86,16 +90,18 @@ Passing ``geometry_type=table`` creates a plot for tables detected on a PDF page
    :alt: A plot of all tables on a PDF page
    :align: left
 The table is perfect!
 .. _geometry_contour:
 contour
 ^^^^^^^
-Passing ``geometry_type=contour`` creates a plot for table boundaries detected on a PDF page.
+Now, let's plot all table boundaries present on the table's PDF page.
 ::
-    >>> camelot.plot_geometry('foo.pdf', mesh=True, geometry_type='contour')
+    >>> tables[0].plot('contour')
 .. figure:: ../_static/png/geometry_contour.png
    :height: 674
@ -109,11 +115,11 @@ Passing ``geometry_type=contour`` creates a plot for table boundaries detected o
 line
 ^^^^
-Passing ``geometry_type=line`` creates a plot for lines detected on a PDF page.
+Cool, let's plot all line segments present on the table's PDF page.
 ::
-    >>> camelot.plot_geometry('foo.pdf', geometry_type='line')
+    >>> tables[0].plot('line')
 .. figure:: ../_static/png/geometry_line.png
    :height: 674
@ -127,11 +133,11 @@ Passing ``geometry_type=line`` creates a plot for lines detected on a PDF page.
 joint
 ^^^^^
-Passing ``geometry_type=joint`` creates a plot for line intersections detected on a PDF page.
+Finally, let's plot all line intersections present on the table's PDF page.
 ::
-    >>> camelot.plot_geometry('foo.pdf', mesh=True, geometry_type='joint')
+    >>> tables[0].plot('joint')
 .. figure:: ../_static/png/geometry_joint.png
    :height: 674
@ -143,7 +149,7 @@ Passing ``geometry_type=joint`` creates a plot for line intersections detected o
 Specify table areas
 -------------------
-Since :ref:`Stream <stream>` treats the whole page as a table, `for now`_, it's useful to specify table boundaries in cases such as this `PDF <../_static/pdf/table_areas.pdf>`__. You can :ref:`plot the text <geometry_text>` on this page and note the left-top and right-bottom coordinates of the table.
+Since :ref:`Stream <stream>` treats the whole page as a table, `for now`_, it's useful to specify table boundaries in cases such as `these <../_static/pdf/table_areas.pdf>`__. You can :ref:`plot the text <geometry_text>` on this page and note the left-top and right-bottom coordinates of the table.
 Table areas that you want Camelot to analyze can be passed as a list of comma-separated strings to :meth:`read_pdf() <camelot.read_pdf>`, using the ``table_areas`` keyword argument.
@ -151,7 +157,7 @@ Table areas that you want Camelot to analyze can be passed as a list of comma-se
 ::
-    >>> tables = camelot.read_pdf('table_areas.pdf', table_areas=['316,499,566,337'])
+    >>> tables = camelot.read_pdf('table_areas.pdf', flavor='stream', table_areas=['316,499,566,337'])
    >>> tables[0].df
 .. csv-table::
@ -160,19 +166,19 @@ Table areas that you want Camelot to analyze can be passed as a list of comma-se
 Specify column separators
 -------------------------
-In cases like this `PDF <../_static/pdf/column_separators.pdf>`__, where the text is very close to each other, it is possible that Camelot may guess the column separators' coordinates incorrectly. To correct this, you can explicitly specify the *x* coordinate for each column separator by :ref:`plotting the text <geometry_text>` on the page.
+In cases like `these <../_static/pdf/column_separators.pdf>`__, where the text is very close to each other, it is possible that Camelot may guess the column separators' coordinates incorrectly. To correct this, you can explicitly specify the *x* coordinate for each column separator by :ref:`plotting the text <geometry_text>` on the page.
 You can pass the column separators as a list of comma-separated strings to :meth:`read_pdf() <camelot.read_pdf>`, using the ``columns`` keyword argument.
 In case you passed a single column separators string list, and no table area is specified, the separators will be applied to the whole page. When a list of table areas is specified and there is a need to specify column separators as well, **the length of both lists should be equal**. Each table area will be mapped to each column separators' string using their indices.
-If you have specified two table areas, ``table_areas=['12,23,43,54', '20,33,55,67']``, and only want to specify column separators for the first table (since you can see by looking at the table that Camelot will be able to get it perfectly!), you can pass an empty string for the second table in the column separators' list, like this, ``columns=['10,120,200,400', '']``.
+For example, if you have specified two table areas, ``table_areas=['12,23,43,54', '20,33,55,67']``, and only want to specify column separators for the first table, you can pass an empty string for the second table in the column separators' list, like this, ``columns=['10,120,200,400', '']``.
 Let's get back to the *x* coordinates we got from :ref:`plotting text <geometry_text>` that exists on this `PDF <../_static/pdf/column_separators.pdf>`__, and get the table out!
 ::
-    >>> tables = camelot.read_pdf('column_separators.pdf', columns=['72,95,209,327,442,529,566,606,683'])
+    >>> tables = camelot.read_pdf('column_separators.pdf', flavor='stream', columns=['72,95,209,327,442,529,566,606,683'])
    >>> tables[0].df
 .. csv-table::
@ -182,7 +188,7 @@ Let's get back to the *x* coordinates we got from :ref:`plotting text <geometry_
    "NUMBER TYPE DBA NAME","","","LICENSEE NAME","ADDRESS","CITY","ST","ZIP","PHONE NUMBER","EXPIRES"
    "...","...","...","...","...","...","...","...","...","..."
-Ah! Since `PDFMiner <https://euske.github.io/pdfminer/>`_ merged the strings, "NUMBER", "TYPE" and "DBA NAME", all of them were assigned to the same cell. Let's see how we can fix this in the next section.
+Ah! Since `PDFMiner <https://euske.github.io/pdfminer/>`_ merged the strings "NUMBER", "TYPE" and "DBA NAME", all of them were assigned to the same cell. Let's see how we can fix this in the next section.
 Split text along separators
 ---------------------------
@ -191,7 +197,7 @@ To deal with cases like the output from the previous section, you can pass ``spl
 ::
-    >>> tables = camelot.read_pdf('column_separators.pdf', columns=['72,95,209,327,442,529,566,606,683'], split_text=True)
+    >>> tables = camelot.read_pdf('column_separators.pdf', flavor='stream', columns=['72,95,209,327,442,529,566,606,683'], split_text=True)
    >>> tables[0].df
 .. csv-table::
@ -204,13 +210,13 @@ To deal with cases like the output from the previous section, you can pass ``spl
 Flag superscripts and subscripts
 --------------------------------
-There might be cases where you want to differentiate between the text and superscripts and subscripts, like this `PDF <../_static/pdf/superscript.pdf>`_.
+There might be cases where you want to differentiate between the text, and superscripts or subscripts, like this `PDF <../_static/pdf/superscript.pdf>`_.
 .. figure:: ../_static/png/superscript.png
    :alt: A PDF with superscripts
    :align: left
-In this case, the text that `other tools`_ return, will be ``24.912``. This is harmless as long as there is that decimal point involved. When it isn't there, you'll be left wondering why the results of your data analysis were 10x bigger!
+In this case, the text that `other tools`_ return, will be ``24.912``. This is harmless as long as there is that decimal point involved. But when it isn't there, you'll be left wondering why the results of your data analysis were 10x bigger!
 You can solve this by passing ``flag_size=True``, which will enclose the superscripts and subscripts with ``<s></s>``, based on font size, as shown below.
@ -218,7 +224,7 @@ You can solve this by passing ``flag_size=True``, which will enclose the supersc
 ::
-    >>> tables = camelot.read_pdf('superscript.pdf', flag_size=True)
+    >>> tables = camelot.read_pdf('superscript.pdf', flavor='stream', flag_size=True)
    >>> tables[0].df
 .. csv-table::
@ -236,7 +242,7 @@ You can pass ``row_close_tol=<+int>`` to group the rows closer together, as show
 ::
-    >>> tables = camelot.read_pdf('group_rows.pdf')
+    >>> tables = camelot.read_pdf('group_rows.pdf', flavor='stream')
    >>> tables[0].df
 .. csv-table::
@ -250,7 +256,7 @@ You can pass ``row_close_tol=<+int>`` to group the rows closer together, as show
 ::
-    >>> tables = camelot.read_pdf('group_rows.pdf', row_close_tol=10)
+    >>> tables = camelot.read_pdf('group_rows.pdf', flavor='stream', row_close_tol=10)
    >>> tables[0].df
 .. csv-table::
@ -266,11 +272,11 @@ Detect short lines
 There might be cases while using :ref:`Lattice <lattice>` when smaller lines don't get detected. The size of the smallest line that gets detected is calculated by dividing the PDF page's dimensions with a scaling factor called ``line_size_scaling``. By default, its value is 15.
-As you can already guess, the larger the ``line_size_scaling``, the smaller the size of lines getting detected.
+As you can guess, the larger the ``line_size_scaling``, the smaller the size of lines getting detected.
 .. warning:: Making ``line_size_scaling`` very large (>150) will lead to text getting detected as lines.
-Here's one `PDF <../_static/pdf/short_lines.pdf>`__ where small lines separating the the headers don't get detected with the default value of 15.
+Here's a `PDF <../_static/pdf/short_lines.pdf>`__ where small lines separating the headers don't get detected with the default value of 15.
 .. figure:: ../_static/png/short_lines.png
    :alt: A PDF table with short lines
@ -280,7 +286,8 @@ Let's :ref:`plot the table <geometry_table>` for this PDF.
 ::
-    >>> camelot.plot_geometry('short_lines.pdf', mesh=True, geometry_type='table')
+    >>> tables = camelot.read_pdf('short_lines.pdf')
    >>> tables[0].plot('table')
 .. figure:: ../_static/png/short_lines_1.png
    :alt: A plot of the PDF table with short lines
@ -290,17 +297,17 @@ Clearly, the smaller lines separating the headers, couldn't be detected. Let's t
 ::
-    >>> camelot.plot_geometry('short_lines.pdf', mesh=True, geometry_type='table', line_size_scaling=40)
+    >>> tables = camelot.read_pdf('short_lines.pdf', line_size_scaling=40)
    >>> tables[0].plot('table')
 .. figure:: ../_static/png/short_lines_2.png
    :alt: An improved plot of the PDF table with short lines
    :align: left
-Voila! Camelot can now see those lines. Let's use this value in :meth:`read_pdf() <camelot.read_pdf>` and get our table.
+Voila! Camelot can now see those lines. Let's get our table.
 ::
    >>> tables = camelot.read_pdf('short_lines.pdf', line_size_scaling=40)
    >>> tables[0].df
 .. csv-table::
@ -332,7 +339,7 @@ We'll use the `PDF <../_static/pdf/short_lines.pdf>`__ from the previous example
 ::
-    >>> tables = camelot.read_pdf('short_lines.pdf', mesh=True, line_size_scaling=40, shift_text=[''])
+    >>> tables = camelot.read_pdf('short_lines.pdf', line_size_scaling=40, shift_text=[''])
    >>> tables[0].df
 .. csv-table::
@ -353,7 +360,7 @@ No surprises there, it did remain in place (observe the strings "2400" and "All
 ::
-    >>> tables = camelot.read_pdf('short_lines.pdf', mesh=True, line_size_scaling=40, shift_text=['r', 'b'])
+    >>> tables = camelot.read_pdf('short_lines.pdf', line_size_scaling=40, shift_text=['r', 'b'])
    >>> tables[0].df
 .. csv-table::
@ -381,7 +388,7 @@ Let's try it out on this `PDF <../_static/pdf/copy_text.pdf>`__. First, let's ch
 ::
-    >>> tables = camelot.read_pdf('copy_text.pdf', mesh=True)
+    >>> tables = camelot.read_pdf('copy_text.pdf')
    >>> tables[0].df
 .. csv-table::
@ -398,7 +405,7 @@ We don't need anything else. Now, let's pass ``copy_text=['v']`` to copy text in
 ::
-    >>> tables = camelot.read_pdf('copy_text.pdf', mesh=True, copy_text=['v'])
+    >>> tables = camelot.read_pdf('copy_text.pdf', copy_text=['v'])
    >>> tables[0].df
 .. csv-table::
--- a/docs/user/cli.rst
+++ b/docs/user/cli.rst
@ -5,25 +5,21 @@ Command-line interface
 Camelot comes with a command-line interface.
-You can print the help for the interface, by typing ``camelot --help`` in your favorite terminal program, as shown below.
+You can print the help for the interface, by typing ``camelot --help`` in your favorite terminal program, as shown below. Furthermore, you can print the help for each command, by typing ``camelot <command> --help``. Try it out!
 ::
  $ camelot --help
-  Usage: camelot [OPTIONS] FILEPATH
+  Usage: camelot [OPTIONS] COMMAND [ARGS]...
  Options:
    --version                       Show the version and exit.
    -p, --pages TEXT                Comma-separated page numbers to parse.
                                    Example: 1,3,4 or 1,4-end
    -o, --output TEXT               Output filepath.
    -f, --format [csv|json|excel|html]
                                    Output file format.
    -z, --zip                       Whether or not to create a ZIP archive.
    -m, --mesh                      Whether or not to use Lattice method of
                                    parsing. Stream is used by default.
    -T, --table_area TEXT           Table areas (x1,y1,x2,y2) to process.
                                    x1, y1
                                    -> left-top and x2, y2 -> right-bottom
    -split, --split_text            Whether or not to split text if it spans
                                    across multiple cells.
    -flag, --flag_size              (inactive) Whether or not to flag text which
@ -32,47 +28,8 @@ You can print the help for the interface, by typing ``camelot --help`` in your f
    -M, --margins <FLOAT FLOAT FLOAT>...
                                    char_margin, line_margin, word_margin for
                                    PDFMiner.
-    -C, --columns TEXT              x-coordinates of column separators.
+    --help                          Show this message and exit.
-    -r, --row_close_tol INTEGER     Rows will be formed by combining text
+
-                                    vertically within this tolerance.
+  Commands:
-    -c, --col_close_tol INTEGER     Columns will be formed by combining text
+    lattice  Use lines between text to parse table.
-                                    horizontally within this tolerance.
+    stream   Use spaces between text to parse table.
    -back, --process_background     (with --mesh) Whether or not to process
                                    lines that are in background.
    -scale, --line_size_scaling INTEGER
                                    (with --mesh) Factor by which the page
                                    dimensions will be divided to get smallest
                                    length of detected lines.
    -copy, --copy_text [h|v]        (with --mesh) Specify direction in which
                                    text will be copied over in a spanning cell.
    -shift, --shift_text [|l|r|t|b]  (with --mesh) Specify direction in which
                                    text in a spanning cell should flow.
    -l, --line_close_tol INTEGER    (with --mesh) Tolerance parameter used to
                                    merge close vertical lines and close
                                    horizontal lines.
    -j, --joint_close_tol INTEGER   (with --mesh) Tolerance parameter used to
                                    decide whether the detected lines and points
                                    lie close to each other.
    -block, --threshold_blocksize INTEGER
                                    (with --mesh) For adaptive thresholding,
                                    size of a pixel neighborhood that is used to
                                    calculate a threshold value for the pixel:
                                    3, 5, 7, and so on.
    -const, --threshold_constant INTEGER
                                    (with --mesh) For adaptive thresholding,
                                    constant subtracted from the mean or
                                    weighted mean.
                                    Normally, it is positive but
                                    may be zero or negative as well.
    -I, --iterations INTEGER        (with --mesh) Number of times for
                                    erosion/dilation is applied.
    -G, --geometry_type [text|table|contour|joint|line]
                                    Plot geometry found on pdf page for
                                    debugging.
                                    text: Plot text objects. (Useful to get
                                          table_area and columns coordinates)
                                    table: Plot parsed table.
                                    contour (with --mesh): Plot detected rectangles.
                                    joint (with --mesh): Plot detected line intersections.
                                    line (with --mesh): Plot detected lines.
    --help                          Show this message and exit.
--- a/docs/user/how-it-works.rst
+++ b/docs/user/how-it-works.rst
@ -20,7 +20,7 @@ It is built on top of PDFMiner's functionality of grouping characters on a page
 .. _margins: https://euske.github.io/pdfminer/#tools
-.. note:: By default, Stream treats the whole PDF page as a table. Automatic table detection for Stream is `in the works`_.
+.. note:: By default, Stream treats the whole PDF page as a table, which isn't ideal when there are more than two tables on a page with different number of columns. Automatic table detection for Stream is `in the works`_.
 .. _in the works: https://github.com/socialcopsdev/camelot/issues/102
@ -29,13 +29,13 @@ It is built on top of PDFMiner's functionality of grouping characters on a page
 Lattice
 -------
-Lattice is more deterministic in nature, and does not rely on guesses. It can be used to parse tables that have demarcated lines between cells.
+Lattice is more deterministic in nature, and does not rely on guesses. It can be used to parse tables that have demarcated lines between cells, and can automatically parse multiple tables present on a page.
 It starts by converting the PDF page to an image using ghostscript and then processing it to get horizontal and vertical line segments by applying a set of morphological transformations (erosion and dilation) using OpenCV.
-Let's see how Lattice processes the `second page of this PDF`_, step-by-step.
+Let's see how Lattice processes the second page of `this PDF`_, step-by-step.
-.. _second page of this PDF: ../_static/pdf/us-030.pdf
+.. _this PDF: ../_static/pdf/us-030.pdf
 1. Line segments are detected.
--- a/docs/user/install.rst
+++ b/docs/user/install.rst
@ -8,16 +8,20 @@ This part of the documentation covers the installation of Camelot. First, you'll
 .. _tk: https://packages.ubuntu.com/trusty/python-tk
 .. _ghostscript: https://www.ghostscript.com/
-These can be installed using your system's package manager. If you use Ubuntu, run the following:
+These can be installed using your system's package manager. You can run the following based on your OS.
 ::
-    $ sudo apt install python-tk ghostscript
+For Ubuntu::
    $ apt install python-tk ghostscript
 For macOS::
    $ brew install tcl-tk ghostscript
 $ pip install camelot-py
 ------------------------
-After installing the dependencies, you can simply use pip to install Camelot:
+After installing the dependencies, you can simply use pip to install Camelot:
 ::
    $ pip install camelot-py
--- a/docs/user/intro.rst
+++ b/docs/user/intro.rst
@ -6,9 +6,9 @@ Introduction
 The Camelot Project
 -------------------
-The Portable Document Format (PDF) was born out of `The Camelot Project`_ when a need was felt for "a universal to communicate documents across a wide variety of machine configurations, operating systems and communication networks". The goal was to make these documents viewable on any display and printable on any modern printers. The invention of the `PostScript`_ page description language, which enabled the creation of fixed-layout flat documents (with text, fonts, graphics, images encapsulated), solved the problem.
+The Portable Document Format (PDF) was born out of `The Camelot Project`_ when a need was felt for "a universal way to communicate documents across a wide variety of machine configurations, operating systems and communication networks". The goal was to make these documents viewable on any display and printable on any modern printer. The invention of the `PostScript`_ page description language, which enabled the creation of *fixed-layout* flat documents (with text, fonts, graphics, images encapsulated), solved the problem.
-At a very high level, PostScript defines instructions, such as, "place this character at this x,y coordinate on a plane". Spaces can be *simulated* by placing characters relatively far apart. Similarly, tables can be *simulated* by placing characters (and words) in two-dimensional grids. A PDF viewer just takes these instructions and draws everything for the user to view. Since it's just characters on a plane, there is no table data structure which can be directly extracted and used for analysis!
+At a very high level, PostScript defines instructions, such as, "place this character at this x,y coordinate on a plane". Spaces can be *simulated* by placing characters relatively far apart. Extending from that, tables can be *simulated* by placing characters (which constitute words) in two-dimensional grids. A PDF viewer just takes these instructions and draws everything for the user to view. Since it's just characters on a plane, there is no table data structure which can be extracted and used for analysis!
 Sadly, a lot of open data is given out as tables which are trapped inside PDF files.
@ -17,13 +17,14 @@ Sadly, a lot of open data is given out as tables which are trapped inside PDF fi
 Why another PDF Table Parsing library?
 --------------------------------------
-There are both open (`Tabula`_) and closed-source (`PDFTables`_, `smallpdf`_) tools that are used widely to extract tables from PDF files. They either give a nice output, or fail miserably. There is no in-between. This does not help most users, since everything in the real world, including PDF table extraction, is fuzzy. Which leads to creation of adhoc table extraction scripts for each different type of PDF that the user wants to parse.
+There are both open (`Tabula`_, `pdf-table-extract`_) and closed-source (`smallpdf`_, `PDFTables`_) tools that are widely used to extract tables from PDF files. They either give a nice output, or fail miserably. There is no in-between. This is not helpful, since everything in the real world, including PDF table extraction, is fuzzy, leading to creation of ad hoc table extraction scripts for each different type of PDF that the user wants to parse.
-Camelot was created with the goal of offering its users complete control over table extraction. If the users are not able to get the desired output with the default configuration, they should be able to tweak the parameters and get the tables out!
+Camelot was created with the goal of offering its users complete control over table extraction. If the users are not able to get the desired output with the default configuration, they should be able to tweak it and get the job done!
-Here is a `comparison`_ of Camelot's output with outputs from other PDF parsing libraries and tools.
+Here is a `comparison`_ of Camelot's output with outputs from other open-source PDF parsing libraries and tools.
 .. _Tabula: http://tabula.technology/
 .. _pdf-table-extract: https://github.com/ashima/pdf-table-extract
 .. _PDFTables: https://pdftables.com/
 .. _Smallpdf: https://smallpdf.com
 .. _comparison: https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Parsing-libraries-and-tools
@ -31,7 +32,7 @@ Here is a `comparison`_ of Camelot's output with outputs from other PDF parsing
 What's in a name?
 -----------------
-As you can already guess, this library is named after `The Camelot Project`_. The image on the left is taken from `Monty Python and the Holy Grail`_. In the movie, it is the castle "Camelot" where Arthur leads his men, the Knights of the Round Table, and then sets off elsewhere after deciding that it is "a silly place". Interestingly, the language in which this library is written was named after Monty Python.
+As you can already guess, this library is named after `The Camelot Project`_. Fun fact, "Camelot" is the name of the castle in `Monty Python and the Holy Grail`_, where Arthur leads his men, the Knights of the Round Table, and then sets off elsewhere after deciding that it is "a silly place". Interestingly, the language in which this library is written (Python) was named after Monty Python.
 .. _The Camelot Project: http://www.planetpdf.com/planetpdf/pdfs/warnock_camelot.pdf
 .. _Monty Python and the Holy Grail: https://en.wikipedia.org/wiki/Monty_Python_and_the_Holy_Grail
--- a/docs/user/quickstart.rst
+++ b/docs/user/quickstart.rst
@ -16,13 +16,13 @@ Begin by importing the Camelot module::
 Now, let's try to read a PDF. You can check out the PDF used in this example, `here`_. Since the PDF has a table with clearly demarcated lines, we will use the :ref:`Lattice <lattice>` method here. No extra keyword argument is needed to do that.
-.. note:: :ref:`Stream <stream>` is used by default.
+.. note:: :ref:`Lattice <lattice>` is used by default. You can use :ref:`Stream <stream>` with ``flavor='stream'``.
 .. _here: ../_static/pdf/foo.pdf
 ::
-    >>> tables = camelot.read_pdf('foo.pdf', mesh=True)
+    >>> tables = camelot.read_pdf('foo.pdf')
    >>> tables
    <TableList n=1>
@ -47,7 +47,7 @@ Let's print the parsing report.
        'page': 1
    }
-Woah! The accuracy is top-notch and whitespace is less, that means the table was parsed correctly (most probably). You can access the table as a pandas DataFrame by using the :class:`table <camelot.core.Table> object's` ``df`` property.
+Woah! The accuracy is top-notch and there is little whitespace, which means the table was most probably parsed correctly. You can access the table as a pandas DataFrame by using the :class:`table <camelot.core.Table>` object's ``df`` property.
 ::
@ -64,7 +64,7 @@ Looks good! You can be export the table as a CSV file using its :meth:`to_csv()
 This will export the table as a CSV file at the path specified. In this case, it is ``foo.csv`` in the current directory.
-You can also export all tables at once, using the ``tables`` object's :meth:`export() <camelot.core.TableList.export>` method.
+You can also export all tables at once, using the :class:`tables <camelot.core.TableList>` object's :meth:`export() <camelot.core.TableList.export>` method.
 ::
@ -72,11 +72,11 @@ You can also export all tables at once, using the ``tables`` object's :meth:`exp
 This will export all tables as CSV files at the path specified. Alternatively, you can use ``f='json'``, ``f='excel'`` or ``f='html'``.
-.. note:: The :meth:`export() <camelot.core.TableList.export>` method exports files with a ``page-*-table-*`` suffix. In the example above, the single table in the list will be exported to ``foo-page-1-table-1.csv``. If the list contains multiple tables, multiple files will be created. To avoid filling up your path with multiple files, you can use ``compress=True``, which will create a single ZIP archive at your path with all the exported files.
+.. note:: The :meth:`export() <camelot.core.TableList.export>` method exports files with a ``page-*-table-*`` suffix. In the example above, the single table in the list will be exported to ``foo-page-1-table-1.csv``. If the list contains multiple tables, multiple CSV files will be created. To avoid filling up your path with multiple files, you can use ``compress=True``, which will create a single ZIP file at your path with all the CSV files.
-.. note:: Camelot handles rotated PDF pages automatically. As an exercise, try to extract the table out of `this PDF file`_.
+.. note:: Camelot handles rotated PDF pages automatically. As an exercise, try to extract the table out of `this PDF`_.
-.. _this PDF file: ../_static/pdf/rotated.pdf
+.. _this PDF: ../_static/pdf/rotated.pdf
 Specify page numbers
 --------------------
--- a/setup.py
+++ b/setup.py
@ -9,7 +9,7 @@ with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
    exec(f.read(), about)
 # TODO: Move these to __version__.py
-NAME = 'camelot'
+NAME = 'camelot-py'
 VERSION = about['__version__']
 DESCRIPTION = 'PDF Table Parsing for Humans'
 with open('README.md') as f:
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -18,11 +18,11 @@ def test_stream_table_rotated():
    df = pd.DataFrame(data_stream_table_rotated)
    filename = os.path.join(testdir, "clockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
    assert df.equals(tables[0].df)
    filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
-    tables = camelot.read_pdf(filename)
+    tables = camelot.read_pdf(filename, flavor="stream")
    assert df.equals(tables[0].df)
@ -30,7 +30,7 @@ def test_stream_table_area():
    df = pd.DataFrame(data_stream_table_area_single)
    filename = os.path.join(testdir, "tabula/us-007.pdf")
-    tables = camelot.read_pdf(filename, table_area=["320,500,573,335"])
+    tables = camelot.read_pdf(filename, flavor="stream", table_area=["320,500,573,335"])
    assert df.equals(tables[0].df)
@ -39,7 +39,7 @@ def test_stream_columns():
    filename = os.path.join(testdir, "mexican_towns.pdf")
    tables = camelot.read_pdf(
-        filename, columns=["67,180,230,425,475"], row_close_tol=10)
+        filename, flavor="stream", columns=["67,180,230,425,475"], row_close_tol=10)
    assert df.equals(tables[0].df)
@ -48,7 +48,7 @@ def test_lattice():
    filename = os.path.join(testdir,
        "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
-    tables = camelot.read_pdf(filename, pages="2", mesh=True)
+    tables = camelot.read_pdf(filename, pages="2")
    assert df.equals(tables[0].df)
@ -56,11 +56,11 @@ def test_lattice_table_rotated():
    df = pd.DataFrame(data_lattice_table_rotated)
    filename = os.path.join(testdir, "clockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
    assert df.equals(tables[0].df)
    filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True)
+    tables = camelot.read_pdf(filename)
    assert df.equals(tables[0].df)
@ -68,7 +68,7 @@ def test_lattice_process_background():
    df = pd.DataFrame(data_lattice_process_background)
    filename = os.path.join(testdir, "background_lines_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, process_background=True)
+    tables = camelot.read_pdf(filename, process_background=True)
    assert df.equals(tables[1].df)
@ -76,5 +76,5 @@ def test_lattice_copy_text():
    df = pd.DataFrame(data_lattice_copy_text)
    filename = os.path.join(testdir, "row_span_1.pdf")
-    tables = camelot.read_pdf(filename, mesh=True, line_size_scaling=60, copy_text="v")
+    tables = camelot.read_pdf(filename, line_size_scaling=60, copy_text="v")
    assert df.equals(tables[0].df)