diff --git a/.gitignore b/.gitignore
index 14fc340..4fd453c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,7 @@ build/
dist/
*.egg-info/
.coverage
+
+.pytest_cache/
+_build/
+_static/
diff --git a/Makefile b/Makefile
deleted file mode 100644
index f4183af..0000000
--- a/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-PYTHON ?= python
-NOSETESTS ?= nosetests
-
-help:
-	@echo "Please use \`make <target>' where <target> is one of"
-	@echo "  clean"
-	@echo "  dev to install in develop mode"
-	@echo "  undev to uninstall develop mode"
-	@echo "  install to install for all users"
-	@echo "  test to run tests"
-	@echo "  test-coverage to run tests with coverage report"
-
-clean:
-	$(PYTHON) setup.py clean
-	rm -rf dist
-
-dev:
-	$(PYTHON) setup.py develop
-
-undev:
-	$(PYTHON) setup.py develop --uninstall
-
-install:
-	$(PYTHON) setup.py install
-
-test:
-	$(NOSETESTS) -s -v
-
-test-coverage:
-	rm -rf coverage .coverage
-	$(NOSETESTS) -s -v --with-coverage
diff --git a/README.md b/README.md
index 5cb4fc0..372cc09 100644
--- a/README.md
+++ b/README.md
@@ -1,67 +1,31 @@
-# camelot
+# Camelot: PDF Table Parsing for Humans

-Camelot is a Python 2.7 library and command-line tool for getting tables out of PDF files.
+Camelot is a Python 2.7 library and command-line tool for extracting tabular data from PDF files.

## Usage
-from camelot.pdf import Pdf
-from camelot.lattice import Lattice
-
-manager = Pdf(Lattice(), "/path/to/pdf")
-tables = manager.extract()
-
- -Camelot comes with a CLI where you can specify page numbers, output format, output directory etc. By default, the output files are placed in the same directory as the PDF. - -
-Camelot: PDF parsing made simpler!
-
-usage:
- camelot [options] <method> [<args>...]
-
-options:
- -h, --help                Show this screen.
- -v, --version             Show version.
- -V, --verbose             Verbose.
- -p, --pages <pageno>      Comma-separated list of page numbers.
-                           Example: -p 1,3-6,10  [default: 1]
- -P, --parallel            Parallelize the parsing process.
- -f, --format <format>     Output format. (csv,tsv,html,json,xlsx) [default: csv]
- -l, --log                 Log to file.
- -o, --output <directory>  Output directory.
- -M, --cmargin <cmargin>   Char margin. Chars closer than cmargin are
-                           grouped together to form a word. [default: 2.0]
- -L, --lmargin <lmargin>   Line margin. Lines closer than lmargin are
-                           grouped together to form a textbox. [default: 0.5]
- -W, --wmargin <wmargin>   Word margin. Insert blank spaces between chars
-                           if distance between words is greater than word
-                           margin. [default: 0.1]
- -J, --split_text          Split text lines if they span across multiple cells.
- -K, --flag_size           Flag substring if its size differs from the whole string.
-                           Useful for super and subscripts.
- -X, --print-stats         List stats on the parsing process.
- -Y, --save-stats          Save stats to a file.
- -Z, --plot <dist>         Plot distributions. (page,all,rc)
-
-camelot methods:
- lattice  Looks for lines between data.
- stream   Looks for spaces between data.
- ocrl     Lattice, but for images.
- ocrs     Stream, but for images.
-
-See 'camelot <method> -h' for more information on a specific method.
+>>> import camelot
+>>> tables = camelot.read_pdf("foo.pdf")
+>>> tables
+<TableList tables=2>
+>>> tables.export("foo.csv", f="csv", compress=True) # json, excel, html
+>>> tables[0]
+<Table shape=(3,4)>
+>>> tables[0].to_csv("foo.csv") # to_json, to_excel, to_html
+>>> tables[0].parsing_report
+{
+    "accuracy": 96,
+    "whitespace": 80,
+    "order": 1,
+    "page": 1
+}
+>>> df = tables[0].df
 
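Stream is the default parser. Going by the `read_pdf` signature introduced in `camelot/io.py` below (`read_pdf(filepath, pages='1', mesh=False, **kwargs)`), switching to the line-based Lattice parser and selecting pages would presumably look like this; a sketch against this diff's API, not captured output:

>>> tables = camelot.read_pdf("foo.pdf", pages="1,3-4,10-end", mesh=True)  # mesh=True selects Lattice
>>> tables.n  # number of parsed tables, via the TableList.n property

`pages` accepts comma-separated page numbers and ranges, with `end` standing in for the last page, as handled by `PDFHandler._get_pages`.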
## Dependencies

-Currently, camelot works under Python 2.7.
-
-The required dependencies include [numpy](http://www.numpy.org/), [OpenCV](http://opencv.org/) and [ImageMagick](http://www.imagemagick.org/script/index.php).
-
-### Optional
-
-You'll need to install [Tesseract](https://github.com/tesseract-ocr/tesseract) if you want to extract tables from image based pdfs. Also, you'll need a tesseract language pack if your pdf isn't in english.
+The dependencies include [tk](https://wiki.tcl.tk/3743) and [ghostscript](https://www.ghostscript.com/).

## Installation

@@ -73,32 +37,32 @@ pip install -U pip setuptools

### Installing dependencies

-numpy can be install using `pip`. OpenCV and imagemagick can be installed using your system's default package manager.
+tk and ghostscript can be installed using your system's default package manager.

#### Linux

-* Arch Linux
-
-sudo pacman -S opencv imagemagick
-
- * Ubuntu
-sudo apt-get install libopencv-dev python-opencv imagemagick
+sudo apt-get install python-opencv python-tk ghostscript
+
+ +* Arch Linux + +
+sudo pacman -S opencv tk ghostscript
 
#### OS X
-brew install homebrew/science/opencv imagemagick
+brew install homebrew/science/opencv ghostscript
 
Finally, `cd` into the project directory and install by running:
-make install
+python setup.py install
 
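Since the diff adds `camelot/__version__.py` and re-exports `__version__` from the package root, a quick post-install sanity check might be (assuming the dependencies above are in place):

python -c "import camelot; print(camelot.__version__)"

This should print `0.1.0`, the version pinned in this diff.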
## Development

@@ -113,14 +77,14 @@ git clone https://github.com/socialcopsdev/camelot.git

### Contributing

-See [Contributing doc]().
+See [Contributing guidelines]().

### Testing
-make test
+python setup.py test
 
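The `.pytest_cache/` entry added to `.gitignore` suggests the suite runs on pytest under the hood; if so, the tests can presumably also be invoked directly:

pip install pytest
pytest -v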
## License

-BSD License
+BSD License
\ No newline at end of file
diff --git a/camelot/__init__.py b/camelot/__init__.py
index 55aee51..6e416e4 100644
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@@ -1,3 +1,4 @@
-__version__ = '1.2.0'
+from .__version__ import __version__

-__all__ = ['pdf', 'lattice', 'stream', 'ocr']
+from .io import read_pdf
+from .plotting import plot_geometry
\ No newline at end of file
diff --git a/camelot/__version__.py b/camelot/__version__.py
new file mode 100644
index 0000000..b794fd4
--- /dev/null
+++ b/camelot/__version__.py
@@ -0,0 +1 @@
+__version__ = '0.1.0'
diff --git a/camelot/cell.py b/camelot/cell.py
deleted file mode 100644
index 8dfe8d3..0000000
--- a/camelot/cell.py
+++ /dev/null
@@ -1,128 +0,0 @@
-class Cell:
-    """Cell.
-    Defines a cell object with coordinates relative to a left-bottom
-    origin, which is also PDFMiner's coordinate space.
-
-    Parameters
-    ----------
-    x1 : float
-        x-coordinate of left-bottom point.
-
-    y1 : float
-        y-coordinate of left-bottom point.
-
-    x2 : float
-        x-coordinate of right-top point.
-
-    y2 : float
-        y-coordinate of right-top point.
-
-    Attributes
-    ----------
-    lb : tuple
-        Tuple representing left-bottom coordinates.
-
-    lt : tuple
-        Tuple representing left-top coordinates.
-
-    rb : tuple
-        Tuple representing right-bottom coordinates.
-
-    rt : tuple
-        Tuple representing right-top coordinates.
-
-    bbox : tuple
-        Tuple representing the cell's bounding box using the
-        lower-bottom and right-top coordinates.
-
-    left : bool
-        Whether or not cell is bounded on the left.
-
-    right : bool
-        Whether or not cell is bounded on the right.
-
-    top : bool
-        Whether or not cell is bounded on the top.
-
-    bottom : bool
-        Whether or not cell is bounded on the bottom.
-
-    text_objects : list
-        List of text objects assigned to cell.
-
-    text : string
-        Text assigned to cell.
-
-    spanning_h : bool
-        Whether or not cell spans/extends horizontally.
-
-    spanning_v : bool
-        Whether or not cell spans/extends vertically.
-    """
-
-    def __init__(self, x1, y1, x2, y2):
-
-        self.x1 = x1
-        self.y1 = y1
-        self.x2 = x2
-        self.y2 = y2
-        self.lb = (x1, y1)
-        self.lt = (x1, y2)
-        self.rb = (x2, y1)
-        self.rt = (x2, y2)
-        self.bbox = (x1, y1, x2, y2)
-        self.left = False
-        self.right = False
-        self.top = False
-        self.bottom = False
-        self.text_objects = []
-        self.text = ''
-        self.spanning_h = False
-        self.spanning_v = False
-        self.image = None
-
-    def add_text(self, text):
-        """Adds text to cell.
-
-        Parameters
-        ----------
-        text : string
-        """
-        self.text = ''.join([self.text, text])
-
-    def get_text(self):
-        """Returns text assigned to cell.
-
-        Returns
-        -------
-        text : string
-        """
-        return self.text
-
-    def add_object(self, t_object):
-        """Adds PDFMiner text object to cell.
-
-        Parameters
-        ----------
-        t_object : object
-        """
-        self.text_objects.append(t_object)
-
-    def get_objects(self):
-        """Returns list of text objects assigned to cell.
-
-        Returns
-        -------
-        text_objects : list
-        """
-        return self.text_objects
-
-    def get_bounded_edges(self):
-        """Returns the number of edges by which a cell is bounded.
-
-        Returns
-        -------
-        bounded_edges : int
-        """
-        self.bounded_edges = self.top + self.bottom + self.left + self.right
-        return self.bounded_edges
diff --git a/camelot/cli.py b/camelot/cli.py
new file mode 100644
index 0000000..302830e
--- /dev/null
+++ b/camelot/cli.py
@@ -0,0 +1 @@
+import click
\ No newline at end of file
diff --git a/camelot/core.py b/camelot/core.py
new file mode 100644
index 0000000..f400fe8
--- /dev/null
+++ b/camelot/core.py
@@ -0,0 +1,491 @@
+import os
+import json
+import zipfile
+import tempfile
+
+import numpy as np
+import pandas as pd
+
+
+class Cell(object):
+    """Defines a cell in a table with coordinates relative to a
+    left-bottom origin. (pdf coordinate space)
+
+    Parameters
+    ----------
+    x1 : float
+        x-coordinate of left-bottom point.
+    y1 : float
+        y-coordinate of left-bottom point.
+    x2 : float
+        x-coordinate of right-top point.
+    y2 : float
+        y-coordinate of right-top point.
+
+    Attributes
+    ----------
+    lb : tuple
+        Tuple representing left-bottom coordinates.
+    lt : tuple
+        Tuple representing left-top coordinates.
+    rb : tuple
+        Tuple representing right-bottom coordinates.
+    rt : tuple
+        Tuple representing right-top coordinates.
+    left : bool
+        Whether or not cell is bounded on the left.
+    right : bool
+        Whether or not cell is bounded on the right.
+    top : bool
+        Whether or not cell is bounded on the top.
+    bottom : bool
+        Whether or not cell is bounded on the bottom.
+    hspan : bool
+        Whether or not cell spans horizontally.
+    vspan : bool
+        Whether or not cell spans vertically.
+    text : string
+        Text assigned to cell.
+    bound
+
+    """
+
+    def __init__(self, x1, y1, x2, y2):
+        self.x1 = x1
+        self.y1 = y1
+        self.x2 = x2
+        self.y2 = y2
+        self.lb = (x1, y1)
+        self.lt = (x1, y2)
+        self.rb = (x2, y1)
+        self.rt = (x2, y2)
+        self.left = False
+        self.right = False
+        self.top = False
+        self.bottom = False
+        self.hspan = False
+        self.vspan = False
+        self._text = ''
+
+    def __repr__(self):
+        return '<Cell x1={} y1={} x2={} y2={}>'.format(
+            self.x1, self.y1, self.x2, self.y2)
+
+    @property
+    def text(self):
+        return self._text
+
+    @text.setter
+    def text(self, t):
+        self._text = ''.join([self._text, t])
+
+    @property
+    def bound(self):
+        """The number of sides on which the cell is bounded.
+        """
+        return self.top + self.bottom + self.left + self.right
+
+
+class Table(object):
+    """Defines a table with coordinates relative to a left-bottom
+    origin. (pdf coordinate space)
+
+    Parameters
+    ----------
+    cols : list
+        List of tuples representing column x-coordinates in increasing
+        order.
+    rows : list
+        List of tuples representing row y-coordinates in decreasing
+        order.
+
+    Attributes
+    ----------
+    df : object
+        pandas.DataFrame
+    shape : tuple
+        Shape of the table.
+    accuracy : float
+        Accuracy with which text was assigned to the cell.
+    whitespace : float
+        Percentage of whitespace in the table.
+    order : int
+        Table number on pdf page.
+    page : int
+        Pdf page number.
+    data
+    parsing_report
+
+    """
+    def __init__(self, cols, rows):
+        self.cols = cols
+        self.rows = rows
+        self.cells = [[Cell(c[0], r[1], c[1], r[0])
+                       for c in cols] for r in rows]
+        self.df = None
+        self.shape = (0, 0)
+        self.accuracy = 0
+        self.whitespace = 0
+        self.order = None
+        self.page = None
+
+    def __repr__(self):
+        return '<{} shape={}>'.format(self.__class__.__name__, self.shape)
+
+    @property
+    def data(self):
+        """Returns two-dimensional list of strings in table.
+        """
+        d = []
+        for row in self.cells:
+            d.append([cell.text.strip() for cell in row])
+        return d
+
+    @property
+    def parsing_report(self):
+        """Returns a parsing report with accuracy, %whitespace,
+        table number on page and page number.
+        """
+        # pretty?
+        report = {
+            'accuracy': self.accuracy,
+            'whitespace': self.whitespace,
+            'order': self.order,
+            'page': self.page
+        }
+        return report
+
+    def set_all_edges(self):
+        """Sets all table edges to True.
+        """
+        for row in self.cells:
+            for cell in row:
+                cell.left = cell.right = cell.top = cell.bottom = True
+        return self
+
+    def set_edges(self, vertical, horizontal, joint_close_tol=2):
+        """Sets a cell's edges to True depending on whether the cell's
+        coordinates overlap with the line's coordinates within a
+        tolerance.
+
+        Parameters
+        ----------
+        vertical : list
+            List of detected vertical lines.
+        horizontal : list
+            List of detected horizontal lines.
+        joint_close_tol : int, optional (default: 2)
+            Tolerance parameter used to decide whether the detected
+            lines and points lie close to each other.
+
+        """
+        for v in vertical:
+            # find closest x coord
+            # iterate over y coords and find closest start and end points
+            i = [i for i, t in enumerate(self.cols)
+                 if np.isclose(v[0], t[0], atol=joint_close_tol)]
+            j = [j for j, t in enumerate(self.rows)
+                 if np.isclose(v[3], t[0], atol=joint_close_tol)]
+            k = [k for k, t in enumerate(self.rows)
+                 if np.isclose(v[1], t[0], atol=joint_close_tol)]
+            if not j:
+                continue
+            J = j[0]
+            if i == [0]:  # only left edge
+                L = i[0]
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[J][L].left = True
+                        J += 1
+                else:
+                    K = len(self.rows)
+                    while J < K:
+                        self.cells[J][L].left = True
+                        J += 1
+            elif i == []:  # only right edge
+                L = len(self.cols) - 1
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[J][L].right = True
+                        J += 1
+                else:
+                    K = len(self.rows)
+                    while J < K:
+                        self.cells[J][L].right = True
+                        J += 1
+            else:  # both left and right edges
+                L = i[0]
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[J][L].left = True
+                        self.cells[J][L - 1].right = True
+                        J += 1
+                else:
+                    K = len(self.rows)
+                    while J < K:
+                        self.cells[J][L].left = True
+                        self.cells[J][L - 1].right = True
+                        J += 1
+
+        for h in horizontal:
+            # find closest y coord
+            # iterate over x coords and find closest start and end points
+            i = [i for i, t in enumerate(self.rows)
+                 if np.isclose(h[1], t[0], atol=joint_close_tol)]
+            j = [j for j, t in enumerate(self.cols)
+                 if np.isclose(h[0], t[0], atol=joint_close_tol)]
+            k = [k for k, t in enumerate(self.cols)
+                 if np.isclose(h[2], t[0], atol=joint_close_tol)]
+            if not j:
+                continue
+            J = j[0]
+            if i == [0]:  # only top edge
+                L = i[0]
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[L][J].top = True
+                        J += 1
+                else:
+                    K = len(self.cols)
+                    while J < K:
+                        self.cells[L][J].top = True
+                        J += 1
+            elif i == []:  # only bottom edge
+                L = len(self.rows) - 1
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[L][J].bottom = True
+                        J += 1
+                else:
+                    K = len(self.cols)
+                    while J < K:
+                        self.cells[L][J].bottom = True
+                        J += 1
+            else:  # both top and bottom edges
+                L = i[0]
+                if k:
+                    K = k[0]
+                    while J < K:
+                        self.cells[L][J].top = True
+                        self.cells[L - 1][J].bottom = True
+                        J += 1
+                else:
+                    K = len(self.cols)
+                    while J < K:
+                        self.cells[L][J].top = True
+                        self.cells[L - 1][J].bottom = True
+                        J += 1
+
+        return self
+
+    def set_border(self):
+        """Sets table border edges to True.
+ """ + for r in range(len(self.rows)): + self.cells[r][0].left = True + self.cells[r][len(self.cols) - 1].right = True + for c in range(len(self.cols)): + self.cells[0][c].top = True + self.cells[len(self.rows) - 1][c].bottom = True + return self + + def set_span(self): + """Sets a cell's hspan or vspan attribute to True depending + on whether the cell spans horizontally or vertically. + """ + for row in self.cells: + for cell in row: + left = cell.left + right = cell.right + top = cell.top + bottom = cell.bottom + if cell.bound == 4: + continue + elif cell.bound == 3: + if not left and (right and top and bottom): + cell.hspan = True + elif not right and (left and top and bottom): + cell.hspan = True + elif not top and (left and right and bottom): + cell.vspan = True + elif not bottom and (left and right and top): + cell.vspan = True + elif cell.bound == 2: + if left and right and (not top and not bottom): + cell.vspan = True + elif top and bottom and (not left and not right): + cell.hspan = True + return self + + def to_csv(self, path, **kwargs): + """Write Table to a comma-separated values (csv) file. + """ + kw = { + 'encoding': 'utf-8', + 'index': False, + 'quoting': 1 + } + kw.update(kwargs) + self.df.to_csv(path, **kw) + + def to_json(self, path, **kwargs): + """Write Table to a JSON file. + """ + kw = { + 'orient': 'records' + } + kw.update(kwargs) + json_string = self.df.to_json(**kw) + with open(path, 'w') as f: + f.write(json_string) + + def to_excel(self, path, **kwargs): + """Write Table to an Excel file. + """ + kw = { + 'sheet_name': 'page-{}-table-{}'.format(self.page, self.order), + 'encoding': 'utf-8' + } + kw.update(kwargs) + writer = pd.ExcelWriter(path) + self.df.to_excel(writer, **kw) + writer.save() + + def to_html(self, path, **kwargs): + """Write Table to an HTML file. + """ + html_string = self.df.to_html(**kwargs) + with open(path, 'w') as f: + f.write(html_string) + + +class TableList(object): + """Defines a list of camelot.core.Table objects. Each table can + be accessed using its index. + + Attributes + ---------- + n : int + Number of tables in the list. + + """ + def __init__(self, tables): + self._tables = tables + + def __repr__(self): + return '<{} tables={}>'.format( + self.__class__.__name__, len(self._tables)) + + def __len__(self): + return len(self._tables) + + def __getitem__(self, idx): + return self._tables[idx] + + @staticmethod + def _format_func(table, f): + return getattr(table, 'to_{}'.format(f)) + + @property + def n(self): + return len(self._tables) + + def _write_file(self, f=None, **kwargs): + dirname = kwargs.get('dirname') + root = kwargs.get('root') + ext = kwargs.get('ext') + for table in self._tables: + filename = os.path.join('{}-page-{}-table-{}{}'.format( + root, table.page, table.order, ext)) + filepath = os.path.join(dirname, filename) + to_format = self._format_func(table, f) + to_format(filepath) + + def _compress_dir(self, **kwargs): + path = kwargs.get('path') + dirname = kwargs.get('dirname') + root = kwargs.get('root') + ext = kwargs.get('ext') + zipname = os.path.join(os.path.dirname(path), root) + '.zip' + with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z: + for table in self._tables: + filename = os.path.join('{}-page-{}-table-{}{}'.format( + root, table.page, table.order, ext)) + filepath = os.path.join(dirname, filename) + z.write(filepath, os.path.basename(filepath)) + + def export(self, path, f='csv', compress=False): + """Exports the list of tables to specified file format. 
+ + Parameters + ---------- + path : str + Filepath + f : str + File format. Can be csv, json, excel and html. + compress : bool + Whether or not to add files to a ZIP archive. + + """ + dirname = os.path.dirname(path) + basename = os.path.basename(path) + root, ext = os.path.splitext(basename) + if compress: + dirname = tempfile.mkdtemp() + + kwargs = { + 'path': path, + 'dirname': dirname, + 'root': root, + 'ext': ext + } + + if f in ['csv', 'json', 'html']: + self._write_file(f=f, **kwargs) + if compress: + self._compress_dir(**kwargs) + elif f == 'excel': + filepath = os.path.join(dirname, basename) + writer = pd.ExcelWriter(filepath) + for table in self._tables: + sheet_name = 'page-{}-table-{}'.format(table.page, table.order) + table.df.to_excel(writer, sheet_name=sheet_name, encoding='utf-8') + writer.save() + if compress: + zipname = os.path.join(os.path.dirname(path), root) + '.zip' + with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z: + z.write(filepath, os.path.basename(filepath)) + + +class Geometry(object): + def __init__(self): + self.text = [] + self.images = () + self.segments = () + self.tables = [] + + def __repr__(self): + return '<{} text={} images={} segments={} tables={}>'.format( + self.__class__.__name__, + len(self.text), + len(self.images), + len(self.segments), + len(self.tables)) + + +class GeometryList(object): + def __init__(self, geometry): + self.text = [g.text for g in geometry] + self.images = [g.images for g in geometry] + self.segments = [g.segments for g in geometry] + self.tables = [g.tables for g in geometry] + + def __repr__(self): + return '<{} text={} images={} segments={} tables={}>'.format( + self.__class__.__name__, + len(self.text), + len(self.images), + len(self.segments), + len(self.tables)) \ No newline at end of file diff --git a/camelot/handlers.py b/camelot/handlers.py new file mode 100644 index 0000000..8585432 --- /dev/null +++ b/camelot/handlers.py @@ -0,0 +1,144 @@ +import os +import tempfile + +from PyPDF2 import PdfFileReader, PdfFileWriter + +from .core import TableList, GeometryList +from .parsers import Stream, Lattice +from .utils import get_page_layout, get_text_objects, get_rotation + + +class PDFHandler(object): + """Handles all operations like temp directory creation, splitting + file into single page pdfs, parsing each pdf and then removing the + temp directory. + + Parameter + --------- + filename : str + Path to pdf file. + pages : str + Comma-separated page numbers to parse. + Example: 1,3,4 or 1,4-end + + """ + def __init__(self, filename, pages='1'): + self.filename = filename + if not self.filename.endswith('.pdf'): + raise TypeError("File format not supported.") + self.pages = self._get_pages(self.filename, pages) + self.tempdir = tempfile.mkdtemp() + + def _get_pages(self, filename, pages): + """Converts pages string to list of ints. + + Parameters + ---------- + filename : str + Path to pdf file. + pages : str + Comma-separated page numbers to parse. + Example: 1,3,4 or 1,4-end + + Returns + ------- + P : list + List of int page numbers. 
+ + """ + page_numbers = [] + if pages == '1': + page_numbers.append({'start': 1, 'end': 1}) + else: + infile = PdfFileReader(open(filename, 'rb'), strict=False) + if pages == 'all': + page_numbers.append({'start': 1, 'end': infile.getNumPages()}) + else: + for r in pages.split(','): + if '-' in r: + a, b = r.split('-') + if b == 'end': + b = infile.getNumPages() + page_numbers.append({'start': int(a), 'end': int(b)}) + else: + page_numbers.append({'start': int(r), 'end': int(r)}) + P = [] + for p in page_numbers: + P.extend(range(p['start'], p['end'] + 1)) + return sorted(set(P)) + + def _save_page(self, filename, page, temp): + """Saves specified page from pdf into a temporary directory. + + Parameters + ---------- + filename : str + Path to pdf file. + page : int + Page number + temp : str + Tmp directory + + """ + with open(filename, 'rb') as fileobj: + infile = PdfFileReader(fileobj, strict=False) + fpath = os.path.join(temp, 'page-{0}.pdf'.format(page)) + froot, fext = os.path.splitext(fpath) + p = infile.getPage(page - 1) + outfile = PdfFileWriter() + outfile.addPage(p) + with open(fpath, 'wb') as f: + outfile.write(f) + layout, dim = get_page_layout(fpath) + # fix rotated pdf + lttextlh = get_text_objects(layout, ltype="lh") + lttextlv = get_text_objects(layout, ltype="lv") + ltchar = get_text_objects(layout, ltype="char") + rotation = get_rotation(lttextlh, lttextlv, ltchar) + if rotation != '': + fpath_new = ''.join([froot.replace('page', 'p'), '_rotated', fext]) + os.rename(fpath, fpath_new) + infile = PdfFileReader(open(fpath_new, 'rb'), strict=False) + outfile = PdfFileWriter() + p = infile.getPage(0) + if rotation == 'anticlockwise': + p.rotateClockwise(90) + elif rotation == 'clockwise': + p.rotateCounterClockwise(90) + outfile.addPage(p) + with open(fpath, 'wb') as f: + outfile.write(f) + + def parse(self, mesh=False, **kwargs): + """Extracts tables by calling parser.get_tables on all single + page pdfs. + + Parameters + ---------- + mesh : bool (default: False) + Whether or not to use Lattice method of parsing. Stream + is used by default. + kwargs : dict + See camelot.read_pdf kwargs. + + Returns + ------- + tables : camelot.core.TableList + List of tables found in pdf. + geometry : camelot.core.GeometryList + List of geometry objects (contours, lines, joints) + found in pdf. + + """ + for p in self.pages: + self._save_page(self.filename, p, self.tempdir) + pages = [os.path.join(self.tempdir, 'page-{0}.pdf'.format(p)) + for p in self.pages] + tables = [] + geometry = [] + parser = Stream(**kwargs) if not mesh else Lattice(**kwargs) + for p in pages: + t, g = parser.extract_tables(p) + tables.extend(t) + geometry.append(g) + return TableList(tables), GeometryList(geometry) \ No newline at end of file diff --git a/camelot/imgproc.py b/camelot/image_processing.py similarity index 69% rename from camelot/imgproc.py rename to camelot/image_processing.py index 1621bea..23923b2 100644 --- a/camelot/imgproc.py +++ b/camelot/image_processing.py @@ -1,3 +1,4 @@ +from __future__ import division from itertools import groupby from operator import itemgetter @@ -7,40 +8,38 @@ import numpy as np from .utils import merge_tuples -def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2): +def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2): """Thresholds an image using OpenCV's adaptiveThreshold. Parameters ---------- imagename : string Path to image file. - - invert : bool - Whether or not to invert the image. 
Useful when pdfs have - tables with lines in background. - (optional, default: False) - - blocksize: int + process_background : bool, optional (default: False) + Whether or not to process lines that are in background. + blocksize : int, optional (default: 15) Size of a pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on. - c: float - Constant subtracted from the mean or weighted mean - (see the details below). Normally, it is positive but may be - zero or negative as well. + For more information, refer `OpenCV's adaptiveThreshold `_. + c : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. Returns ------- img : object numpy.ndarray representing the original image. - threshold : object numpy.ndarray representing the thresholded image. + """ img = cv2.imread(imagename) gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if invert: + if process_background: threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c) else: @@ -49,7 +48,7 @@ def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2): return img, threshold -def find_lines(threshold, direction='horizontal', scale=15, iterations=0): +def find_lines(threshold, direction='horizontal', line_size_scaling=15, iterations=0): """Finds horizontal and vertical lines by applying morphological transformations on an image. @@ -57,38 +56,37 @@ def find_lines(threshold, direction='horizontal', scale=15, iterations=0): ---------- threshold : object numpy.ndarray representing the thresholded image. - - direction : string + direction : string, optional (default: 'horizontal') Specifies whether to find vertical or horizontal lines. - (default: 'horizontal') + line_size_scaling : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + smallest length of lines that should be detected. - scale : int - Used to divide the height/width to get a structuring element - for morph transform. - (optional, default: 15) + The larger this value, smaller the detected lines. Making it + too large will lead to text being detected as lines. + iterations : int, optional (default: 0) + Number of times for erosion/dilation is applied. - iterations : int - Number of iterations for dilation. - (optional, default: 2) + For more information, refer `OpenCV's dilate `_. Returns ------- dmask : object numpy.ndarray representing pixels where vertical/horizontal lines lie. - lines : list List of tuples representing vertical/horizontal lines with coordinates relative to a left-top origin in - OpenCV's coordinate space. + image coordinate space. 
+ """ lines = [] if direction == 'vertical': - size = threshold.shape[0] // scale + size = threshold.shape[0] // line_size_scaling el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size)) elif direction == 'horizontal': - size = threshold.shape[1] // scale + size = threshold.shape[1] // line_size_scaling el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1)) elif direction is None: raise ValueError("Specify direction as either 'vertical' or" @@ -110,9 +108,9 @@ def find_lines(threshold, direction='horizontal', scale=15, iterations=0): x1, x2 = x, x + w y1, y2 = y, y + h if direction == 'vertical': - lines.append(((x1 + x2) / 2, y2, (x1 + x2) / 2, y1)) + lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1)) elif direction == 'horizontal': - lines.append((x1, (y1 + y2) / 2, x2, (y1 + y2) / 2)) + lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2)) return dmask, lines @@ -124,7 +122,6 @@ def find_table_contours(vertical, horizontal): ---------- vertical : object numpy.ndarray representing pixels where vertical lines lie. - horizontal : object numpy.ndarray representing pixels where horizontal lines lie. @@ -133,7 +130,8 @@ def find_table_contours(vertical, horizontal): cont : list List of tuples representing table boundaries. Each tuple is of the form (x, y, w, h) where (x, y) -> left-top, w -> width and - h -> height in OpenCV's coordinate space. + h -> height in image coordinate space. + """ mask = vertical + horizontal @@ -161,11 +159,9 @@ def find_table_joints(contours, vertical, horizontal): contours : list List of tuples representing table boundaries. Each tuple is of the form (x, y, w, h) where (x, y) -> left-top, w -> width and - h -> height in OpenCV's coordinate space. - + h -> height in image coordinate space. vertical : object numpy.ndarray representing pixels where vertical lines lie. - horizontal : object numpy.ndarray representing pixels where horizontal lines lie. @@ -174,9 +170,9 @@ def find_table_joints(contours, vertical, horizontal): tables : dict Dict with table boundaries as keys and list of intersections in that boundary as their value. - Keys are of the form (x1, y1, x2, y2) where (x1, y1) -> lb - and (x2, y2) -> rt in OpenCV's coordinate space. + and (x2, y2) -> rt in image coordinate space. + """ joints = np.bitwise_and(vertical, horizontal) tables = {} @@ -194,32 +190,35 @@ def find_table_joints(contours, vertical, horizontal): joint_coords = [] for j in jc: jx, jy, jw, jh = cv2.boundingRect(j) - c1, c2 = x + (2 * jx + jw) / 2, y + (2 * jy + jh) / 2 + c1, c2 = x + (2 * jx + jw) // 2, y + (2 * jy + jh) // 2 joint_coords.append((c1, c2)) tables[(x, y + h, x + w, y)] = joint_coords return tables -def remove_lines(threshold, line_scale=15): +def remove_lines(threshold, line_size_scaling=15): """Removes lines from a thresholded image. Parameters ---------- threshold : object numpy.ndarray representing the thresholded image. + line_size_scaling : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + smallest length of lines that should be detected. - line_scale : int - Line scaling factor. - (optional, default: 15) + The larger this value, smaller the detected lines. Making it + too large will lead to text being detected as lines. Returns ------- threshold : object numpy.ndarray representing the thresholded image with horizontal and vertical lines removed. 
+ """ - size = threshold.shape[0] // line_scale + size = threshold.shape[0] // line_size_scaling vertical_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size)) horizontal_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1)) dilate_el = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10)) @@ -235,24 +234,26 @@ def remove_lines(threshold, line_scale=15): return threshold -def find_cuts(threshold, char_scale=200): +def find_cuts(threshold, char_size_scaling=200): """Finds cuts made by text projections on y-axis. Parameters ---------- threshold : object numpy.ndarray representing the thresholded image. + line_size_scaling : int, optional (default: 200) + Factor by which the page dimensions will be divided to get + smallest length of lines that should be detected. - char_scale : int - Char scaling factor. - (optional, default: 200) + The larger this value, smaller the detected lines. Making it + too large will lead to text being detected as lines. Returns ------- y_cuts : list List of cuts on y-axis. """ - size = threshold.shape[0] // char_scale + size = threshold.shape[0] // char_size_scaling char_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size)) threshold = cv2.erode(threshold, char_el) @@ -268,5 +269,5 @@ def find_cuts(threshold, char_scale=200): contours = [cv2.boundingRect(c) for c in contours] y_cuts = [(c[1], c[1] + c[3]) for c in contours] y_cuts = list(merge_tuples(sorted(y_cuts))) - y_cuts = [(y_cuts[i][0] + y_cuts[i - 1][1]) / 2 for i in range(1, len(y_cuts))] + y_cuts = [(y_cuts[i][0] + y_cuts[i - 1][1]) // 2 for i in range(1, len(y_cuts))] return sorted(y_cuts, reverse=True) \ No newline at end of file diff --git a/camelot/io.py b/camelot/io.py new file mode 100644 index 0000000..33007d4 --- /dev/null +++ b/camelot/io.py @@ -0,0 +1,94 @@ +from .handlers import PDFHandler + + +def read_pdf(filepath, pages='1', mesh=False, **kwargs): + """Read PDF and return parsed data tables. + + Note: kwargs annotated with ^ can only be used with mesh=False + and kwargs annotated with * can only be used with mesh=True. + + Parameters + ---------- + filepath : str + Path to pdf file. + pages : str + Comma-separated page numbers to parse. + Example: 1,3,4 or 1,4-end + mesh : bool (default: False) + Whether or not to use Lattice method of parsing. Stream + is used by default. + table_area : list, optional (default: None) + List of table areas to analyze as strings of the form + x1,y1,x2,y2 where (x1, y1) -> left-top and + (x2, y2) -> right-bottom in pdf coordinate space. + columns^ : list, optional (default: None) + List of column x-coordinates as strings where the coordinates + are comma-separated. + split_text : bool, optional (default: False) + Whether or not to split a text line if it spans across + multiple cells. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using + if its size is different from rest of the string, useful for + super and subscripts. + row_close_tol^ : int, optional (default: 2) + Rows will be formed by combining text vertically + within this tolerance. + col_close_tol^ : int, optional (default: 0) + Columns will be formed by combining text horizontally + within this tolerance. + process_background* : bool, optional (default: False) + Whether or not to process lines that are in background. + line_size_scaling* : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + smallest length of lines that should be detected. + + The larger this value, smaller the detected lines. 
Making it + too large will lead to text being detected as lines. + copy_text* : list, optional (default: None) + {'h', 'v'} + Select one or more strings from above and pass them as a list + to specify the direction in which text should be copied over + when a cell spans multiple rows or columns. + shift_text* : list, optional (default: ['l', 't']) + {'l', 'r', 't', 'b'} + Select one or more strings from above and pass them as a list + to specify where the text in a spanning cell should flow. + line_close_tol* : int, optional (default: 2) + Tolerance parameter used to merge vertical and horizontal + detected lines which lie close to each other. + joint_close_tol* : int, optional (default: 2) + Tolerance parameter used to decide whether the detected lines + and points lie close to each other. + threshold_blocksize : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + threshold_constant : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + iterations : int, optional (default: 0) + Number of times for erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + margins : tuple + PDFMiner margins. (char_margin, line_margin, word_margin) + + For for information, refer `PDFMiner docs `_. + debug : bool, optional (default: False) + Whether or not to return all text objects on the page + which can be used to generate a matplotlib plot, to get + values for table_area(s) and debugging. + + Returns + ------- + tables : camelot.core.TableList + + """ + # validate kwargs? + p = PDFHandler(filepath, pages) + tables, __ = p.parse(mesh=mesh, **kwargs) + return tables \ No newline at end of file diff --git a/camelot/lattice.py b/camelot/lattice.py deleted file mode 100644 index 40803f6..0000000 --- a/camelot/lattice.py +++ /dev/null @@ -1,382 +0,0 @@ -from __future__ import division -import os -import sys -import copy -import types -import logging -import copy_reg -import warnings -import subprocess - -from .imgproc import (adaptive_threshold, find_lines, find_table_contours, - find_table_joints) -from .table import Table -from .utils import (scale_to_pdf, scale_to_image, segments_bbox, text_in_bbox, - merge_close_values, get_table_index, get_score, count_empty, - encode_list, get_text_objects, get_page_layout) - - -__all__ = ['Lattice'] -logger = logging.getLogger('app_logger') - - -def _reduce_method(m): - if m.im_self is None: - return getattr, (m.im_class, m.im_func.func_name) - else: - return getattr, (m.im_self, m.im_func.func_name) -copy_reg.pickle(types.MethodType, _reduce_method) - - -def _reduce_index(t, idx, shift_text): - """Reduces index of a text object if it lies within a spanning - cell. - - Parameters - ---------- - table : object - camelot.table.Table - - idx : list - List of tuples of the form (r_idx, c_idx, text). - - shift_text : list - {'l', 'r', 't', 'b'} - Select one or more from above and pass them as a list to - specify where the text in a spanning cell should flow. - - Returns - ------- - indices : list - List of tuples of the form (idx, text) where idx is the reduced - index of row/column and text is the an lttextline substring. 
- """ - indices = [] - for r_idx, c_idx, text in idx: - for d in shift_text: - if d == 'l': - if t.cells[r_idx][c_idx].spanning_h: - while not t.cells[r_idx][c_idx].left: - c_idx -= 1 - if d == 'r': - if t.cells[r_idx][c_idx].spanning_h: - while not t.cells[r_idx][c_idx].right: - c_idx += 1 - if d == 't': - if t.cells[r_idx][c_idx].spanning_v: - while not t.cells[r_idx][c_idx].top: - r_idx -= 1 - if d == 'b': - if t.cells[r_idx][c_idx].spanning_v: - while not t.cells[r_idx][c_idx].bottom: - r_idx += 1 - indices.append((r_idx, c_idx, text)) - return indices - - -def _fill_spanning(t, fill=None): - """Fills spanning cells. - - Parameters - ---------- - t : object - camelot.table.Table - - fill : list - {'h', 'v'} - Specify to fill spanning cells in horizontal or vertical - direction. - (optional, default: None) - - Returns - ------- - t : object - camelot.table.Table - """ - for f in fill: - if f == "h": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].get_text().strip() == '': - if t.cells[i][j].spanning_h and not t.cells[i][j].left: - t.cells[i][j].add_text(t.cells[i][j - 1].get_text()) - elif f == "v": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].get_text().strip() == '': - if t.cells[i][j].spanning_v and not t.cells[i][j].top: - t.cells[i][j].add_text(t.cells[i - 1][j].get_text()) - return t - - -class Lattice: - """Lattice looks for lines in the pdf to form a table. - - If you want to give fill and mtol for each table when specifying - multiple table areas, make sure that the length of fill and mtol - is equal to the length of table_area. Mapping between them is based - on index. - - Parameters - ---------- - table_area : list - List of strings of the form x1,y1,x2,y2 where - (x1, y1) -> left-top and (x2, y2) -> right-bottom in PDFMiner's - coordinate space, denoting table areas to analyze. - (optional, default: None) - - fill : list - List of strings specifying directions to fill spanning cells. - {'h', 'v'} to fill spanning cells in horizontal or vertical - direction. - (optional, default: None) - - mtol : list - List of ints specifying m-tolerance parameters. - (optional, default: [2]) - - jtol : list - List of ints specifying j-tolerance parameters. - (optional, default: [2]) - - blocksize : int - Size of a pixel neighborhood that is used to calculate a - threshold value for the pixel: 3, 5, 7, and so on. - (optional, default: 15) - - threshold_constant : float - Constant subtracted from the mean or weighted mean - (see the details below). Normally, it is positive but may be - zero or negative as well. - (optional, default: -2) - - scale : int - Used to divide the height/width of a pdf to get a structuring - element for image processing. - (optional, default: 15) - - iterations : int - Number of iterations for dilation. - (optional, default: 0) - - invert : bool - Whether or not to invert the image. Useful when pdfs have - tables with lines in background. - (optional, default: False) - - margins : tuple - PDFMiner margins. (char_margin, line_margin, word_margin) - (optional, default: (1.0, 0.5, 0.1)) - - split_text : bool - Whether or not to split a text line if it spans across - different cells. - (optional, default: False) - - flag_size : bool - Whether or not to highlight a substring using - if its size is different from rest of the string, useful for - super and subscripts. 
- (optional, default: True) - - shift_text : list - {'l', 'r', 't', 'b'} - Select one or more from above and pass them as a list to - specify where the text in a spanning cell should flow. - (optional, default: ['l', 't']) - - debug : string - {'contour', 'line', 'joint', 'table'} - Set to one of the above values to generate a matplotlib plot - of detected contours, lines, joints and the table generated. - (optional, default: None) - """ - def __init__(self, table_area=None, fill=None, mtol=[2], jtol=[2], - blocksize=15, threshold_constant=-2, scale=15, iterations=0, - invert=False, margins=(1.0, 0.5, 0.1), split_text=False, - flag_size=True, shift_text=['l', 't'], debug=None): - - self.method = 'lattice' - self.table_area = table_area - self.fill = fill - self.mtol = mtol - self.jtol = jtol - self.blocksize = blocksize - self.threshold_constant = threshold_constant - self.scale = scale - self.iterations = iterations - self.invert = invert - self.char_margin, self.line_margin, self.word_margin = margins - self.split_text = split_text - self.flag_size = flag_size - self.shift_text = shift_text - self.debug = debug - - def get_tables(self, pdfname): - """Expects a single page pdf as input with rotation corrected. - - Parameters - ---------- - pdfname : string - Path to single page pdf file. - - Returns - ------- - page : dict - """ - layout, dim = get_page_layout(pdfname, char_margin=self.char_margin, - line_margin=self.line_margin, word_margin=self.word_margin) - lttextlh = get_text_objects(layout, ltype="lh") - lttextlv = get_text_objects(layout, ltype="lv") - ltchar = get_text_objects(layout, ltype="char") - width, height = dim - bname, __ = os.path.splitext(pdfname) - logger.info('Processing {0}.'.format(os.path.basename(bname))) - if not ltchar: - warnings.warn("{0}: Page contains no text.".format( - os.path.basename(bname))) - return {os.path.basename(bname): None} - - imagename = ''.join([bname, '.png']) - gs_call = [ - "-q", "-sDEVICE=png16m", "-o", imagename, "-r600", pdfname - ] - if "ghostscript" in subprocess.check_output(["gs", "-version"]).lower(): - gs_call.insert(0, "gs") - else: - gs_call.insert(0, "gsc") - subprocess.call(gs_call, stdout=open(os.devnull, 'w'), - stderr=subprocess.STDOUT) - - img, threshold = adaptive_threshold(imagename, invert=self.invert, - blocksize=self.blocksize, c=self.threshold_constant) - pdf_x = width - pdf_y = height - img_x = img.shape[1] - img_y = img.shape[0] - sc_x_image = img_x / float(pdf_x) - sc_y_image = img_y / float(pdf_y) - sc_x_pdf = pdf_x / float(img_x) - sc_y_pdf = pdf_y / float(img_y) - factors_image = (sc_x_image, sc_y_image, pdf_y) - factors_pdf = (sc_x_pdf, sc_y_pdf, img_y) - - vmask, v_segments = find_lines(threshold, direction='vertical', - scale=self.scale, iterations=self.iterations) - hmask, h_segments = find_lines(threshold, direction='horizontal', - scale=self.scale, iterations=self.iterations) - - if self.table_area is not None: - areas = [] - for area in self.table_area: - x1, y1, x2, y2 = area.split(",") - x1 = float(x1) - y1 = float(y1) - x2 = float(x2) - y2 = float(y2) - x1, y1, x2, y2 = scale_to_image((x1, y1, x2, y2), factors_image) - areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) - table_bbox = find_table_joints(areas, vmask, hmask) - else: - contours = find_table_contours(vmask, hmask) - table_bbox = find_table_joints(contours, vmask, hmask) - - if len(self.mtol) == 1 and self.mtol[0] == 2: - mtolerance = copy.deepcopy(self.mtol) * len(table_bbox) - else: - mtolerance = copy.deepcopy(self.mtol) - - if 
len(self.jtol) == 1 and self.jtol[0] == 2: - jtolerance = copy.deepcopy(self.jtol) * len(table_bbox) - else: - jtolerance = copy.deepcopy(self.jtol) - - if self.debug: - self.debug_images = (img, table_bbox) - - table_bbox, v_segments, h_segments = scale_to_pdf(table_bbox, v_segments, - h_segments, factors_pdf) - - if self.debug: - self.debug_segments = (v_segments, h_segments) - self.debug_tables = [] - - page = {} - tables = {} - # sort tables based on y-coord - for table_no, k in enumerate(sorted(table_bbox.keys(), key=lambda x: x[1], reverse=True)): - # select elements which lie within table_bbox - table_data = {} - t_bbox = {} - v_s, h_s = segments_bbox(k, v_segments, h_segments) - t_bbox['horizontal'] = text_in_bbox(k, lttextlh) - t_bbox['vertical'] = text_in_bbox(k, lttextlv) - char_bbox = text_in_bbox(k, ltchar) - table_data['text_p'] = 100 * (1 - (len(char_bbox) / len(ltchar))) - for direction in t_bbox: - t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0)) - cols, rows = zip(*table_bbox[k]) - cols, rows = list(cols), list(rows) - cols.extend([k[0], k[2]]) - rows.extend([k[1], k[3]]) - # sort horizontal and vertical segments - cols = merge_close_values(sorted(cols), mtol=mtolerance[table_no]) - rows = merge_close_values( - sorted(rows, reverse=True), mtol=mtolerance[table_no]) - # make grid using x and y coord of shortlisted rows and cols - cols = [(cols[i], cols[i + 1]) - for i in range(0, len(cols) - 1)] - rows = [(rows[i], rows[i + 1]) - for i in range(0, len(rows) - 1)] - - table = Table(cols, rows) - # set table edges to True using ver+hor lines - table = table.set_edges(v_s, h_s, jtol=jtolerance[table_no]) - nouse = table.nocont_ / (len(v_s) + len(h_s)) - table_data['line_p'] = 100 * (1 - nouse) - # set spanning cells to True - table = table.set_spanning() - # set table border edges to True - table = table.set_border_edges() - - if self.debug: - self.debug_tables.append(table) - - assignment_errors = [] - table_data['split_text'] = [] - table_data['superscript'] = [] - for direction in ['vertical', 'horizontal']: - for t in t_bbox[direction]: - indices, error = get_table_index( - table, t, direction, split_text=self.split_text, - flag_size=self.flag_size) - if indices[:2] != (-1, -1): - assignment_errors.append(error) - indices = _reduce_index(table, indices, shift_text=self.shift_text) - if len(indices) > 1: - table_data['split_text'].append(indices) - for r_idx, c_idx, text in indices: - if all(s in text for s in ['', '']): - table_data['superscript'].append((r_idx, c_idx, text)) - table.cells[r_idx][c_idx].add_text(text) - score = get_score([[100, assignment_errors]]) - table_data['score'] = score - - if self.fill is not None: - table = _fill_spanning(table, fill=self.fill) - ar = table.get_list() - ar = encode_list(ar) - table_data['data'] = ar - empty_p, r_nempty_cells, c_nempty_cells = count_empty(ar) - table_data['empty_p'] = empty_p - table_data['r_nempty_cells'] = r_nempty_cells - table_data['c_nempty_cells'] = c_nempty_cells - table_data['nrows'] = len(ar) - table_data['ncols'] = len(ar[0]) - tables['table-{0}'.format(table_no + 1)] = table_data - page[os.path.basename(bname)] = tables - - if self.debug: - return None - - return page \ No newline at end of file diff --git a/camelot/ocr.py b/camelot/ocr.py deleted file mode 100644 index 48d1983..0000000 --- a/camelot/ocr.py +++ /dev/null @@ -1,331 +0,0 @@ -import os -import copy -import logging -import subprocess - -import pyocr -from PIL import Image - -from .table import Table -from .imgproc import 
(adaptive_threshold, find_lines, find_table_contours, - find_table_joints, remove_lines, find_cuts) -from .utils import merge_close_values, encode_list - - -__all__ = ['OCRLattice', 'OCRStream'] -logger = logging.getLogger('app_logger') - - -class OCRLattice: - """Lattice, but for images. - - Parameters - ---------- - table_area : list - List of strings of the form x1,y1,x2,y2 where - (x1, y1) -> left-top and (x2, y2) -> right-bottom in OpenCV's - coordinate space, denoting table areas to analyze. - (optional, default: None) - - mtol : list - List of ints specifying m-tolerance parameters. - (optional, default: [2]) - - blocksize : int - Size of a pixel neighborhood that is used to calculate a - threshold value for the pixel: 3, 5, 7, and so on. - (optional, default: 15) - - threshold_constant : float - Constant subtracted from the mean or weighted mean - (see the details below). Normally, it is positive but may be - zero or negative as well. - (optional, default: -2) - - dpi : int - Dots per inch. - (optional, default: 300) - - layout : int - Tesseract page segmentation mode. - (optional, default: 7) - - lang : string - Language to be used for OCR. - (optional, default: 'eng') - - scale : int - Used to divide the height/width of a pdf to get a structuring - element for image processing. - (optional, default: 15) - - iterations : int - Number of iterations for dilation. - (optional, default: 0) - - debug : string - {'contour', 'line', 'joint', 'table'} - Set to one of the above values to generate a matplotlib plot - of detected contours, lines, joints and the table generated. - (optional, default: None) - """ - def __init__(self, table_area=None, mtol=[2], blocksize=15, threshold_constant=-2, - dpi=300, layout=7, lang="eng", scale=15, iterations=0, debug=None): - - self.method = 'ocrl' - self.table_area = table_area - self.mtol = mtol - self.blocksize = blocksize - self.threshold_constant = threshold_constant - self.tool = pyocr.get_available_tools()[0] # fix this - self.dpi = dpi - self.layout = layout - self.lang = lang - self.scale = scale - self.iterations = iterations - self.debug = debug - - def get_tables(self, pdfname): - if self.tool is None: - return None - - bname, __ = os.path.splitext(pdfname) - imagename = ''.join([bname, '.png']) - logger.info('Processing {0}.'.format(os.path.basename(bname))) - - gs_call = [ - "-q", "-sDEVICE=png16m", "-o", imagename, "-r{0}".format(self.dpi), - pdfname - ] - if "ghostscript" in subprocess.check_output(["gs", "-version"]).lower(): - gs_call.insert(0, "gs") - else: - gs_call.insert(0, "gsc") - subprocess.call(gs_call, stdout=open(os.devnull, 'w'), - stderr=subprocess.STDOUT) - - img, threshold = adaptive_threshold(imagename, blocksize=self.blocksize, - c=self.threshold_constant) - vmask, v_segments = find_lines(threshold, direction='vertical', - scale=self.scale, iterations=self.iterations) - hmask, h_segments = find_lines(threshold, direction='horizontal', - scale=self.scale, iterations=self.iterations) - - if self.table_area is not None: - areas = [] - for area in self.table_area: - x1, y1, x2, y2 = area.split(",") - x1 = int(float(x1)) - y1 = int(float(y1)) - x2 = int(float(x2)) - y2 = int(float(y2)) - areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) - table_bbox = find_table_joints(areas, vmask, hmask) - else: - contours = find_table_contours(vmask, hmask) - table_bbox = find_table_joints(contours, vmask, hmask) - - if self.debug: - self.debug_images = (img, table_bbox) - self.debug_segments = (v_segments, h_segments) - 
self.debug_tables = [] - - if len(self.mtol) == 1 and self.mtol[0] == 2: - mtolerance = copy.deepcopy(self.mtol) * len(table_bbox) - else: - mtolerance = copy.deepcopy(self.mtol) - - page = {} - tables = {} - table_no = 0 - for k in sorted(table_bbox.keys(), key=lambda x: x[1]): - table_data = {} - cols, rows = zip(*table_bbox[k]) - cols, rows = list(cols), list(rows) - cols.extend([k[0], k[2]]) - rows.extend([k[1], k[3]]) - cols = merge_close_values(sorted(cols), mtol=mtolerance[table_no]) - rows = merge_close_values(sorted(rows, reverse=True), mtol=mtolerance[table_no]) - cols = [(cols[i], cols[i + 1]) - for i in range(0, len(cols) - 1)] - rows = [(rows[i], rows[i + 1]) - for i in range(0, len(rows) - 1)] - table = Table(cols, rows) - if self.debug: - self.debug_tables.append(table) - table.image = img[k[3]:k[1],k[0]:k[2]] - for i in range(len(table.cells)): - for j in range(len(table.cells[i])): - x1 = int(table.cells[i][j].x1) - y1 = int(table.cells[i][j].y1) - x2 = int(table.cells[i][j].x2) - y2 = int(table.cells[i][j].y2) - table.cells[i][j].image = img[y1:y2,x1:x2] - text = self.tool.image_to_string( - Image.fromarray(table.cells[i][j].image), - lang=self.lang, - builder=pyocr.builders.TextBuilder(tesseract_layout=self.layout) - ) - table.cells[i][j].add_text(text) - ar = table.get_list() - ar.reverse() - ar = encode_list(ar) - table_data['data'] = ar - tables['table-{0}'.format(table_no + 1)] = table_data - table_no += 1 - page[os.path.basename(bname)] = tables - - if self.debug: - return None - - return page - - -class OCRStream: - """Stream, but for images. - - Parameters - ---------- - table_area : list - List of strings of the form x1,y1,x2,y2 where - (x1, y1) -> left-top and (x2, y2) -> right-bottom in OpenCV's - coordinate space, denoting table areas to analyze. - (optional, default: None) - - columns : list - List of strings where each string is comma-separated values of - x-coordinates in OpenCV's coordinate space. - (optional, default: None) - - blocksize : int - Size of a pixel neighborhood that is used to calculate a - threshold value for the pixel: 3, 5, 7, and so on. - (optional, default: 15) - - threshold_constant : float - Constant subtracted from the mean or weighted mean - (see the details below). Normally, it is positive but may be - zero or negative as well. - (optional, default: -2) - - dpi : int - Dots per inch. - (optional, default: 300) - - layout : int - Tesseract page segmentation mode. - (optional, default: 7) - - lang : string - Language to be used for OCR. - (optional, default: 'eng') - - line_scale : int - Line scaling factor. - (optional, default: 15) - - char_scale : int - Char scaling factor. 
- (optional, default: 200) - """ - def __init__(self, table_area=None, columns=None, blocksize=15, - threshold_constant=-2, dpi=300, layout=7, lang="eng", - line_scale=15, char_scale=200, debug=False): - - self.method = 'ocrs' - self.table_area = table_area - self.columns = columns - self.blocksize = blocksize - self.threshold_constant = threshold_constant - self.tool = pyocr.get_available_tools()[0] # fix this - self.dpi = dpi - self.layout = layout - self.lang = lang - self.line_scale = line_scale - self.char_scale = char_scale - self.debug = debug - - def get_tables(self, pdfname): - if self.tool is None: - return None - - bname, __ = os.path.splitext(pdfname) - imagename = ''.join([bname, '.png']) - logger.info('Processing {0}.'.format(os.path.basename(bname))) - - gs_call = [ - "-q", "-sDEVICE=png16m", "-o", imagename, "-r{0}".format(self.dpi), - pdfname - ] - if "ghostscript" in subprocess.check_output(["gs", "-version"]).lower(): - gs_call.insert(0, "gs") - else: - gs_call.insert(0, "gsc") - subprocess.call(gs_call, stdout=open(os.devnull, 'w'), - stderr=subprocess.STDOUT) - - img, threshold = adaptive_threshold(imagename, blocksize=self.blocksize, - c=self.threshold_constant) - threshold = remove_lines(threshold, line_scale=self.line_scale) - height, width = threshold.shape - if self.debug: - self.debug_images = img - return None - - if self.table_area is not None: - if self.columns is not None: - if len(self.table_area) != len(self.columns): - raise ValueError("{0}: Length of table area and columns" - " should be equal.".format(os.path.basename(bname))) - - table_bbox = {} - for area in self.table_area: - x1, y1, x2, y2 = area.split(",") - x1 = int(float(x1)) - y1 = int(float(y1)) - x2 = int(float(x2)) - y2 = int(float(y2)) - table_bbox[(x1, y1, x2, y2)] = None - else: - table_bbox = {(0, 0, width, height): None} - - page = {} - tables = {} - table_no = 0 - for k in sorted(table_bbox.keys(), key=lambda x: x[1]): - if self.columns is None: - raise NotImplementedError - else: - table_data = {} - table_image = threshold[k[1]:k[3],k[0]:k[2]] - cols = self.columns[table_no].split(',') - cols = [float(c) for c in cols] - cols.insert(0, k[0]) - cols.append(k[2]) - cols = [(cols[i] - k[0], cols[i + 1] - k[0]) for i in range(0, len(cols) - 1)] - y_cuts = find_cuts(table_image, char_scale=self.char_scale) - rows = [(y_cuts[i], y_cuts[i + 1]) for i in range(0, len(y_cuts) - 1)] - table = Table(cols, rows) - for i in range(len(table.cells)): - for j in range(len(table.cells[i])): - x1 = int(table.cells[i][j].x1) - y1 = int(table.cells[i][j].y1) - x2 = int(table.cells[i][j].x2) - y2 = int(table.cells[i][j].y2) - table.cells[i][j].image = table_image[y1:y2,x1:x2] - cell_image = Image.fromarray(table.cells[i][j].image) - text = self.tool.image_to_string( - cell_image, - lang=self.lang, - builder=pyocr.builders.TextBuilder(tesseract_layout=self.layout) - ) - table.cells[i][j].add_text(text) - ar = table.get_list() - ar.reverse() - ar = encode_list(ar) - table_data['data'] = ar - tables['table-{0}'.format(table_no + 1)] = table_data - table_no += 1 - page[os.path.basename(bname)] = tables - - return page \ No newline at end of file diff --git a/camelot/parsers/__init__.py b/camelot/parsers/__init__.py new file mode 100644 index 0000000..e046b46 --- /dev/null +++ b/camelot/parsers/__init__.py @@ -0,0 +1,2 @@ +from .stream import Stream +from .lattice import Lattice \ No newline at end of file diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py new file mode 100644 index 0000000..3ffe146 
--- /dev/null +++ b/camelot/parsers/base.py @@ -0,0 +1,21 @@ +import os + +from ..core import Geometry +from ..utils import get_page_layout, get_text_objects + + +class BaseParser(object): + """Defines a base parser. + """ + def _generate_layout(self, filename): + self.filename = filename + self.layout, self.dimensions = get_page_layout( + self.filename, + char_margin=self.char_margin, + line_margin=self.line_margin, + word_margin=self.word_margin) + self.horizontal_text = get_text_objects(self.layout, ltype="lh") + self.vertical_text = get_text_objects(self.layout, ltype="lv") + self.pdf_width, self.pdf_height = self.dimensions + self.rootname, __ = os.path.splitext(self.filename) + self.g = Geometry() \ No newline at end of file diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py new file mode 100644 index 0000000..40a9040 --- /dev/null +++ b/camelot/parsers/lattice.py @@ -0,0 +1,336 @@ +from __future__ import division +import os +import copy +import logging +import subprocess + +import numpy as np +import pandas as pd + +from .base import BaseParser +from ..core import Table +from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox, + merge_close_lines, get_table_index, compute_accuracy, + compute_whitespace, setup_logging, encode_) +from ..image_processing import (adaptive_threshold, find_lines, + find_table_contours, find_table_joints) + + +logger = setup_logging(__name__) + + +class Lattice(BaseParser): + """Lattice method of parsing looks for lines between text + to form a table. + + Parameters + ---------- + table_area : list, optional (default: None) + List of table areas to analyze as strings of the form + x1,y1,x2,y2 where (x1, y1) -> left-top and + (x2, y2) -> right-bottom in pdf coordinate space. + process_background : bool, optional (default: False) + Whether or not to process lines that are in the background. + line_size_scaling : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + the smallest length of lines that should be detected. + + The larger this value, the smaller the detected lines. Making it + too large will lead to text being detected as lines. + copy_text : list, optional (default: None) + {'h', 'v'} + Select one or more strings from above and pass them as a list + to specify the direction in which text should be copied over + when a cell spans multiple rows or columns. + shift_text : list, optional (default: ['l', 't']) + {'l', 'r', 't', 'b'} + Select one or more strings from above and pass them as a list + to specify where the text in a spanning cell should flow. + split_text : bool, optional (default: False) + Whether or not to split a text line if it spans across + multiple cells. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using <s></s> + if its size is different from the rest of the string, useful for + super and subscripts. + line_close_tol : int, optional (default: 2) + Tolerance parameter used to merge vertical and horizontal + detected lines which lie close to each other. + joint_close_tol : int, optional (default: 2) + Tolerance parameter used to decide whether the detected lines + and points lie close to each other. + threshold_blocksize : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + threshold_constant : int, optional (default: -2) + Constant subtracted from the mean or weighted mean.
+ Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + iterations : int, optional (default: 0) + Number of times erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + margins : tuple, optional (default: (1.0, 0.5, 0.1)) + PDFMiner margins. (char_margin, line_margin, word_margin) + + For more information, refer `PDFMiner docs `_. + debug : bool, optional (default: False) + Whether or not to return all text objects on the page + which can be used to generate a matplotlib plot, to get + values for table_area(s) and debugging. + + """ + def __init__(self, table_area=None, process_background=False, + line_size_scaling=15, copy_text=None, shift_text=['l', 't'], + split_text=False, flag_size=False, line_close_tol=2, + joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2, + iterations=0, margins=(1.0, 0.5, 0.1), debug=False): + self.table_area = table_area + self.process_background = process_background + self.line_size_scaling = line_size_scaling + self.copy_text = copy_text + self.shift_text = shift_text + self.split_text = split_text + self.flag_size = flag_size + self.line_close_tol = line_close_tol + self.joint_close_tol = joint_close_tol + self.threshold_blocksize = threshold_blocksize + self.threshold_constant = threshold_constant + self.iterations = iterations + self.char_margin, self.line_margin, self.word_margin = margins + self.debug = debug + + @staticmethod + def _reduce_index(t, idx, shift_text): + """Reduces index of a text object if it lies within a spanning + cell. + + Parameters + ---------- + t : camelot.core.Table + idx : list + List of tuples of the form (r_idx, c_idx, text). + shift_text : list + {'l', 'r', 't', 'b'} + Select one or more strings from above and pass them as a + list to specify where the text in a spanning cell should + flow. + + Returns + ------- + indices : list + List of tuples of the form (r_idx, c_idx, text) where + r_idx and c_idx are new row and column indices for text. + + """ + indices = [] + for r_idx, c_idx, text in idx: + for d in shift_text: + if d == 'l': + if t.cells[r_idx][c_idx].hspan: + while not t.cells[r_idx][c_idx].left: + c_idx -= 1 + if d == 'r': + if t.cells[r_idx][c_idx].hspan: + while not t.cells[r_idx][c_idx].right: + c_idx += 1 + if d == 't': + if t.cells[r_idx][c_idx].vspan: + while not t.cells[r_idx][c_idx].top: + r_idx -= 1 + if d == 'b': + if t.cells[r_idx][c_idx].vspan: + while not t.cells[r_idx][c_idx].bottom: + r_idx += 1 + indices.append((r_idx, c_idx, text)) + return indices + + @staticmethod + def _copy_spanning_text(t, copy_text=None): + """Copies over text in empty spanning cells. + + Parameters + ---------- + t : camelot.core.Table + copy_text : list, optional (default: None) + {'h', 'v'} + Select one or more strings from above and pass them as a list + to specify the direction in which text should be copied over + when a cell spans multiple rows or columns.
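To make `copy_text` and `shift_text` concrete, here is a minimal sketch of driving the new `Lattice` parser directly on a single-page PDF, the way the handler does after splitting pages; the filename is a placeholder and follows the `page-N.pdf` naming that `_generate_table` expects when it derives `table.page` from the root name.

```python
# Minimal sketch, assuming a single-page PDF saved as 'page-1.pdf'
# (the handler normally splits multi-page files first).
from camelot.parsers import Lattice

# Copy text into vertically spanning cells; let text in spanning
# cells flow towards the left/top edges (the defaults).
parser = Lattice(copy_text=['v'], shift_text=['l', 't'])
tables, geometry = parser.extract_tables('page-1.pdf')
for table in tables:
    print table.df        # parsed cells as a pandas DataFrame
    print table.accuracy  # score computed from text-assignment errors
```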
+ + Returns + ------- + t : camelot.core.Table + + """ + for f in copy_text: + if f == "h": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].text.strip() == '': + if t.cells[i][j].hspan and not t.cells[i][j].left: + t.cells[i][j].text = t.cells[i][j - 1].text + elif f == "v": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].text.strip() == '': + if t.cells[i][j].vspan and not t.cells[i][j].top: + t.cells[i][j].text = t.cells[i - 1][j].text + return t + + def _generate_image(self): + self.imagename = ''.join([self.rootname, '.png']) + gs_call = [ + "-q", "-sDEVICE=png16m", "-o", self.imagename, "-r600", self.filename + ] + if "ghostscript" in subprocess.check_output(["gs", "-version"]).lower(): + gs_call.insert(0, "gs") + else: + gs_call.insert(0, "gsc") + subprocess.call(gs_call, stdout=open(os.devnull, 'w'), + stderr=subprocess.STDOUT) + + def _generate_table_bbox(self): + self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background, + blocksize=self.threshold_blocksize, c=self.threshold_constant) + image_width = self.image.shape[1] + image_height = self.image.shape[0] + image_width_scaler = image_width / float(self.pdf_width) + image_height_scaler = image_height / float(self.pdf_height) + pdf_width_scaler = self.pdf_width / float(image_width) + pdf_height_scaler = self.pdf_height / float(image_height) + image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height) + pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height) + + vertical_mask, vertical_segments = find_lines( + self.threshold, direction='vertical', + line_size_scaling=self.line_size_scaling, iterations=self.iterations) + horizontal_mask, horizontal_segments = find_lines( + self.threshold, direction='horizontal', + line_size_scaling=self.line_size_scaling, iterations=self.iterations) + + if self.table_area is not None: + areas = [] + for area in self.table_area: + x1, y1, x2, y2 = area.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers) + areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) + table_bbox = find_table_joints(areas, vertical_mask, horizontal_mask) + else: + contours = find_table_contours(vertical_mask, horizontal_mask) + table_bbox = find_table_joints(contours, vertical_mask, horizontal_mask) + + self.table_bbox_unscaled = copy.deepcopy(table_bbox) + + self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image( + table_bbox, vertical_segments, horizontal_segments, pdf_scalers) + + def _generate_columns_and_rows(self, table_idx, tk): + # select elements which lie within table_bbox + t_bbox = {} + v_s, h_s = segments_in_bbox( + tk, self.vertical_segments, self.horizontal_segments) + t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) + t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) + self.t_bbox = t_bbox + + for direction in t_bbox: + t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0)) + + cols, rows = zip(*self.table_bbox[tk]) + cols, rows = list(cols), list(rows) + cols.extend([tk[0], tk[2]]) + rows.extend([tk[1], tk[3]]) + # sort horizontal and vertical segments + cols = merge_close_lines( + sorted(cols), line_close_tol=self.line_close_tol) + rows = merge_close_lines( + sorted(rows, reverse=True), line_close_tol=self.line_close_tol) + # make grid using x and y coord of shortlisted rows and cols + cols = [(cols[i], cols[i + 1]) + for i in 
range(0, len(cols) - 1)] + rows = [(rows[i], rows[i + 1]) + for i in range(0, len(rows) - 1)] + + return cols, rows, v_s, h_s + + def _generate_table(self, table_idx, cols, rows, **kwargs): + v_s = kwargs.get('v_s') + h_s = kwargs.get('h_s') + if v_s is None or h_s is None: + raise ValueError('No segments found on {}'.format(self.rootname)) + + table = Table(cols, rows) + # set table edges to True using ver+hor lines + table = table.set_edges(v_s, h_s, joint_close_tol=self.joint_close_tol) + # set table border edges to True + table = table.set_border() + # set spanning cells to True + table = table.set_span() + + pos_errors = [] + for direction in self.t_bbox: + for t in self.t_bbox[direction]: + indices, error = get_table_index( + table, t, direction, split_text=self.split_text, + flag_size=self.flag_size) + if indices[:2] != (-1, -1): + pos_errors.append(error) + indices = Lattice._reduce_index(table, indices, shift_text=self.shift_text) + for r_idx, c_idx, text in indices: + table.cells[r_idx][c_idx].text = text + accuracy = compute_accuracy([[100, pos_errors]]) + + if self.copy_text is not None: + table = Lattice._copy_spanning_text(table, copy_text=self.copy_text) + + data = table.data + data = encode_(data) + table.df = pd.DataFrame(data) + table.shape = table.df.shape + + whitespace = compute_whitespace(data) + table.accuracy = accuracy + table.whitespace = whitespace + table.order = table_idx + 1 + table.page = int(os.path.basename(self.rootname).replace('page-', '')) + + return table + + def extract_tables(self, filename): + logger.info('Processing {}'.format(os.path.basename(filename))) + self._generate_layout(filename) + + if not self.horizontal_text: + logger.info("No tables found on {}".format( + os.path.basename(self.rootname))) + return [], self.g + + self._generate_image() + self._generate_table_bbox() + + _tables = [] + # sort tables based on y-coord + for table_idx, tk in enumerate(sorted(self.table_bbox.keys(), + key=lambda x: x[1], reverse=True)): + cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk) + table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s) + _tables.append(table) + + if self.debug: + text = [] + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text]) + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text]) + self.g.text = text + self.g.images = (self.image, self.table_bbox_unscaled) + self.g.segments = (self.vertical_segments, self.horizontal_segments) + self.g.tables = _tables + + return _tables, self.g \ No newline at end of file diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py new file mode 100644 index 0000000..f547bf0 --- /dev/null +++ b/camelot/parsers/stream.py @@ -0,0 +1,370 @@ +from __future__ import division +import os +import logging + +import numpy as np +import pandas as pd + +from .base import BaseParser +from ..core import Table +from ..utils import (text_in_bbox, get_table_index, compute_accuracy, + compute_whitespace, setup_logging, encode_) + + +logger = setup_logging(__name__) + + +class Stream(BaseParser): + """Stream method of parsing looks for spaces between text + to form a table. + + If you want to specify columns when specifying multiple table + areas, make sure that the lengths of both lists are equal. + + Parameters + ---------- + table_area : list, optional (default: None) + List of table areas to analyze as strings of the form + x1,y1,x2,y2 where (x1, y1) -> left-top and + (x2, y2) -> right-bottom in pdf coordinate space.
+ columns : list, optional (default: None) + List of column x-coordinates as strings where the coordinates + are comma-separated. + split_text : bool, optional (default: False) + Whether or not to split a text line if it spans across + multiple cells. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using <s></s> + if its size is different from the rest of the string, useful for + super and subscripts. + row_close_tol : int, optional (default: 2) + Rows will be formed by combining text vertically + within this tolerance. + col_close_tol : int, optional (default: 0) + Columns will be formed by combining text horizontally + within this tolerance. + margins : tuple, optional (default: (1.0, 0.5, 0.1)) + PDFMiner margins. (char_margin, line_margin, word_margin) + + For more information, refer `PDFMiner docs `_. + debug : bool, optional (default: False) + Whether or not to return all text objects on the page + which can be used to generate a matplotlib plot, to get + values for table_area(s), columns and debugging. + + """ + def __init__(self, table_area=None, columns=None, split_text=False, + flag_size=False, row_close_tol=2, col_close_tol=0, + margins=(1.0, 0.5, 0.1), debug=False): + self.table_area = table_area + self.columns = columns + self._validate_columns() + self.split_text = split_text + self.flag_size = flag_size + self.row_close_tol = row_close_tol + self.col_close_tol = col_close_tol + self.char_margin, self.line_margin, self.word_margin = margins + self.debug = debug + + @staticmethod + def _text_bbox(t_bbox): + """Returns bounding box for the text present on a page. + + Parameters + ---------- + t_bbox : dict + Dict with two keys 'horizontal' and 'vertical' with lists of + LTTextLineHorizontals and LTTextLineVerticals respectively. + + Returns + ------- + text_bbox : tuple + Tuple (x0, y0, x1, y1) in pdf coordinate space. + + """ + xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) + ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) + xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) + ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) + text_bbox = (xmin, ymin, xmax, ymax) + return text_bbox + + @staticmethod + def _group_rows(text, row_close_tol=2): + """Groups PDFMiner text objects into rows vertically + within a tolerance. + + Parameters + ---------- + text : list + List of PDFMiner text objects. + row_close_tol : int, optional (default: 2) + + Returns + ------- + rows : list + Two-dimensional list of text objects grouped into rows. + + """ + row_y = 0 + rows = [] + temp = [] + for t in text: + # is checking for upright necessary? + # if t.get_text().strip() and all([obj.upright for obj in t._objs if + # type(obj) is LTChar]): + if t.get_text().strip(): + if not np.isclose(row_y, t.y0, atol=row_close_tol): + rows.append(sorted(temp, key=lambda t: t.x0)) + temp = [] + row_y = t.y0 + temp.append(t) + rows.append(sorted(temp, key=lambda t: t.x0)) + __ = rows.pop(0) # hacky + return rows + + @staticmethod + def _merge_columns(l, col_close_tol=0): + """Merges column boundaries horizontally if they overlap + or lie within a tolerance. + + Parameters + ---------- + l : list + List of column x-coordinate tuples. + col_close_tol : int, optional (default: 0) + + Returns + ------- + merged : list + List of merged column x-coordinate tuples.
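The merging rule is easier to follow on concrete numbers. Below is a simplified stand-in for the non-negative-tolerance branch of `_merge_columns`, with `np.isclose` replaced by an absolute-difference check; `merge_columns_sketch` is a hypothetical name used only for illustration.

```python
# Sketch of the non-negative-tolerance branch of _merge_columns:
# overlapping or near-touching x-intervals collapse into one column.
def merge_columns_sketch(intervals, tol=0):
    merged = []
    for higher in sorted(intervals):
        if merged and (higher[0] <= merged[-1][1] or
                       abs(higher[0] - merged[-1][1]) <= tol):
            lower = merged[-1]
            merged[-1] = (min(lower[0], higher[0]), max(lower[1], higher[1]))
        else:
            merged.append(higher)
    return merged

# (10, 50) and (48, 90) overlap, so three candidates become two columns.
assert merge_columns_sketch([(10, 50), (48, 90), (120, 150)]) == [(10, 90), (120, 150)]
```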
+ + """ + merged = [] + for higher in l: + if not merged: + merged.append(higher) + else: + lower = merged[-1] + if col_close_tol >= 0: + if (higher[0] <= lower[1] or + np.isclose(higher[0], lower[1], atol=col_close_tol)): + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + elif col_close_tol < 0: + if higher[0] <= lower[1]: + if np.isclose(higher[0], lower[1], atol=abs(col_close_tol)): + merged.append(higher) + else: + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + return merged + + @staticmethod + def _join_rows(rows_grouped, text_y_max, text_y_min): + """Makes row coordinates continuous. + + Parameters + ---------- + rows_grouped : list + Two-dimensional list of text objects grouped into rows. + text_y_max : int + text_y_min : int + + Returns + ------- + rows : list + List of continuous row y-coordinate tuples. + + """ + row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) + if len(r) > 0 else 0 for r in rows_grouped] + rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] + rows.insert(0, text_y_max) + rows.append(text_y_min) + rows = [(rows[i], rows[i + 1]) + for i in range(0, len(rows) - 1)] + return rows + + @staticmethod + def _add_columns(cols, text, row_close_tol): + """Adds columns to existing list by taking into account + the text that lies outside the current column x-coordinates. + + Parameters + ---------- + cols : list + List of column x-coordinate tuples. + text : list + List of PDFMiner text objects. + ytol : int + + Returns + ------- + cols : list + Updated list of column x-coordinate tuples. + + """ + if text: + text = Stream._group_rows(text, row_close_tol=row_close_tol) + elements = [len(r) for r in text] + new_cols = [(t.x0, t.x1) + for r in text if len(r) == max(elements) for t in r] + cols.extend(Stream._merge_columns(sorted(new_cols))) + return cols + + @staticmethod + def _join_columns(cols, text_x_min, text_x_max): + """Makes column coordinates continuous. + + Parameters + ---------- + cols : list + List of column x-coordinate tuples. + text_x_min : int + text_y_max : int + + Returns + ------- + cols : list + Updated list of column x-coordinate tuples. 
+ + """ + cols = sorted(cols) + cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] + cols.insert(0, text_x_min) + cols.append(text_x_max) + cols = [(cols[i], cols[i + 1]) + for i in range(0, len(cols) - 1)] + return cols + + def _validate_columns(self): + if self.table_area is not None and self.columns is not None: + if len(self.table_area) != len(self.columns): + raise ValueError("Length of table_area and columns" + " should be equal") + + def _generate_table_bbox(self): + if self.table_area is not None: + table_bbox = {} + for area in self.table_area: + x1, y1, x2, y2 = area.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + table_bbox[(x1, y2, x2, y1)] = None + else: + table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None} + self.table_bbox = table_bbox + + def _generate_columns_and_rows(self, table_idx, tk): + # select elements which lie within table_bbox + t_bbox = {} + t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) + t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) + self.t_bbox = t_bbox + + for direction in self.t_bbox: + self.t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0)) + + text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) + rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol) + rows = self._join_rows(rows_grouped, text_y_max, text_y_min) + elements = [len(r) for r in rows_grouped] + + if self.columns is not None and self.columns[table_idx] != "": + # user has to input boundary columns too + # take (0, pdf_width) by default + # similar to else condition + # len can't be 1 + cols = self.columns[table_idx].split(',') + cols = [float(c) for c in cols] + cols.insert(0, text_x_min) + cols.append(text_x_max) + cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)] + else: + ncols = max(set(elements), key=elements.count) + if ncols == 1: + logger.info("No tables found on {}".format( + os.path.basename(self.rootname))) + cols = [(t.x0, t.x1) + for r in rows_grouped if len(r) == ncols for t in r] + cols = self._merge_columns(sorted(cols), col_close_tol=self.col_close_tol) + inner_text = [] + for i in range(1, len(cols)): + left = cols[i - 1][1] + right = cols[i][0] + inner_text.extend([t for direction in self.t_bbox + for t in self.t_bbox[direction] + if t.x0 > left and t.x1 < right]) + outer_text = [t for direction in self.t_bbox + for t in self.t_bbox[direction] + if t.x0 > cols[-1][1] or t.x1 < cols[0][0]] + inner_text.extend(outer_text) + cols = self._add_columns(cols, inner_text, self.row_close_tol) + cols = self._join_columns(cols, text_x_min, text_x_max) + + return cols, rows + + def _generate_table(self, table_idx, cols, rows, **kwargs): + table = Table(cols, rows) + table = table.set_all_edges() + pos_errors = [] + for direction in self.t_bbox: + for t in self.t_bbox[direction]: + indices, error = get_table_index( + table, t, direction, split_text=self.split_text, + flag_size=self.flag_size) + if indices[:2] != (-1, -1): + pos_errors.append(error) + for r_idx, c_idx, text in indices: + table.cells[r_idx][c_idx].text = text + accuracy = compute_accuracy([[100, pos_errors]]) + + data = table.data + data = encode_(data) + table.df = pd.DataFrame(data) + table.shape = table.df.shape + + whitespace = compute_whitespace(data) + table.accuracy = accuracy + table.whitespace = whitespace + table.order = table_idx + 1 + table.page = int(os.path.basename(self.rootname).replace('page-', '')) + + return table + + def 
extract_tables(self, filename): + logger.info('Processing {}'.format(os.path.basename(filename))) + self._generate_layout(filename) + + if not self.horizontal_text: + logger.info("No tables found on {}".format( + os.path.basename(self.rootname))) + return [], self.g + + self._generate_table_bbox() + + _tables = [] + # sort tables based on y-coord + for table_idx, tk in enumerate(sorted(self.table_bbox.keys(), + key=lambda x: x[1], reverse=True)): + cols, rows = self._generate_columns_and_rows(table_idx, tk) + table = self._generate_table(table_idx, cols, rows) + _tables.append(table) + + if self.debug: + text = [] + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text]) + text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text]) + self.g.text = text + self.g.tables = _tables + + return _tables, self.g \ No newline at end of file diff --git a/camelot/pdf.py b/camelot/pdf.py deleted file mode 100644 index 08fd26c..0000000 --- a/camelot/pdf.py +++ /dev/null @@ -1,268 +0,0 @@ -import os -import shutil -import tempfile -import itertools -import multiprocessing as mp -from functools import partial - -import cv2 -from PyPDF2 import PdfFileReader, PdfFileWriter - -from .utils import get_page_layout, get_text_objects, get_rotation - - -__all__ = ['Pdf'] - - -def _parse_page_numbers(pagenos): - """Converts list of dicts to list of ints. - - Parameters - ---------- - pagenos : list - List of dicts representing page ranges. A dict must have only - two keys named 'start' and 'end' having int as their value. - - Returns - ------- - page_numbers : list - List of int page numbers. - """ - page_numbers = [] - for p in pagenos: - page_numbers.extend(range(p['start'], p['end'] + 1)) - page_numbers = sorted(set(page_numbers)) - return page_numbers - - -def _save_page(temp, pdfname, pageno): - with open(pdfname, 'rb') as pdffile: - infile = PdfFileReader(pdffile, strict=False) - sp_path = os.path.join(temp, 'page-{0}.pdf'.format(pageno)) - sp_name, sp_ext = os.path.splitext(sp_path) - page = infile.getPage(pageno - 1) - outfile = PdfFileWriter() - outfile.addPage(page) - with open(sp_path, 'wb') as f: - outfile.write(f) - layout, dim = get_page_layout(sp_path) - lttextlh = get_text_objects(layout, ltype="lh") - lttextlv = get_text_objects(layout, ltype="lv") - ltchar = get_text_objects(layout, ltype="char") - rotation = get_rotation(lttextlh, lttextlv, ltchar) - if rotation != '': - sp_new_path = ''.join([sp_name.replace('page', 'p'), '_rotated', sp_ext]) - os.rename(sp_path, sp_new_path) - sp_in = PdfFileReader(open(sp_new_path, 'rb'), - strict=False) - sp_out = PdfFileWriter() - sp_page = sp_in.getPage(0) - if rotation == 'left': - sp_page.rotateClockwise(90) - elif rotation == 'right': - sp_page.rotateCounterClockwise(90) - sp_out.addPage(sp_page) - with open(sp_path, 'wb') as pdf_out: - sp_out.write(pdf_out) - - -class Pdf: - """Pdf manager. - Handles all operations like temp directory creation, splitting file - into single page pdfs, running extraction using multiple processes - and removing the temp directory. - - Parameters - ---------- - extractor : object - camelot.stream.Stream or camelot.lattice.Lattice extractor - object. - - pdfname : string - Path to pdf file. - - pagenos : list - List of dicts representing page ranges. A dict must have only - two keys named 'start' and 'end' having int as their value. - (optional, default: [{'start': 1, 'end': 1}]) - - parallel : bool - Whether or not to run using multiple processes. 
- (optional, default: False) - - clean : bool - Whether or not to remove the temp directory. - (optional, default: False) - """ - - def __init__(self, extractor, pdfname, pagenos=[{'start': 1, 'end': 1}], - parallel=False, clean=False): - - self.extractor = extractor - self.pdfname = pdfname - if not self.pdfname.endswith('.pdf'): - raise TypeError("File format not supported.") - self.pagenos = _parse_page_numbers(pagenos) - self.parallel = parallel - if self.parallel: - self.cpu_count = mp.cpu_count() - self.pool = mp.Pool(processes=self.cpu_count) - self.clean = clean - self.temp = tempfile.mkdtemp() - - def split(self): - """Splits file into single page pdfs. - """ - if self.parallel: - pfunc = partial(_save_page, self.temp, self.pdfname) - self.pool.map(pfunc, self.pagenos) - else: - for p in self.pagenos: - _save_page(self.temp, self.pdfname, p) - - - def extract(self): - """Runs table extraction by calling extractor.get_tables - on all single page pdfs. - """ - self.split() - pages = [os.path.join(self.temp, 'page-{0}.pdf'.format(p)) - for p in self.pagenos] - if self.parallel: - tables = self.pool.map(self.extractor.get_tables, pages) - tables = {k: v for d in tables if d is not None for k, v in d.items()} - else: - tables = {} - if self.extractor.debug: - if self.extractor.method == 'stream': - self.debug = self.extractor.debug - self.debug_text = [] - elif self.extractor.method in ['lattice', 'ocrl']: - self.debug = self.extractor.debug - self.debug_images = [] - self.debug_segments = [] - self.debug_tables = [] - elif self.extractor.method == 'ocrs': - self.debug = self.extractor.debug - self.debug_images = [] - for p in pages: - table = self.extractor.get_tables(p) - if table is not None: - tables.update(table) - if self.extractor.debug: - if self.extractor.method == 'stream': - self.debug_text.append(self.extractor.debug_text) - elif self.extractor.method in ['lattice', 'ocr']: - self.debug_images.append(self.extractor.debug_images) - self.debug_segments.append(self.extractor.debug_segments) - self.debug_tables.append(self.extractor.debug_tables) - elif self.extractor.method == 'ocrs': - self.debug_images.append(self.extractor.debug_images) - if self.clean: - self.remove_tempdir() - return tables - - def remove_tempdir(self): - """Removes temporary directory that was created to save single - page pdfs and their images. - """ - shutil.rmtree(self.temp) - - def debug_plot(self): - """Generates a matplotlib plot based on the selected extractor - debug option. 
- """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - if self.debug is True: - if hasattr(self, 'debug_text'): - for text in self.debug_text: - fig = plt.figure() - ax = fig.add_subplot(111, aspect='equal') - xs, ys = [], [] - for t in text: - xs.extend([t[0], t[1]]) - ys.extend([t[2], t[3]]) - ax.add_patch( - patches.Rectangle( - (t[0], t[1]), - t[2] - t[0], - t[3] - t[1] - ) - ) - ax.set_xlim(min(xs) - 10, max(xs) + 10) - ax.set_ylim(min(ys) - 10, max(ys) + 10) - plt.show() - elif hasattr(self, 'debug_images'): - for img in self.debug_images: - plt.imshow(img) - plt.show() - elif self.debug == 'contour': - try: - for img, table_bbox in self.debug_images: - for t in table_bbox.keys(): - cv2.rectangle(img, (t[0], t[1]), - (t[2], t[3]), (255, 0, 0), 3) - plt.imshow(img) - plt.show() - except AttributeError: - raise ValueError("This option can only be used with Lattice.") - elif self.debug == 'joint': - try: - for img, table_bbox in self.debug_images: - x_coord = [] - y_coord = [] - for k in table_bbox.keys(): - for coord in table_bbox[k]: - x_coord.append(coord[0]) - y_coord.append(coord[1]) - max_x, max_y = max(x_coord), max(y_coord) - plt.plot(x_coord, y_coord, 'ro') - plt.axis([0, max_x + 100, max_y + 100, 0]) - plt.imshow(img) - plt.show() - except AttributeError: - raise ValueError("This option can only be used with Lattice.") - elif self.debug == 'line': - try: - for v_s, h_s in self.debug_segments: - for v in v_s: - plt.plot([v[0], v[2]], [v[1], v[3]]) - for h in h_s: - plt.plot([h[0], h[2]], [h[1], h[3]]) - plt.show() - except AttributeError: - raise ValueError("This option can only be used with Lattice.") - elif self.debug == 'table': - try: - for tables in self.debug_tables: - for table in tables: - for r in range(len(table.rows)): - for c in range(len(table.cols)): - if table.cells[r][c].left: - plt.plot([table.cells[r][c].lb[0], - table.cells[r][c].lt[0]], - [table.cells[r][c].lb[1], - table.cells[r][c].lt[1]]) - if table.cells[r][c].right: - plt.plot([table.cells[r][c].rb[0], - table.cells[r][c].rt[0]], - [table.cells[r][c].rb[1], - table.cells[r][c].rt[1]]) - if table.cells[r][c].top: - plt.plot([table.cells[r][c].lt[0], - table.cells[r][c].rt[0]], - [table.cells[r][c].lt[1], - table.cells[r][c].rt[1]]) - if table.cells[r][c].bottom: - plt.plot([table.cells[r][c].lb[0], - table.cells[r][c].rb[0]], - [table.cells[r][c].lb[1], - table.cells[r][c].rb[1]]) - plt.show() - except AttributeError: - raise ValueError("This option can only be used with Lattice.") - else: - raise UserWarning("This method can only be called after" - " debug has been specified.") \ No newline at end of file diff --git a/camelot/plotting.py b/camelot/plotting.py new file mode 100644 index 0000000..2d0bb3c --- /dev/null +++ b/camelot/plotting.py @@ -0,0 +1,174 @@ +import cv2 +import matplotlib.pyplot as plt +import matplotlib.patches as patches + +from .handlers import PDFHandler + + +def plot_geometry(filepath, pages='1', mesh=False, geometry_type='text', **kwargs): + """Plot geometry found on pdf page based on type specified, + useful for debugging and playing with different parameters to get + the best output. + + Note: kwargs annotated with ^ can only be used with mesh=False + and kwargs annotated with * can only be used with mesh=True. + + Parameters + ---------- + filepath : str + Path to pdf file. + pages : str + Comma-separated page numbers to parse. + Example: 1,3,4 or 1,4-end + mesh : bool (default: False) + Whether or not to use Lattice method of parsing. 
Stream + is used by default. + geometry_type : str, optional (default: 'text') + 'text' : Plot text objects found on page, useful to get + table_area and columns coordinates. + 'table' : Plot parsed table. + 'contour'* : Plot detected rectangles. + 'joint'* : Plot detected line intersections. + 'line'* : Plot detected lines. + table_area : list, optional (default: None) + List of table areas to analyze as strings of the form + x1,y1,x2,y2 where (x1, y1) -> left-top and + (x2, y2) -> right-bottom in pdf coordinate space. + columns^ : list, optional (default: None) + List of column x-coordinates as strings where the coordinates + are comma-separated. + split_text : bool, optional (default: False) + Whether or not to split a text line if it spans across + multiple cells. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using <s></s> + if its size is different from the rest of the string, useful for + super and subscripts. + row_close_tol^ : int, optional (default: 2) + Rows will be formed by combining text vertically + within this tolerance. + col_close_tol^ : int, optional (default: 0) + Columns will be formed by combining text horizontally + within this tolerance. + process_background* : bool, optional (default: False) + Whether or not to process lines that are in the background. + line_size_scaling* : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + the smallest length of lines that should be detected. + + The larger this value, the smaller the detected lines. Making it + too large will lead to text being detected as lines. + copy_text* : list, optional (default: None) + {'h', 'v'} + Select one or more strings from above and pass them as a list + to specify the direction in which text should be copied over + when a cell spans multiple rows or columns. + shift_text* : list, optional (default: ['l', 't']) + {'l', 'r', 't', 'b'} + Select one or more strings from above and pass them as a list + to specify where the text in a spanning cell should flow. + line_close_tol* : int, optional (default: 2) + Tolerance parameter used to merge vertical and horizontal + detected lines which lie close to each other. + joint_close_tol* : int, optional (default: 2) + Tolerance parameter used to decide whether the detected lines + and points lie close to each other. + threshold_blocksize : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + threshold_constant : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + iterations : int, optional (default: 0) + Number of times erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + margins : tuple, optional (default: (1.0, 0.5, 0.1)) + PDFMiner margins. (char_margin, line_margin, word_margin) + + For more information, refer `PDFMiner docs `_. + debug : bool, optional (default: False) + Whether or not to return all text objects on the page + which can be used to generate a matplotlib plot, to get + values for table_area(s) and debugging. + + """ + # validate kwargs?
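For reference, a hypothetical debugging session with this helper, using the signature defined above; 'foo.pdf' is a placeholder path.

```python
# Hypothetical debugging session with the new plotting helper.
from camelot.plotting import plot_geometry

# Plot text objects to pick table_area and columns for Stream.
plot_geometry('foo.pdf', pages='1', geometry_type='text')

# Plot detected line intersections to sanity-check Lattice.
plot_geometry('foo.pdf', pages='1', mesh=True, geometry_type='joint')
```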
+ p = PDFHandler(filepath, pages) + debug = True if geometry_type else False + kwargs.update({'debug': debug}) + __, geometry = p.parse(mesh=mesh, **kwargs) + + if geometry_type == 'text': + for text in geometry.text: + fig = plt.figure() + ax = fig.add_subplot(111, aspect='equal') + xs, ys = [], [] + for t in text: + xs.extend([t[0], t[1]]) + ys.extend([t[2], t[3]]) + ax.add_patch( + patches.Rectangle( + (t[0], t[1]), + t[2] - t[0], + t[3] - t[1] + ) + ) + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + plt.show() + elif geometry_type == 'table': + for tables in geometry.tables: + for table in tables: + for row in table.cells: + for cell in row: + if cell.left: + plt.plot([cell.lb[0], cell.lt[0]], + [cell.lb[1], cell.lt[1]]) + if cell.right: + plt.plot([cell.rb[0], cell.rt[0]], + [cell.rb[1], cell.rt[1]]) + if cell.top: + plt.plot([cell.lt[0], cell.rt[0]], + [cell.lt[1], cell.rt[1]]) + if cell.bottom: + plt.plot([cell.lb[0], cell.rb[0]], + [cell.lb[1], cell.rb[1]]) + plt.show() + elif geometry_type == 'contour': + if not mesh: + raise ValueError("Use mesh=True") + for img, table_bbox in geometry.images: + for t in table_bbox.keys(): + cv2.rectangle(img, (t[0], t[1]), + (t[2], t[3]), (255, 0, 0), 3) + plt.imshow(img) + plt.show() + elif geometry_type == 'joint': + if not mesh: + raise ValueError("Use mesh=True") + for img, table_bbox in geometry.images: + x_coord = [] + y_coord = [] + for k in table_bbox.keys(): + for coord in table_bbox[k]: + x_coord.append(coord[0]) + y_coord.append(coord[1]) + max_x, max_y = max(x_coord), max(y_coord) + plt.plot(x_coord, y_coord, 'ro') + plt.axis([0, max_x + 100, max_y + 100, 0]) + plt.imshow(img) + plt.show() + elif geometry_type == 'line': + if not mesh: + raise ValueError("Use mesh=True") + for v_s, h_s in geometry.segments: + for v in v_s: + plt.plot([v[0], v[2]], [v[1], v[3]]) + for h in h_s: + plt.plot([h[0], h[2]], [h[1], h[3]]) + plt.show() \ No newline at end of file diff --git a/camelot/stream.py b/camelot/stream.py deleted file mode 100644 index e794d6a..0000000 --- a/camelot/stream.py +++ /dev/null @@ -1,428 +0,0 @@ -from __future__ import division -import os -import copy -import types -import logging -import copy_reg -import warnings - -import numpy as np - -from .table import Table -from .utils import (text_in_bbox, get_table_index, get_score, count_empty, - encode_list, get_text_objects, get_page_layout) - - -__all__ = ['Stream'] -logger = logging.getLogger('app_logger') - - -def _reduce_method(m): - if m.im_self is None: - return getattr, (m.im_class, m.im_func.func_name) - else: - return getattr, (m.im_self, m.im_func.func_name) -copy_reg.pickle(types.MethodType, _reduce_method) - - -def _text_bbox(t_bbox): - """Returns bounding box for the text present on a page. - - Parameters - ---------- - t_bbox : dict - Dict with two keys 'horizontal' and 'vertical' with lists of - LTTextLineHorizontals and LTTextLineVerticals respectively. - - Returns - ------- - text_bbox : tuple - Tuple of the form (x0, y0, x1, y1) in PDFMiner's coordinate - space. 
- """ - xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) - ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) - xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) - ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) - text_bbox = (xmin, ymin, xmax, ymax) - return text_bbox - - -def _group_rows(text, ytol=2): - """Groups PDFMiner text objects into rows using their - y-coordinates taking into account some tolerance ytol. - - Parameters - ---------- - text : list - List of PDFMiner text objects. - - ytol : int - Tolerance parameter. - (optional, default: 2) - - Returns - ------- - rows : list - Two-dimensional list of text objects grouped into rows. - """ - row_y = 0 - rows = [] - temp = [] - for t in text: - # is checking for upright necessary? - # if t.get_text().strip() and all([obj.upright for obj in t._objs if - # type(obj) is LTChar]): - if t.get_text().strip(): - if not np.isclose(row_y, t.y0, atol=ytol): - rows.append(sorted(temp, key=lambda t: t.x0)) - temp = [] - row_y = t.y0 - temp.append(t) - rows.append(sorted(temp, key=lambda t: t.x0)) - __ = rows.pop(0) # hacky - return rows - - -def _merge_columns(l, mtol=0): - """Merges column boundaries if they overlap or lie within some - tolerance mtol. - - Parameters - ---------- - l : list - List of column coordinate tuples. - - mtol : int - TODO - (optional, default: 0) - - Returns - ------- - merged : list - List of merged column coordinate tuples. - """ - merged = [] - for higher in l: - if not merged: - merged.append(higher) - else: - lower = merged[-1] - if mtol >= 0: - if (higher[0] <= lower[1] or - np.isclose(higher[0], lower[1], atol=mtol)): - upper_bound = max(lower[1], higher[1]) - lower_bound = min(lower[0], higher[0]) - merged[-1] = (lower_bound, upper_bound) - else: - merged.append(higher) - elif mtol < 0: - if higher[0] <= lower[1]: - if np.isclose(higher[0], lower[1], atol=abs(mtol)): - merged.append(higher) - else: - upper_bound = max(lower[1], higher[1]) - lower_bound = min(lower[0], higher[0]) - merged[-1] = (lower_bound, upper_bound) - else: - merged.append(higher) - return merged - - -def _join_rows(rows_grouped, text_y_max, text_y_min): - """Makes row coordinates continuous. - - Parameters - ---------- - rows_grouped : list - Two-dimensional list of text objects grouped into rows. - - text_y_max : int - - text_y_min : int - - Returns - ------- - rows : list - List of continuous row coordinate tuples. - """ - row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) - if len(r) > 0 else 0 for r in rows_grouped] - rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] - rows.insert(0, text_y_max) - rows.append(text_y_min) - rows = [(rows[i], rows[i + 1]) - for i in range(0, len(rows) - 1)] - return rows - - -def _join_columns(cols, text_x_min, text_x_max): - """Makes column coordinates continuous. - - Parameters - ---------- - cols : list - List of column coordinate tuples. - - text_x_min : int - - text_y_max : int - - Returns - ------- - cols : list - Updated list of column coordinate tuples. - """ - cols = sorted(cols) - cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] - cols.insert(0, text_x_min) - cols.append(text_x_max) - cols = [(cols[i], cols[i + 1]) - for i in range(0, len(cols) - 1)] - return cols - - -def _add_columns(cols, text, ytol): - """Adds columns to existing list by taking into account - the text that lies outside the current column coordinates. 
- - Parameters - ---------- - cols : list - List of column coordinate tuples. - - text : list - List of PDFMiner text objects. - - ytol : int - Tolerance parameter. - - Returns - ------- - cols : list - Updated list of column coordinate tuples. - """ - if text: - text = _group_rows(text, ytol=ytol) - elements = [len(r) for r in text] - new_cols = [(t.x0, t.x1) - for r in text if len(r) == max(elements) for t in r] - cols.extend(_merge_columns(sorted(new_cols))) - return cols - - -class Stream: - """Stream looks for spaces between text elements to form a table. - - If you want to give columns, ytol or mtol for each table - when specifying multiple table areas, make sure that their length - is equal to the length of table_area. Mapping between them is based - on index. - - If you don't want to specify columns for the some tables in a pdf - page having multiple tables, pass them as empty strings. - For example: ['', 'x1,x2,x3,x4', ''] - - Parameters - ---------- - table_area : list - List of strings of the form x1,y1,x2,y2 where - (x1, y1) -> left-top and (x2, y2) -> right-bottom in PDFMiner's - coordinate space, denoting table areas to analyze. - (optional, default: None) - - columns : list - List of strings where each string is comma-separated values of - x-coordinates in PDFMiner's coordinate space. - (optional, default: None) - - ytol : list - List of ints specifying the y-tolerance parameters. - (optional, default: [2]) - - mtol : list - List of ints specifying the m-tolerance parameters. - (optional, default: [0]) - - margins : tuple - PDFMiner margins. (char_margin, line_margin, word_margin) - (optional, default: (1.0, 0.5, 0.1)) - - split_text : bool - Whether or not to split a text line if it spans across - different cells. - (optional, default: False) - - flag_size : bool - Whether or not to highlight a substring using - if its size is different from rest of the string, useful for - super and subscripts. - (optional, default: True) - - debug : bool - Set to True to generate a matplotlib plot of - LTTextLineHorizontals in order to select table_area, columns. - (optional, default: False) - """ - def __init__(self, table_area=None, columns=None, ytol=[2], mtol=[0], - margins=(1.0, 0.5, 0.1), split_text=False, flag_size=True, - debug=False): - - self.method = 'stream' - self.table_area = table_area - self.columns = columns - self.ytol = ytol - self.mtol = mtol - self.char_margin, self.line_margin, self.word_margin = margins - self.split_text = split_text - self.flag_size = flag_size - self.debug = debug - - def get_tables(self, pdfname): - """Expects a single page pdf as input with rotation corrected. - - Parameters - --------- - pdfname : string - Path to single page pdf file. 
- - Returns - ------- - page : dict - """ - layout, dim = get_page_layout(pdfname, char_margin=self.char_margin, - line_margin=self.line_margin, word_margin=self.word_margin) - lttextlh = get_text_objects(layout, ltype="lh") - lttextlv = get_text_objects(layout, ltype="lv") - ltchar = get_text_objects(layout, ltype="char") - width, height = dim - bname, __ = os.path.splitext(pdfname) - logger.info('Processing {0}.'.format(os.path.basename(bname))) - if not lttextlh: - warnings.warn("{0}: Page contains no text.".format( - os.path.basename(bname))) - return {os.path.basename(bname): None} - - if self.debug: - self.debug_text = [] - self.debug_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlh]) - self.debug_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in lttextlv]) - return None - - if self.table_area is not None: - if self.columns is not None: - if len(self.table_area) != len(self.columns): - raise ValueError("{0}: Length of table area and columns" - " should be equal.".format(os.path.basename(bname))) - - table_bbox = {} - for area in self.table_area: - x1, y1, x2, y2 = area.split(",") - x1 = float(x1) - y1 = float(y1) - x2 = float(x2) - y2 = float(y2) - table_bbox[(x1, y2, x2, y1)] = None - else: - table_bbox = {(0, 0, width, height): None} - - if len(self.ytol) == 1 and self.ytol[0] == 2: - ytolerance = copy.deepcopy(self.ytol) * len(table_bbox) - else: - ytolerance = copy.deepcopy(self.ytol) - - if len(self.mtol) == 1 and self.mtol[0] == 0: - mtolerance = copy.deepcopy(self.mtol) * len(table_bbox) - else: - mtolerance = copy.deepcopy(self.mtol) - - page = {} - tables = {} - # sort tables based on y-coord - for table_no, k in enumerate(sorted(table_bbox.keys(), key=lambda x: x[1], reverse=True)): - # select elements which lie within table_bbox - table_data = {} - t_bbox = {} - t_bbox['horizontal'] = text_in_bbox(k, lttextlh) - t_bbox['vertical'] = text_in_bbox(k, lttextlv) - char_bbox = text_in_bbox(k, ltchar) - table_data['text_p'] = 100 * (1 - (len(char_bbox) / len(ltchar))) - for direction in t_bbox: - t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0)) - text_x_min, text_y_min, text_x_max, text_y_max = _text_bbox(t_bbox) - rows_grouped = _group_rows(t_bbox['horizontal'], ytol=ytolerance[table_no]) - rows = _join_rows(rows_grouped, text_y_max, text_y_min) - elements = [len(r) for r in rows_grouped] - - guess = False - if self.columns is not None and self.columns[table_no] != "": - # user has to input boundary columns too - # take (0, width) by default - # similar to else condition - # len can't be 1 - cols = self.columns[table_no].split(',') - cols = [float(c) for c in cols] - cols.insert(0, text_x_min) - cols.append(text_x_max) - cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)] - else: - guess = True - ncols = max(set(elements), key=elements.count) - len_non_mode = len(filter(lambda x: x != ncols, elements)) - if ncols == 1: - # no tables detected - warnings.warn("{0}: Page contains no tables.".format( - os.path.basename(bname))) - cols = [(t.x0, t.x1) - for r in rows_grouped if len(r) == ncols for t in r] - cols = _merge_columns(sorted(cols), mtol=mtolerance[table_no]) - inner_text = [] - for i in range(1, len(cols)): - left = cols[i - 1][1] - right = cols[i][0] - inner_text.extend([t for direction in t_bbox - for t in t_bbox[direction] - if t.x0 > left and t.x1 < right]) - outer_text = [t for direction in t_bbox - for t in t_bbox[direction] - if t.x0 > cols[-1][1] or t.x1 < cols[0][0]] - inner_text.extend(outer_text) - cols = _add_columns(cols, inner_text, 
ytolerance[table_no]) - cols = _join_columns(cols, text_x_min, text_x_max) - - table = Table(cols, rows) - table = table.set_all_edges() - assignment_errors = [] - table_data['split_text'] = [] - table_data['superscript'] = [] - for direction in t_bbox: - for t in t_bbox[direction]: - indices, error = get_table_index( - table, t, direction, split_text=self.split_text, - flag_size=self.flag_size) - assignment_errors.append(error) - if len(indices) > 1: - table_data['split_text'].append(indices) - for r_idx, c_idx, text in indices: - if all(s in text for s in ['', '']): - table_data['superscript'].append((r_idx, c_idx, text)) - table.cells[r_idx][c_idx].add_text(text) - if guess: - score = get_score([[66, assignment_errors], [34, [len_non_mode / len(elements)]]]) - else: - score = get_score([[100, assignment_errors]]) - - table_data['score'] = score - ar = table.get_list() - ar = encode_list(ar) - table_data['data'] = ar - empty_p, r_nempty_cells, c_nempty_cells = count_empty(ar) - table_data['empty_p'] = empty_p - table_data['r_nempty_cells'] = r_nempty_cells - table_data['c_nempty_cells'] = c_nempty_cells - table_data['nrows'] = len(ar) - table_data['ncols'] = len(ar[0]) - tables['table-{0}'.format(table_no + 1)] = table_data - page[os.path.basename(bname)] = tables - - return page \ No newline at end of file diff --git a/camelot/table.py b/camelot/table.py deleted file mode 100644 index fc1a45e..0000000 --- a/camelot/table.py +++ /dev/null @@ -1,236 +0,0 @@ -import numpy as np - -from .cell import Cell - - -class Table: - """Table. - Defines a table object with coordinates relative to a left-bottom - origin, which is also PDFMiner's coordinate space. - - Parameters - ---------- - cols : list - List of tuples representing column x-coordinates in increasing - order. - - rows : list - List of tuples representing row y-coordinates in decreasing - order. - - Attributes - ---------- - cells : list - List of cell objects with row-major ordering. - - nocont_ : int - Number of lines that did not contribute to setting cell edges. - """ - - def __init__(self, cols, rows): - - self.cols = cols - self.rows = rows - self.cells = [[Cell(c[0], r[1], c[1], r[0]) - for c in cols] for r in rows] - self.nocont_ = 0 - self.image = None - - def set_all_edges(self): - """Sets all table edges to True. - """ - for r in range(len(self.rows)): - for c in range(len(self.cols)): - self.cells[r][c].left = True - self.cells[r][c].right = True - self.cells[r][c].top = True - self.cells[r][c].bottom = True - return self - - def set_border_edges(self): - """Sets table border edges to True. - """ - for r in range(len(self.rows)): - self.cells[r][0].left = True - self.cells[r][len(self.cols) - 1].right = True - for c in range(len(self.cols)): - self.cells[0][c].top = True - self.cells[len(self.rows) - 1][c].bottom = True - return self - - def set_edges(self, vertical, horizontal, jtol=2): - """Sets a cell's edges to True depending on whether they - overlap with lines found by imgproc. - - Parameters - ---------- - vertical : list - List of vertical lines detected by imgproc. Coordinates - scaled and translated to the PDFMiner's coordinate space. - - horizontal : list - List of horizontal lines detected by imgproc. Coordinates - scaled and translated to the PDFMiner's coordinate space. 
- """ - for v in vertical: - # find closest x coord - # iterate over y coords and find closest points - i = [i for i, t in enumerate(self.cols) - if np.isclose(v[0], t[0], atol=jtol)] - j = [j for j, t in enumerate(self.rows) - if np.isclose(v[3], t[0], atol=jtol)] - k = [k for k, t in enumerate(self.rows) - if np.isclose(v[1], t[0], atol=jtol)] - if not j: - self.nocont_ += 1 - continue - J = j[0] - if i == [0]: # only left edge - I = i[0] - if k: - K = k[0] - while J < K: - self.cells[J][I].left = True - J += 1 - else: - K = len(self.rows) - while J < K: - self.cells[J][I].left = True - J += 1 - elif i == []: # only right edge - I = len(self.cols) - 1 - if k: - K = k[0] - while J < K: - self.cells[J][I].right = True - J += 1 - else: - K = len(self.rows) - while J < K: - self.cells[J][I].right = True - J += 1 - else: # both left and right edges - I = i[0] - if k: - K = k[0] - while J < K: - self.cells[J][I].left = True - self.cells[J][I - 1].right = True - J += 1 - else: - K = len(self.rows) - while J < K: - self.cells[J][I].left = True - self.cells[J][I - 1].right = True - J += 1 - - for h in horizontal: - # find closest y coord - # iterate over x coords and find closest points - i = [i for i, t in enumerate(self.rows) - if np.isclose(h[1], t[0], atol=jtol)] - j = [j for j, t in enumerate(self.cols) - if np.isclose(h[0], t[0], atol=jtol)] - k = [k for k, t in enumerate(self.cols) - if np.isclose(h[2], t[0], atol=jtol)] - if not j: - self.nocont_ += 1 - continue - J = j[0] - if i == [0]: # only top edge - I = i[0] - if k: - K = k[0] - while J < K: - self.cells[I][J].top = True - J += 1 - else: - K = len(self.cols) - while J < K: - self.cells[I][J].top = True - J += 1 - elif i == []: # only bottom edge - I = len(self.rows) - 1 - if k: - K = k[0] - while J < K: - self.cells[I][J].bottom = True - J += 1 - else: - K = len(self.cols) - while J < K: - self.cells[I][J].bottom = True - J += 1 - else: # both top and bottom edges - I = i[0] - if k: - K = k[0] - while J < K: - self.cells[I][J].top = True - self.cells[I - 1][J].bottom = True - J += 1 - else: - K = len(self.cols) - while J < K: - self.cells[I][J].top = True - self.cells[I - 1][J].bottom = True - J += 1 - - return self - - def set_spanning(self): - """Sets a cell's spanning_h or spanning_v attribute to True - depending on whether the cell spans/extends horizontally or - vertically. 
- """ - for r in range(len(self.rows)): - for c in range(len(self.cols)): - bound = self.cells[r][c].get_bounded_edges() - if bound == 4: - continue - elif bound == 3: - if not self.cells[r][c].left: - if (self.cells[r][c].right and - self.cells[r][c].top and - self.cells[r][c].bottom): - self.cells[r][c].spanning_h = True - elif not self.cells[r][c].right: - if (self.cells[r][c].left and - self.cells[r][c].top and - self.cells[r][c].bottom): - self.cells[r][c].spanning_h = True - elif not self.cells[r][c].top: - if (self.cells[r][c].left and - self.cells[r][c].right and - self.cells[r][c].bottom): - self.cells[r][c].spanning_v = True - elif not self.cells[r][c].bottom: - if (self.cells[r][c].left and - self.cells[r][c].right and - self.cells[r][c].top): - self.cells[r][c].spanning_v = True - elif bound == 2: - if self.cells[r][c].left and self.cells[r][c].right: - if (not self.cells[r][c].top and - not self.cells[r][c].bottom): - self.cells[r][c].spanning_v = True - elif self.cells[r][c].top and self.cells[r][c].bottom: - if (not self.cells[r][c].left and - not self.cells[r][c].right): - self.cells[r][c].spanning_h = True - - return self - - def get_list(self): - """Returns a two-dimensional list of text assigned to each - cell. - - Returns - ------- - ar : list - """ - ar = [] - for r in range(len(self.rows)): - ar.append([self.cells[r][c].get_text().strip() - for c in range(len(self.cols))]) - return ar diff --git a/camelot/utils.py b/camelot/utils.py index 3640b37..3e87fe5 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -18,18 +18,47 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal, LTTextLineVertical) +def setup_logging(name): + """Sets up a logger with StreamHandler. + + Parameters + ---------- + name : str + + Returns + ------- + logger : logging.Logger + + """ + logger = logging.getLogger(name) + + format_string = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s' + formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S') + + handler = logging.StreamHandler() + handler.setLevel(logging.INFO) + handler.setFormatter(formatter) + + logger.addHandler(handler) + + return logger + + +logger = setup_logging(__name__) + + def translate(x1, x2): """Translates x2 by x1. Parameters ---------- x1 : float - x2 : float Returns ------- x2 : float + """ x2 += x1 return x2 @@ -41,12 +70,12 @@ def scale(x, s): Parameters ---------- x : float - s : float Returns ------- x : float + """ x *= s return x @@ -58,21 +87,17 @@ def rotate(x1, y1, x2, y2, angle): Parameters ---------- x1 : float - y1 : float - x2 : float - y2 : float - angle : float Angle in radians. Returns ------- xnew : float - ynew : float + """ s = np.sin(angle) c = np.cos(angle) @@ -85,17 +110,16 @@ def rotate(x1, y1, x2, y2, angle): return xnew, ynew -def scale_to_image(k, factors): - """Translates and scales PDFMiner coordinates to OpenCV's coordinate - space. +def scale_pdf(k, factors): + """Translates and scales pdf coordinate space to image + coordinate space. Parameters ---------- k : tuple Tuple (x1, y1, x2, y2) representing table bounding box where - (x1, y1) -> lt and (x2, y2) -> rb in PDFMiner's coordinate + (x1, y1) -> lt and (x2, y2) -> rb in PDFMiner coordinate space. 
- factors : tuple Tuple (scaling_factor_x, scaling_factor_y, pdf_y) where the first two elements are scaling factors and pdf_y is height of @@ -105,8 +129,9 @@ def scale_to_image(k, factors): ------- knew : tuple Tuple (x1, y1, x2, y2) representing table bounding box where - (x1, y1) -> lt and (x2, y2) -> rb in OpenCV's coordinate + (x1, y1) -> lt and (x2, y2) -> rb in OpenCV coordinate space. + """ x1, y1, x2, y2 = k scaling_factor_x, scaling_factor_y, pdf_y = factors @@ -118,22 +143,19 @@ def scale_to_image(k, factors): return knew -def scale_to_pdf(tables, v_segments, h_segments, factors): - """Translates and scales OpenCV coordinates to PDFMiner's coordinate - space. +def scale_image(tables, v_segments, h_segments, factors): + """Translates and scales image coordinate space to pdf + coordinate space. Parameters ---------- tables : dict Dict with table boundaries as keys and list of intersections - in that boundary as their value. - + in that boundary as value. v_segments : list List of vertical line segments. - h_segments : list List of horizontal line segments. - factors : tuple Tuple (scaling_factor_x, scaling_factor_y, img_y) where the first two elements are scaling factors and img_y is height of @@ -142,10 +164,9 @@ def scale_to_pdf(tables, v_segments, h_segments, factors): Returns ------- tables_new : dict - v_segments_new : dict - h_segments_new : dict + """ scaling_factor_x, scaling_factor_y, img_y = factors tables_new = {} @@ -178,54 +199,26 @@ def scale_to_pdf(tables, v_segments, h_segments, factors): return tables_new, v_segments_new, h_segments_new -def setup_logging(log_filepath): - """Setup logging - Args: - log_filepath (string): Path to log file - Returns: - logging.Logger: Logger object - """ - logger = logging.getLogger("app_logger") - logger.setLevel(logging.DEBUG) - # Log File Handler (Associating one log file per webservice run) - log_file_handler = logging.FileHandler(log_filepath, - mode='a', - encoding='utf-8') - log_file_handler.setLevel(logging.DEBUG) - format_string = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s' - formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S') - log_file_handler.setFormatter(formatter) - logger.addHandler(log_file_handler) - # Stream Log Handler (For console) - stream_log_handler = logging.StreamHandler() - stream_log_handler.setLevel(logging.INFO) - formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S') - stream_log_handler.setFormatter(formatter) - logger.addHandler(stream_log_handler) - return logger - - def get_rotation(lttextlh, lttextlv, ltchar): - """Detects if text in table is vertical or not using the current + """Detects if text in table is rotated or not using the current transformation matrix (CTM) and returns its orientation. Parameters ---------- lttextlh : list List of PDFMiner LTTextLineHorizontal objects. - lttextlv : list List of PDFMiner LTTextLineVertical objects. - ltchar : list List of PDFMiner LTChar objects. Returns ------- rotation : string - {'', 'left', 'right'} - '' if text in table is upright, 'left' if rotated 90 degree - anti-clockwise and 'right' if rotated 90 degree clockwise. + '' if text in table is upright, 'anticlockwise' if + rotated 90 degrees anticlockwise and 'clockwise' if + rotated 90 degrees clockwise.
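The orientation check boils down to the sign pattern of the rotation part of pdfminer's (a, b, c, d, e, f) matrix, where rotating by theta gives (cos theta, sin theta, -sin theta, cos theta, ...). A small sketch of that sign test, run on an idealized CTM rather than real LTChar objects:

    import math

    def rotation_of(matrix):
        # matrix[1] is sin(theta), matrix[2] is -sin(theta)
        if matrix[1] > 0 and matrix[2] < 0:
            return 'anticlockwise'
        if matrix[1] < 0 and matrix[2] > 0:
            return 'clockwise'
        return ''

    theta = math.pi / 2  # 90 degrees anticlockwise
    print(rotation_of((math.cos(theta), math.sin(theta),
                       -math.sin(theta), math.cos(theta), 0, 0)))  # anticlockwise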
+ """ rotation = '' hlen = len([t for t in lttextlh if t.get_text().strip()]) @@ -233,23 +226,21 @@ def get_rotation(lttextlh, lttextlv, ltchar): if hlen < vlen: clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in ltchar) anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in ltchar) - rotation = 'left' if clockwise < anticlockwise else 'right' + rotation = 'anticlockwise' if clockwise < anticlockwise else 'clockwise' return rotation -def segments_bbox(bbox, v_segments, h_segments): - """Returns all line segments present inside a - table's bounding box. +def segments_in_bbox(bbox, v_segments, h_segments): + """Returns all line segments present inside a bounding box. Parameters ---------- bbox : tuple - Tuple (x1, y1, x2, y2) representing table bounding box where - (x1, y1) -> lb and (x2, y2) -> rt in PDFMiner's coordinate space. - + Tuple (x1, y1, x2, y2) representing a bounding box where + (x1, y1) -> lb and (x2, y2) -> rt in PDFMiner coordinate + space. v_segments : list List of vertical line segments. - h_segments : list List of vertical horizontal segments. @@ -257,9 +248,9 @@ def segments_bbox(bbox, v_segments, h_segments): ------- v_s : list List of vertical line segments that lie inside table. - h_s : list List of horizontal line segments that lie inside table. + """ lb = (bbox[0], bbox[1]) rt = (bbox[2], bbox[3]) @@ -271,45 +262,43 @@ def segments_bbox(bbox, v_segments, h_segments): def text_in_bbox(bbox, text): - """Returns all text objects present inside a - table's bounding box. + """Returns all text objects present inside a bounding box. Parameters ---------- bbox : tuple - Tuple (x1, y1, x2, y2) representing table bounding box where - (x1, y1) -> lb and (x2, y2) -> rt in PDFMiner's coordinate space. - - text : list - List of PDFMiner text objects. + Tuple (x1, y1, x2, y2) representing a bounding box where + (x1, y1) -> lb and (x2, y2) -> rt in PDFMiner coordinate + space. + text : List of PDFMiner text objects. Returns ------- t_bbox : list List of PDFMiner text objects that lie inside table. + """ lb = (bbox[0], bbox[1]) rt = (bbox[2], bbox[3]) t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0 - <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 - <= rt[1] + 2] + <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 + <= rt[1] + 2] return t_bbox -def remove_close_values(ar, mtol=2): - """Removes values which are within a tolerance of mtol of another value - present in list. +def remove_close_lines(ar, line_close_tol=2): + """Removes lines which are within a tolerance, based on their x or + y axis projections. Parameters ---------- ar : list - - mtol : int - (optional, default: 2) + line_close_tol : int, optional (default: 2) Returns ------- ret : list + """ ret = [] for a in ar: @@ -317,27 +306,26 @@ def remove_close_values(ar, mtol=2): ret.append(a) else: temp = ret[-1] - if np.isclose(temp, a, atol=mtol): + if np.isclose(temp, a, atol=line_close_tol): pass else: ret.append(a) return ret -def merge_close_values(ar, mtol=2): - """Merges values which are within a tolerance of mtol by calculating - a moving mean. +def merge_close_lines(ar, line_close_tol=2): + """Merges lines which are within a tolerance by calculating a + moving mean, based on their x or y axis projections. 
Parameters ---------- ar : list - - mtol : int - (optional, default: 2) + line_close_tol : int, optional (default: 2) Returns ------- ret : list + """ ret = [] for a in ar: @@ -345,7 +333,7 @@ ret.append(a) else: temp = ret[-1] - if np.isclose(temp, a, atol=mtol): + if np.isclose(temp, a, atol=line_close_tol): temp = (temp + a) / 2.0 ret[-1] = temp else: @@ -353,22 +341,21 @@ return ret -def flag_on_size(textline, direction): - """Flags a super/subscript by enclosing it with <s></s>. May give - false positives. +def flag_font_size(textline, direction): + """Flags super/subscripts in text by enclosing them with <s></s>. + May give false positives. Parameters ---------- textline : list List of PDFMiner LTChar objects. - direction : string - {'horizontal', 'vertical'} Direction of the PDFMiner LTTextLine object. Returns ------- fstring : string + """ if direction == 'horizontal': d = [(t.get_text(), np.round(t.height, decimals=6)) for t in textline if not isinstance(t, LTAnno)] @@ -395,33 +382,28 @@ return fstring -def split_textline(table, textline, direction, flag_size=True): +def split_textline(table, textline, direction, flag_size=False): """Splits PDFMiner LTTextLine into substrings if it spans across multiple rows/columns. Parameters ---------- - table : object - camelot.pdf.Pdf - + table : camelot.core.Table textline : object PDFMiner LTTextLine object. - direction : string - {'horizontal', 'vertical'} Direction of the PDFMiner LTTextLine object. - - flag_size : bool + flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> if its size is different from rest of the string, useful for super and subscripts. - (optional, default: True) Returns ------- grouped_chars : list List of tuples of the form (idx, text) where idx is the index of row/column and text is an lttextline substring. + """ idx = 0 cut_text = [] @@ -466,46 +448,37 @@ grouped_chars = [] for key, chars in groupby(cut_text, itemgetter(0, 1)): if flag_size: - grouped_chars.append((key[0], key[1], flag_on_size([t[2] for t in chars], direction))) + grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction))) else: gchars = [t[2].get_text() for t in chars] grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n'))) return grouped_chars -def get_table_index(table, t, direction, split_text=False, flag_size=True): - """Gets indices of the cell where given text object lies by +def get_table_index(table, t, direction, split_text=False, flag_size=False): + """Gets indices of the table cell where given text object lies by comparing their y and x-coordinates. Parameters ---------- - table : object - camelot.table.Table - + table : camelot.core.Table t : object PDFMiner LTTextLine object. - direction : string - {'horizontal', 'vertical'} Direction of the PDFMiner LTTextLine object. - - split_text : bool + split_text : bool, optional (default: False) Whether or not to split a text line if it spans across multiple cells. - (optional, default: False) - - flag_size : bool + flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> if its size is different from rest of the string, useful for super and subscripts.
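The flagging is easiest to see on a toy line. A simplified, self-contained sketch of the idea behind flag_font_size (the real input is a list of LTChar objects; the (text, size) pairs here are hypothetical):

    from itertools import groupby

    def flag_sketch(chars):
        # chars: list of (text, size) pairs; wrap runs whose size
        # differs from the most common size in <s></s>
        sizes = [size for _, size in chars]
        common = max(set(sizes), key=sizes.count)
        out = []
        for size, group in groupby(chars, key=lambda c: c[1]):
            text = ''.join(t for t, _ in group)
            out.append(text if size == common else '<s>' + text + '</s>')
        return ''.join(out)

    print(flag_sketch([('C', 9.0), ('O', 9.0), ('2', 6.0)]))  # CO<s>2</s>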
- (optional, default: True) Returns ------- indices : list - List of tuples of the form (idx, text) where idx is the index - of row/column and text is the an lttextline substring. - + List of tuples of the form (r_idx, c_idx, text) where r_idx + and c_idx are row and column indices. error : float Assignment error, percentage of text area that lies outside a cell. @@ -514,6 +487,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True): | [Text bounding box] | | +-------+ + """ r_idx, c_idx = [-1] * 2 for r in range(len(table.rows)): @@ -528,7 +502,11 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True): else: lt_col_overlap.append(-1) if len(filter(lambda x: x != -1, lt_col_overlap)) == 0: - logging.warning("Text did not fit any column.") + text = t.get_text().strip('\n') + text_range = (t.x0, t.x1) + col_range = (table.cols[0][0], table.cols[-1][1]) + logger.info("{} {} does not lie in column range {}".format( + text, text_range, col_range)) r_idx = r c_idx = lt_col_overlap.index(max(lt_col_overlap)) break @@ -552,14 +530,14 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True): return split_textline(table, t, direction, flag_size=flag_size), error else: if flag_size: - return [(r_idx, c_idx, flag_on_size(t._objs, direction))], error + return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error else: return [(r_idx, c_idx, t.get_text().strip('\n'))], error -def get_score(error_weights): - """Calculates score based on weights assigned to various parameters, - and their error percentages. +def compute_accuracy(error_weights): + """Calculates a score based on weights assigned to various + parameters and their error percentages. Parameters ---------- @@ -571,6 +549,7 @@ def get_score(error_weights): Returns ------- score : float + """ SCORE_VAL = 100 try: @@ -586,6 +565,30 @@ def get_score(error_weights): return score +def compute_whitespace(d): + """Calculates the percentage of empty strings in a + two-dimensional list. + + Parameters + ---------- + d : list + + Returns + ------- + whitespace : float + Percentage of empty cells. + + """ + whitespace = 0 + r_nempty_cells, c_nempty_cells = [], [] + for i in d: + for j in i: + if j.strip() == '': + whitespace += 1 + whitespace = 100 * (whitespace / float(len(d) * len(d[0]))) + return whitespace + + def remove_empty(d): """Removes empty rows and columns from a two-dimensional list. @@ -596,6 +599,7 @@ def remove_empty(d): Returns ------- d : list + """ for i, row in enumerate(d): if row == [''] * len(row): @@ -606,50 +610,8 @@ def remove_empty(d): return d -def count_empty(d): - """Counts empty rows and columns in a two-dimensional list. - - Parameters - ---------- - d : list - - Returns - ------- - n_empty_rows : list - Number of empty rows. - - n_empty_cols : list - Number of empty columns. - - empty_p : float - Percentage of empty cells. - """ - empty_p = 0 - r_nempty_cells, c_nempty_cells = [], [] - for i in d: - for j in i: - if j.strip() == '': - empty_p += 1 - empty_p = 100 * (empty_p / float(len(d) * len(d[0]))) - for row in d: - r_nempty_c = 0 - for r in row: - if r.strip() != '': - r_nempty_c += 1 - r_nempty_cells.append(r_nempty_c) - d = zip(*d) - d = [list(col) for col in d] - for col in d: - c_nempty_c = 0 - for c in col: - if c.strip() != '': - c_nempty_c += 1 - c_nempty_cells.append(c_nempty_c) - return empty_p, r_nempty_cells, c_nempty_cells - - -def encode_list(ar): - """Encodes list of text. 
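compute_accuracy and compute_whitespace above feed the parsing report: the former folds weighted error percentages into a single score, the latter is a plain ratio of empty cells. A doctest-style sketch of the latter on a hypothetical 2x2 grid with three empty cells:

    >>> compute_whitespace([['foo', ''], ['', '']])
    75.0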
+def encode_(ar): + """Encodes two-dimensional list into unicode. Parameters ---------- @@ -658,52 +620,13 @@ def encode_list(ar): Returns ------- ar : list + """ ar = [[r.encode('utf-8') for r in row] for row in ar] return ar -def get_text_objects(layout, ltype="char", t=None): - """Recursively parses pdf layout to get a list of - text objects. - - Parameters - ---------- - layout : object - PDFMiner LTPage object. - - ltype : string - {'char', 'lh', 'lv'} - Specify 'char', 'lh', 'lv' to get LTChar, LTTextLineHorizontal, - and LTTextLineVertical objects respectively. - - t : list - - Returns - ------- - t : list - List of PDFMiner text objects. - """ - if ltype == "char": - LTObject = LTChar - elif ltype == "lh": - LTObject = LTTextLineHorizontal - elif ltype == "lv": - LTObject = LTTextLineVertical - if t is None: - t = [] - try: - for obj in layout._objs: - if isinstance(obj, LTObject): - t.append(obj) - else: - t += get_text_objects(obj, ltype=ltype) - except AttributeError: - pass - return t - - -def get_page_layout(pname, char_margin=1.0, line_margin=0.5, word_margin=0.1, +def get_page_layout(filename, char_margin=1.0, line_margin=0.5, word_margin=0.1, detect_vertical=True, all_texts=True): """Returns a PDFMiner LTPage object and page dimension of a single page pdf. See https://euske.github.io/pdfminer/ to get definitions @@ -711,28 +634,23 @@ def get_page_layout(pname, char_margin=1.0, line_margin=0.5, word_margin=0.1, Parameters ---------- - pname : string + filename : string Path to pdf file. - char_margin : float - line_margin : float - word_margin : float - detect_vertical : bool - all_texts : bool Returns ------- layout : object PDFMiner LTPage object. - dim : tuple - pdf page dimension of the form (width, height). + Dimension of pdf page in the form (width, height). + """ - with open(pname, 'r') as f: + with open(filename, 'r') as f: parser = PDFParser(f) document = PDFDocument(parser) if not document.is_extractable: @@ -754,16 +672,56 @@ def get_page_layout(pname, char_margin=1.0, line_margin=0.5, word_margin=0.1, return layout, dim +def get_text_objects(layout, ltype="char", t=None): + """Recursively parses pdf layout to get a list of + PDFMiner text objects. + + Parameters + ---------- + layout : object + PDFMiner LTPage object. + ltype : string + Specify 'char', 'lh', 'lv' to get LTChar, LTTextLineHorizontal, + and LTTextLineVertical objects respectively. + t : list + + Returns + ------- + t : list + List of PDFMiner text objects. + + """ + if ltype == "char": + LTObject = LTChar + elif ltype == "lh": + LTObject = LTTextLineHorizontal + elif ltype == "lv": + LTObject = LTTextLineVertical + if t is None: + t = [] + try: + for obj in layout._objs: + if isinstance(obj, LTObject): + t.append(obj) + else: + t += get_text_objects(obj, ltype=ltype) + except AttributeError: + pass + return t + + def merge_tuples(tuples): """Merges a list of overlapping tuples. Parameters ---------- tuples : list + List of tuples where a tuple is a single axis coordinate pair. + + Yields + ------ + tuple - Returns - ------- - merged : list """ merged = list(tuples[0]) for s, e in tuples: diff --git a/debug/hough_opencv.py b/debug/hough_opencv.py deleted file mode 100644 index 79140f8..0000000 --- a/debug/hough_opencv.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -usage: python hough_opencv.py file.png - -finds lines present in an image using opencv's hough transform. 
-""" - -import sys -import time - -import cv2 -import numpy as np -import matplotlib.pyplot as plt - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -@timeit -def main(): - image = cv2.imread(sys.argv[1]) - print "image dimensions -> {0}".format(image.shape) - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - edges = cv2.Canny(gray, 50, 150, apertureSize=3) - - lines = cv2.HoughLines(edges, 1, np.pi / 180, 200) - print "found {0} lines".format(len(lines)) - for line in lines: - r, theta = line[0] - # filter horizontal and vertical lines - if theta == 0 or np.isclose(theta, np.pi / 2): - x0 = r * np.cos(theta) - y0 = r * np.sin(theta) - x1 = int(x0 + 10000 * (-np.sin(theta))) - y1 = int(y0 + 10000 * (np.cos(theta))) - x2 = int(x0 - 10000 * (-np.sin(theta))) - y2 = int(y0 - 10000 * (np.cos(theta))) - cv2.line(image, (x1, y1), (x2, y2), (0, 0, 255), 5) - plt.imshow(image) - plt.show() - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/debug/hough_skimage.py b/debug/hough_skimage.py deleted file mode 100644 index 93012fc..0000000 --- a/debug/hough_skimage.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -usage: python hough_skimage.py file.png - -finds lines present in an image using scikit-image's hough transform. -""" - -import sys -import time - -import cv2 -import numpy as np -from scipy.misc import imread -import matplotlib.pyplot as plt -from skimage.transform import hough_line, hough_line_peaks - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -@timeit -def main(): - image = cv2.imread(sys.argv[1]) - print "image dimensions -> {0}".format(image.shape) - ret, binary = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY) - binary = np.min(binary, axis=2) - binary = np.where(binary == 255, 0, 255) - rows, cols = binary.shape - pixel = np.zeros(binary.shape) - - fig, ax = plt.subplots(1, 1, figsize=(8,4)) - ax.imshow(image, cmap=plt.cm.gray) - - theta_in = np.linspace(0, np.pi / 2, 10) - h, theta, d = hough_line(binary, theta_in) - for _, angle, dist in zip(*hough_line_peaks(h, theta, d)): - x0 = dist * np.cos(angle) - y0 = dist * np.sin(angle) - x1 = int(x0 + 1000 * (-np.sin(angle))) - y1 = int(y0 + 1000 * (np.cos(angle))) - x2 = int(x0 - 1000 * (-np.sin(angle))) - y2 = int(y0 - 1000 * (np.cos(angle))) - ax.plot((x1, x2), (y1, y2), '-r') - a = np.cos(angle) - b = np.sin(angle) - x = np.arange(binary.shape[1]) - y = np.arange(binary.shape[0]) - x = a * x - y = b * y - R = np.round(np.add(y.reshape((binary.shape[0], 1)), x.reshape((1, binary.shape[1])))) - pixel += np.isclose(R, np.round(dist)) - - pixel = np.clip(pixel, 0, 1) - pixel = np.where(pixel == 1, 0, 1) - binary = np.where(binary == 0, 255, 0) - binary *= pixel.astype(np.int64) - ax.imshow(binary, cmap=plt.cm.gray) - ax.axis((0, cols, rows, 0)) - ax.set_title('Detected lines') - ax.set_axis_off() - ax.set_adjustable('box-forced') - plt.show() - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/debug/houghp_skimage.py b/debug/houghp_skimage.py deleted file mode 100644 index c7b9aec..0000000 --- a/debug/houghp_skimage.py +++ /dev/null @@ 
-1,49 +0,0 @@ -""" -usage: python hough_prob.py file.png - -finds lines present in an image using scikit-image's hough transform. -""" - -import sys -import time - -from scipy.misc import imread -import matplotlib.pyplot as plt -from skimage.feature import canny -from skimage.transform import probabilistic_hough_line - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -@timeit -def main(): - image = imread(sys.argv[1], mode='L') - edges = canny(image, 2, 1, 25) - lines = probabilistic_hough_line(edges, threshold=1000) - - fig, ax = plt.subplots(1, 1, figsize=(8,4), sharex=True, sharey=True) - ax.imshow(edges * 0) - - for line in lines: - p0, p1 = line - ax.plot((p0[0], p1[0]), (p0[1], p1[1])) - - ax.set_title('Probabilistic Hough') - ax.set_axis_off() - ax.set_adjustable('box-forced') - plt.show() - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/debug/morph_transform.py b/debug/morph_transform.py deleted file mode 100644 index cd6a6b9..0000000 --- a/debug/morph_transform.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -usage: python morph_transform.py file.png scale={int} invert={bool} - -finds lines present in an image using opencv's morph transform. -""" - -import sys -import time - -import cv2 -import numpy as np -import matplotlib.pyplot as plt - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -def mt(imagename, scale=40, invert=False): - img = cv2.imread(imagename) - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if invert: - threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2) - else: - threshold = cv2.adaptiveThreshold(np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2) - vertical = threshold - horizontal = threshold - - verticalsize = vertical.shape[0] / scale - horizontalsize = horizontal.shape[1] / scale - - ver = cv2.getStructuringElement(cv2.MORPH_RECT, (1, verticalsize)) - hor = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontalsize, 1)) - - vertical = cv2.erode(vertical, ver, (-1, -1)) - vertical = cv2.dilate(vertical, ver, (-1, -1)) - - horizontal = cv2.erode(horizontal, hor, (-1, -1)) - horizontal = cv2.dilate(horizontal, hor, (-1, -1)) - - mask = vertical + horizontal - joints = np.bitwise_and(vertical, horizontal) - contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10] - - tables = {} - for c in contours: - x, y, w, h = cv2.boundingRect(c) - x1, x2 = x, x + w - y1, y2 = y, y + h - # find number of non-zero values in joints using what boundingRect returns - roi = joints[y:y+h, x:x+w] - jc, _ = cv2.findContours(roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) - if len(jc) <= 4: # remove contours with less than <=4 joints - continue - joint_coords = [] - for j in jc: - jx, jy, jw, jh = cv2.boundingRect(j) - c1, c2 = x + (2*jx + jw) / 2, y + (2*jy + jh) / 2 - joint_coords.append((c1, c2)) - tables[(x1, y2, x2, y1)] = joint_coords - - vcontours, _ = cv2.findContours(vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - for vc in vcontours: - x, y, w, h = cv2.boundingRect(vc) - 
x1, x2 = x, x + w - y1, y2 = y, y + h - plt.plot([(x1 + x2) / 2, (x1 + x2) / 2], [y2, y1]) - - hcontours, _ = cv2.findContours(horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - for hc in hcontours: - x, y, w, h = cv2.boundingRect(hc) - x1, x2 = x, x + w - y1, y2 = y, y + h - plt.plot([x1, x2], [(y1 + y2) / 2, (y1 + y2) / 2]) - - x_coord = [] - y_coord = [] - for k in tables.keys(): - for coord in tables[k]: - x_coord.append(coord[0]) - y_coord.append(coord[1]) - plt.plot(x_coord, y_coord, 'ro') - - plt.imshow(img) - plt.show() - return tables - - -@timeit -def main(): - try: - scale = int(sys.argv[2].split('=')[1]) - except IndexError: - scale = 40 - try: - invert = bool(sys.argv[3].split('=')[1]) - except IndexError: - invert = False - t = mt(sys.argv[1], scale=scale, invert=invert) - print 'tables found: ', len(t.keys()) - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() diff --git a/debug/plot_geo.py b/debug/plot_geo.py deleted file mode 100644 index 3f7aaf9..0000000 --- a/debug/plot_geo.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -usage: python plot_geo.py file.pdf - python plot_geo.py file.pdf file.png - -prints lines and rectangles present in a pdf file. -""" - -import sys -import time - -import cv2 -import numpy as np -import matplotlib.pyplot as plt -import matplotlib.patches as patches -from pdfminer.pdfpage import PDFPage -from pdfminer.pdfdevice import PDFDevice -from pdfminer.pdfparser import PDFParser -from pdfminer.pdfdocument import PDFDocument -from pdfminer.converter import PDFPageAggregator -from pdfminer.pdfinterp import PDFResourceManager -from pdfminer.pdfinterp import PDFPageInterpreter -from pdfminer.layout import LAParams, LTLine, LTRect -from pdfminer.pdfpage import PDFTextExtractionNotAllowed - - -MIN_LENGTH = 1 -pdf_x, pdf_y, image_x, image_y = [0] * 4 - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -def remove_coords(coords): - merged = [] - for coord in coords: - if not merged: - merged.append(coord) - else: - last = merged[-1] - if np.isclose(last, coord, atol=2): - pass - else: - merged.append(coord) - return merged - - -def parse_layout(pdfname): - global pdf_x, pdf_y - def is_horizontal(line): - if line[0] == line[2]: - return True - return False - - def is_vertical(line): - if line[1] == line[3]: - return True - return False - - vertical, horizontal = [], [] - with open(pdfname, 'rb') as f: - parser = PDFParser(f) - document = PDFDocument(parser) - if not document.is_extractable: - raise PDFTextExtractionNotAllowed - laparams = LAParams() - rsrcmgr = PDFResourceManager() - device = PDFPageAggregator(rsrcmgr, laparams=laparams) - interpreter = PDFPageInterpreter(rsrcmgr, device) - for page in PDFPage.create_pages(document): - interpreter.process_page(page) - layout = device.get_result() - pdf_x, pdf_y = layout.bbox[2], layout.bbox[3] - for obj in layout._objs: - if isinstance(obj, LTLine): - line = (obj.x0, obj.y0, obj.x1, obj.y1) - if is_vertical(line): - vertical.append(line) - elif is_horizontal(line): - horizontal.append(line) - elif isinstance(obj, LTRect): - vertical.append((obj.x0, obj.y1, obj.x0, obj.y0)) - vertical.append((obj.x1, obj.y1, obj.x1, obj.y0)) - horizontal.append((obj.x0, obj.y1, obj.x1, obj.y1)) - horizontal.append((obj.x0, obj.y0, obj.x1, obj.y0)) - return vertical, horizontal - - -def hough_transform(imagename): - 
global pdf_x, pdf_y, image_x, image_y - img = cv2.imread(imagename) - image_x, image_y = img.shape[1], img.shape[0] - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - edges = cv2.Canny(gray, 50, 150, apertureSize=3) - lines = cv2.HoughLines(edges, 1, np.pi/180, 1000) - x = [] - for line in lines: - r, theta = line[0] - x0 = r * np.cos(theta) - x0 *= pdf_x / float(image_x) - x.append(x0) - y = [] - for line in lines: - r, theta = line[0] - y0 = r * np.sin(theta) - y0 = abs(y0 - image_y) - y0 *= pdf_y / float(image_y) - y.append(y0) - x = remove_coords(sorted(set([x0 for x0 in x if x0 > 0]))) - y = remove_coords(sorted(set(y), reverse=True)) - return x, y - - -def plot_lines1(vertical, horizontal): - fig = plt.figure() - ax = fig.add_subplot(111, aspect='equal') - ax.set_xlim(0, 1000) - ax.set_ylim(0, 1000) - - vertical = filter(lambda x: abs(x[1] - x[3]) > MIN_LENGTH, vertical) - horizontal = filter(lambda x: abs(x[0] - x[2]) > MIN_LENGTH, horizontal) - for v in vertical: - ax.plot([v[0], v[2]], [v[1], v[3]]) - for h in horizontal: - ax.plot([h[0], h[2]], [h[1], h[3]]) - plt.show() - - -def plot_lines2(imagename, vertical, horizontal): - x, y = hough_transform(imagename) - fig = plt.figure() - ax = fig.add_subplot(111, aspect='equal') - ax.set_xlim(0, 1000) - ax.set_ylim(0, 1000) - - for x0 in x: - for v in vertical: - if np.isclose(x0, v[0], atol=2): - ax.plot([v[0], v[2]], [v[1], v[3]]) - for y0 in y: - for h in horizontal: - if np.isclose(y0, h[1], atol=2): - ax.plot([h[0], h[2]], [h[1], h[3]]) - plt.show() - - -@timeit -def main(): - vertical, horizontal = parse_layout(sys.argv[1]) - if len(sys.argv) == 2: - plot_lines1(vertical, horizontal) - elif len(sys.argv) == 3: - plot_lines1(vertical, horizontal) - plot_lines2(sys.argv[2], vertical, horizontal) - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/debug/plot_intensity.py b/debug/plot_intensity.py deleted file mode 100644 index 87c386b..0000000 --- a/debug/plot_intensity.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -usage: python plot_intensity.py file.png threshold - -plots sum of pixel intensities on both axes for an image. 
-""" -import sys -import time -from itertools import groupby -from operator import itemgetter - -import cv2 -import numpy as np -import matplotlib.pyplot as plt -from pylab import barh - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -def plot_barchart(ar): - n = len(ar) - ind = np.arange(n) - width = 0.35 - plt.bar(ind, ar, width, color='r', zorder=1) - plt.show() - - -def merge_lines(lines): - ranges = [] - for k, g in groupby(enumerate(lines), lambda (i, x): i-x): - group = map(itemgetter(1), g) - ranges.append((group[0], group[-1])) - merged = [] - for r in ranges: - merged.append((r[0] + r[1]) / 2) - return merged - - -def plot_lines(image, lines): - for y in lines: - plt.plot([0, image.shape[1]], [y, y]) - plt.imshow(image) - plt.show() - - -@timeit -def main(): - image = cv2.imread(sys.argv[1]) - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - threshold = cv2.adaptiveThreshold(np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2) - y_proj = np.sum(threshold, axis=1) - line_threshold = int(sys.argv[2]) - lines = np.where(y_proj < line_threshold)[0] - lines = merge_lines(lines) - plot_lines(image, lines) - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() diff --git a/debug/print_text.py b/debug/print_text.py deleted file mode 100644 index 1ab83d2..0000000 --- a/debug/print_text.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -usage: python print_text.py file.pdf - -prints horizontal and vertical text lines present in a pdf file. -""" - -import sys -import time -from pprint import pprint - -from pdfminer.layout import LAParams -from pdfminer.pdfpage import PDFPage -from pdfminer.pdfdevice import PDFDevice -from pdfminer.pdfparser import PDFParser -from pdfminer.pdfdocument import PDFDocument -from pdfminer.converter import PDFPageAggregator -from pdfminer.pdfinterp import PDFPageInterpreter -from pdfminer.pdfinterp import PDFResourceManager -from pdfminer.pdfpage import PDFTextExtractionNotAllowed -from pdfminer.layout import (LAParams, LTChar, LTAnno, LTTextBoxHorizontal, - LTTextLineHorizontal, LTTextLineVertical, LTLine) - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -def extract_text_objects(layout, LTObject, t=None): - if t is None: - t = [] - try: - for obj in layout._objs: - if isinstance(obj, LTObject): - t.append(obj) - else: - t += extract_text_objects(obj, LTObject) - except AttributeError: - pass - return t - - -@timeit -def main(): - with open(sys.argv[1], 'rb') as f: - parser = PDFParser(f) - document = PDFDocument(parser) - if not document.is_extractable: - raise PDFTextExtractionNotAllowed - # 2.0, 0.5, 0.1 - kwargs = { - 'char_margin': 1.0, - 'line_margin': 0.5, - 'word_margin': 0.1, - 'detect_vertical': True - } - laparams = LAParams(**kwargs) - rsrcmgr = PDFResourceManager() - device = PDFPageAggregator(rsrcmgr, laparams=laparams) - interpreter = PDFPageInterpreter(rsrcmgr, device) - for page in PDFPage.create_pages(document): - interpreter.process_page(page) - layout = device.get_result() - lh = extract_text_objects(layout, LTTextLineHorizontal) - lv = extract_text_objects(layout, LTTextLineVertical) - print "number of horizontal text lines -> 
{0}".format(len(lh)) - print "horizontal text lines ->" - pprint([t.get_text() for t in lh]) - print "number of vertical text lines -> {0}".format(len(lv)) - print "vertical text lines ->" - pprint([t.get_text() for t in lv]) - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/debug/threshold.py b/debug/threshold.py deleted file mode 100644 index ea716b2..0000000 --- a/debug/threshold.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -usage: python threshold.py file.png blocksize threshold_constant - -shows thresholded image. -""" - -import sys -import time - -import cv2 -import numpy as np -import matplotlib.pyplot as plt - - -def timeit(func): - def timed(*args, **kw): - start = time.time() - result = func(*args, **kw) - end = time.time() - print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start) - return result - return timed - - -@timeit -def main(): - img = cv2.imread(sys.argv[1]) - gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - blocksize = int(sys.argv[2]) - threshold_constant = float(sys.argv[3]) - threshold = cv2.adaptiveThreshold(np.invert(gray), 255, - cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, threshold_constant) - plt.imshow(img) - plt.show() - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print __doc__ - else: - main() \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 99a9e7f..3bd0f3d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,17 +4,37 @@ API Reference ============= -Pdf -=== -.. automodule:: camelot.pdf +camelot.read_pdf +================ +.. automodule:: camelot.read_pdf :members: -Lattice -======= -.. automodule:: camelot.lattice +camelot.handlers.PDFHandler +=========================== +.. automodule:: camelot.handlers.PDFHandler :members: -Stream -====== -.. automodule:: camelot.stream +camelot.parsers.Stream +====================== +.. automodule:: camelot.parsers.Stream + :members: + +camelot.parsers.Lattice +======================= +.. automodule:: camelot.parsers.Lattice + :members: + +camelot.core.Cell +================= +.. automodule:: camelot.core.Cell + :members: + +camelot.core.Table +================== +.. automodule:: camelot.core.Table + :members: + +camelot.core.TableList +====================== +.. automodule:: camelot.core.TableList :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index b186ad7..4b91c69 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,11 +3,11 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -================================== -Camelot: pdf parsing made simpler! -================================== +===================================== +Camelot: PDF Table Parsing for Humans +===================================== -Camelot is a Python 2.7 library and command-line tool for getting tables out of pdf files. +Camelot is a Python 2.7 library and command-line tool for extracting tabular data from PDF files. Why another pdf table parsing library? 
====================================== @@ -32,12 +32,22 @@ Usage :: - >>> from camelot.pdf import Pdf - >>> from camelot.lattice import Lattice - - >>> manager = Pdf(Lattice(), 'us-030.pdf') - >>> tables = manager.extract() - >>> print tables['page-1']['table-1']['data'] + >>> import camelot + >>> tables = camelot.read_pdf("foo.pdf") + >>> tables + <TableList n=2> + >>> tables.export("foo.csv", f="csv", compress=True) # json, excel, html + >>> tables[0] + <Table shape=(3,4)> + >>> tables[0].to_csv("foo.csv") # to_json, to_excel, to_html + >>> tables[0].parsing_report + { + "accuracy": 96, + "whitespace": 80, + "order": 1, + "page": 1 + } + >>> df = tables[0].df .. csv-table:: :header: "Cycle Name","KI (1/km)","Distance (mi)","Percent Fuel Savings","","","" "2012_2","0.17","11.2","21.7%","0.3%","2.7%","1.2%" "2145_1","0.23","24.9","20.7%","0.1%","1.2%","0.5%" "2032_2","0.17","57.8","21.7%","0.3%","2.7%","1.2%" "4171_1","0.07","173.9","58.1%","1.6%","2.1%","0.5%" -Camelot comes with a CLI where you can specify page numbers, output format, output directory etc. By default, the output files are placed in the same directory as the PDF. - -:: - - Camelot: PDF parsing made simpler! - - usage: - camelot [options] <method> [<args>...] - - options: - -h, --help Show this screen. - -v, --version Show version. - -V, --verbose Verbose. - -p, --pages <pageno> Comma-separated list of page numbers. - Example: -p 1,3-6,10 [default: 1] - -P, --parallel Parallelize the parsing process. - -f, --format <format> Output format. (csv,tsv,html,json,xlsx) [default: csv] - -l, --log Log to file. - -o, --output <directory> Output directory. - -M, --cmargin <cmargin> Char margin. Chars closer than cmargin are - grouped together to form a word. [default: 1.0] - -L, --lmargin <lmargin> Line margin. Lines closer than lmargin are - grouped together to form a textbox. [default: 0.5] - -W, --wmargin <wmargin> Word margin. Insert blank spaces between chars - if distance between words is greater than word - margin. [default: 0.1] - -J, --split_text Split text lines if they span across multiple cells. - -K, --flag_size Flag substring if its size differs from the whole string. - Useful for super and subscripts. - -X, --print-stats List stats on the parsing process. - -Y, --save-stats Save stats to a file. - -Z, --plot <dist> Plot distributions. (page,all,rc) - - camelot methods: - lattice Looks for lines between data. - stream Looks for spaces between data. - - See 'camelot <method> -h' for more information on a specific method. - Installation ============ Make sure you have the most updated versions for `pip` and `setuptools`. You can pip install -U pip setuptools -The required dependencies include `numpy`_, `OpenCV`_ and `ImageMagick`_. +The dependencies include `tk`_ and `ghostscript`_. -.. _numpy: http://www.numpy.org/ -.. _OpenCV: http://opencv.org/ -.. _ImageMagick: http://www.imagemagick.org/script/index.php +.. _tk: https://wiki.tcl.tk/3743 +.. _ghostscript: https://www.ghostscript.com/ Installing dependencies ----------------------- -numpy can be install using `pip`. OpenCV and imagemagick can be installed using your system's default package manager. +tk and ghostscript can be installed using your system's default package manager.
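Once the packages below are in place, a quick smoke test of the install (a sketch; "foo.pdf" stands in for any text-based, table-bearing PDF you have at hand, here yielding a single table)::

    >>> import camelot
    >>> tables = camelot.read_pdf("foo.pdf")
    >>> tables
    <TableList n=1>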
Linux ^^^^^ -* Arch Linux - -:: - - sudo pacman -S opencv imagemagick - * Ubuntu :: - sudo apt-get install libopencv-dev python-opencv imagemagick + sudo apt-get install python-opencv python-tk ghostscript + +* Arch Linux + +:: + + sudo pacman -S opencv tk ghostscript OS X ^^^^ :: - brew install homebrew/science/opencv imagemagick + brew install homebrew/science/opencv ghostscript Finally, `cd` into the project directory and install by:: - make install + python setup.py install API Reference ============= @@ -150,14 +120,14 @@ You can check the latest sources with the command:: Contributing ------------ -See :doc:`Contributing doc `. +See :doc:`Contributing guidelines `. Testing ------- :: - make test + python setup.py test License ======= diff --git a/examples/demo_lattice.py b/examples/demo_lattice.py deleted file mode 100644 index b3ff2ea..0000000 --- a/examples/demo_lattice.py +++ /dev/null @@ -1,11 +0,0 @@ -from camelot import Pdf -from camelot import Lattice - - -extractor = Lattice(Pdf("files/column_span_1.pdf", clean=True), scale=30) -tables = extractor.get_tables() -print tables - -extractor = Lattice(Pdf("files/column_span_2.pdf"), clean=True, scale=30) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_lattice_fill.py b/examples/demo_lattice_fill.py deleted file mode 100644 index 3546b00..0000000 --- a/examples/demo_lattice_fill.py +++ /dev/null @@ -1,13 +0,0 @@ -from camelot import Pdf -from camelot import Lattice - - -extractor = Lattice( - Pdf("files/row_span_1.pdf", clean=True), fill='v', scale=40) -tables = extractor.get_tables() -print tables - -extractor = Lattice( - Pdf("files/row_span_2.pdf", clean=True), fill='v', scale=30) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_lattice_invert.py b/examples/demo_lattice_invert.py deleted file mode 100644 index a0bf41e..0000000 --- a/examples/demo_lattice_invert.py +++ /dev/null @@ -1,13 +0,0 @@ -from camelot import Pdf -from camelot import Lattice - - -extractor = Lattice(Pdf("files/lines_in_background_1.pdf", - clean=True), scale=30, invert=True) -tables = extractor.get_tables() -print tables - -extractor = Lattice(Pdf("files/lines_in_background_2.pdf", - clean=True), scale=30, invert=True) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_lattice_rotation.py b/examples/demo_lattice_rotation.py deleted file mode 100644 index d201cf1..0000000 --- a/examples/demo_lattice_rotation.py +++ /dev/null @@ -1,11 +0,0 @@ -from camelot import Pdf -from camelot import Lattice - - -extractor = Lattice(Pdf("files/left_rotated_table.pdf", clean=True), scale=30) -tables = extractor.get_tables() -print tables - -extractor = Lattice(Pdf("files/right_rotated_table.pdf", clean=True), scale=30) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_lattice_twotables.py b/examples/demo_lattice_twotables.py deleted file mode 100644 index 91c6b93..0000000 --- a/examples/demo_lattice_twotables.py +++ /dev/null @@ -1,11 +0,0 @@ -from camelot import Pdf -from camelot import Lattice - - -extractor = Lattice(Pdf("files/twotables_1.pdf", clean=True), scale=40) -tables = extractor.get_tables() -print tables - -extractor = Lattice(Pdf("files/twotables_2.pdf", clean=True), scale=30) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_stream.py b/examples/demo_stream.py deleted file mode 100644 index baee02f..0000000 --- a/examples/demo_stream.py +++ /dev/null @@ -1,8 +0,0 @@ -from camelot import Pdf -from camelot import Stream - - -extractor = 
Stream(Pdf("files/budget_2014-15.pdf", - char_margin=1.0, clean=True)) -tables = extractor.get_tables() -print tables diff --git a/examples/demo_stream_columns.py b/examples/demo_stream_columns.py deleted file mode 100644 index 79cc6cb..0000000 --- a/examples/demo_stream_columns.py +++ /dev/null @@ -1,13 +0,0 @@ -from camelot import Pdf -from camelot import Stream - - -extractor = Stream(Pdf("files/inconsistent_rows.pdf", char_margin=1.0), - columns="65,95,285,640,715,780", ytol=10) -tables = extractor.get_tables() -print tables - -extractor = Stream(Pdf("files/consistent_rows.pdf", char_margin=1.0), - columns="28,67,180,230,425,475,700", ytol=5) -tables = extractor.get_tables() -print tables diff --git a/examples/files/consistent_rows.pdf b/examples/files/consistent_rows.pdf deleted file mode 100644 index e0213aa..0000000 Binary files a/examples/files/consistent_rows.pdf and /dev/null differ diff --git a/examples/files/inconsistent_rows.pdf b/examples/files/inconsistent_rows.pdf deleted file mode 100644 index 9eb4b63..0000000 Binary files a/examples/files/inconsistent_rows.pdf and /dev/null differ diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..d907a0b --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,11 @@ +click==6.7 +matplotlib==2.2.3 +numpy==1.13.3 +opencv-python==3.4.2.17 +pandas==0.23.4 +pdfminer==20140328 +Pillow==5.2.0 +PyPDF2==1.26.0 +pytest==3.8.0 +pytest-runner==4.2 +Sphinx==1.8.0b1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 826e271..d1a33b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ -docopt -matplotlib -nose -pdfminer -pyexcel-xlsx -Pillow -pyocr -PyPDF2 -Sphinx +click==6.7 +matplotlib==2.2.3 +numpy==1.13.3 +opencv-python==3.4.2.17 +pandas==0.23.4 +pdfminer==20140328 +Pillow==5.2.0 +PyPDF2==1.26.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..730f976 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[aliases] +test=pytest + +[tool:pytest] +addopts = --verbose +python_files = tests/test_*.py diff --git a/setup.py b/setup.py index 1d1d4ec..14c0516 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ import camelot NAME = 'camelot' VERSION = camelot.__version__ -DESCRIPTION = 'camelot parses tables from PDFs!' 
+DESCRIPTION = 'PDF Table Parsing for Humans' with open('README.md') as f: LONG_DESCRIPTION = f.read() URL = 'https://github.com/socialcopsdev/camelot' AUTHOR = 'Vinayak Mehta' -AUTHOR_EMAIL = 'vinayak@socialcops.com' +AUTHOR_EMAIL = 'vmehta94@gmail.com' LICENSE = 'BSD License' opencv_min_version = '2.4.8' @@ -48,10 +48,8 @@ def setup_package(): author=AUTHOR, author_email=AUTHOR_EMAIL, license=LICENSE, - keywords='parse scrape pdf table', packages=['camelot'], - install_requires=reqs, - scripts=['tools/camelot']) + install_requires=reqs) try: from setuptools import setup @@ -60,18 +58,14 @@ def setup_package(): opencv_status = get_opencv_status() opencv_req_str = "camelot requires OpenCV >= {0}.\n".format(opencv_min_version) - instructions = ("Installation instructions are available in the README at " - "https://github.com/socialcopsdev/camelot") if opencv_status['up_to_date'] is False: if opencv_status['version']: - raise ImportError("Your installation of OpenCV " - "{0} is out-of-date.\n{1}{2}" - .format(opencv_status['version'], - opencv_req_str, instructions)) + raise ImportError("Your installation of OpenCV {} is out-of-date.\n{}" + .format(opencv_status['version'], opencv_req_str)) else: - raise ImportError("OpenCV is not installed.\n{0}{1}" - .format(opencv_req_str, instructions)) + raise ImportError("OpenCV is not installed.\n{}" + .format(opencv_req_str)) setup(**metadata) diff --git a/tests/budget_2014-15.pdf b/tests/budget_2014-15.pdf deleted file mode 100644 index 9466e87..0000000 Binary files a/tests/budget_2014-15.pdf and /dev/null differ diff --git a/tests/column_span_1.pdf b/tests/column_span_1.pdf deleted file mode 100644 index e7c164e..0000000 Binary files a/tests/column_span_1.pdf and /dev/null differ diff --git a/tests/column_span_2.pdf b/tests/column_span_2.pdf deleted file mode 100644 index 5cab903..0000000 Binary files a/tests/column_span_2.pdf and /dev/null differ diff --git a/tests/agstat.pdf b/tests/files/agstat.pdf similarity index 100% rename from tests/agstat.pdf rename to tests/files/agstat.pdf diff --git a/examples/files/left_rotated_table.pdf b/tests/files/anticlockwise_table_1.pdf similarity index 100% rename from examples/files/left_rotated_table.pdf rename to tests/files/anticlockwise_table_1.pdf diff --git a/tests/left_rotated_table_2.pdf b/tests/files/anticlockwise_table_2.pdf similarity index 100% rename from tests/left_rotated_table_2.pdf rename to tests/files/anticlockwise_table_2.pdf diff --git a/tests/assam.pdf b/tests/files/assam.pdf similarity index 100% rename from tests/assam.pdf rename to tests/files/assam.pdf diff --git a/examples/files/lines_in_background_1.pdf b/tests/files/background_lines_1.pdf similarity index 100% rename from examples/files/lines_in_background_1.pdf rename to tests/files/background_lines_1.pdf diff --git a/examples/files/lines_in_background_2.pdf b/tests/files/background_lines_2.pdf similarity index 100% rename from examples/files/lines_in_background_2.pdf rename to tests/files/background_lines_2.pdf diff --git a/examples/files/budget_2014-15.pdf b/tests/files/budget_2014-15.pdf similarity index 100% rename from examples/files/budget_2014-15.pdf rename to tests/files/budget_2014-15.pdf diff --git a/examples/files/right_rotated_table.pdf b/tests/files/clockwise_table_1.pdf similarity index 100% rename from examples/files/right_rotated_table.pdf rename to tests/files/clockwise_table_1.pdf diff --git a/tests/right_rotated_table_2.pdf b/tests/files/clockwise_table_2.pdf similarity index 100% rename from 
tests/right_rotated_table_2.pdf rename to tests/files/clockwise_table_2.pdf diff --git a/examples/files/column_span_1.pdf b/tests/files/column_span_1.pdf similarity index 100% rename from examples/files/column_span_1.pdf rename to tests/files/column_span_1.pdf diff --git a/examples/files/column_span_2.pdf b/tests/files/column_span_2.pdf similarity index 100% rename from examples/files/column_span_2.pdf rename to tests/files/column_span_2.pdf diff --git a/tests/district_health.pdf b/tests/files/district_health.pdf similarity index 100% rename from tests/district_health.pdf rename to tests/files/district_health.pdf diff --git a/tests/electoral_roll.pdf b/tests/files/electoral_roll.pdf similarity index 100% rename from tests/electoral_roll.pdf rename to tests/files/electoral_roll.pdf diff --git a/tests/health.pdf b/tests/files/health.pdf similarity index 100% rename from tests/health.pdf rename to tests/files/health.pdf diff --git a/tests/medicine.pdf b/tests/files/medicine.pdf similarity index 100% rename from tests/medicine.pdf rename to tests/files/medicine.pdf diff --git a/tests/mexican_towns.pdf b/tests/files/mexican_towns.pdf similarity index 100% rename from tests/mexican_towns.pdf rename to tests/files/mexican_towns.pdf diff --git a/examples/files/missing_values.pdf b/tests/files/missing_values.pdf similarity index 100% rename from examples/files/missing_values.pdf rename to tests/files/missing_values.pdf diff --git a/tests/population_growth.pdf b/tests/files/population_growth.pdf similarity index 100% rename from tests/population_growth.pdf rename to tests/files/population_growth.pdf diff --git a/tests/rainfall_distribution.pdf b/tests/files/rainfall_distribution.pdf similarity index 100% rename from tests/rainfall_distribution.pdf rename to tests/files/rainfall_distribution.pdf diff --git a/examples/files/row_span_1.pdf b/tests/files/row_span_1.pdf similarity index 100% rename from examples/files/row_span_1.pdf rename to tests/files/row_span_1.pdf diff --git a/examples/files/row_span_2.pdf b/tests/files/row_span_2.pdf similarity index 100% rename from examples/files/row_span_2.pdf rename to tests/files/row_span_2.pdf diff --git a/tests/row_span_3.pdf b/tests/files/row_span_3.pdf similarity index 100% rename from tests/row_span_3.pdf rename to tests/files/row_span_3.pdf diff --git a/tests/tableception.pdf b/tests/files/tableception.pdf similarity index 100% rename from tests/tableception.pdf rename to tests/files/tableception.pdf diff --git a/tests/tabula_test_pdfs/12s0324.pdf b/tests/files/tabula/12s0324.pdf similarity index 100% rename from tests/tabula_test_pdfs/12s0324.pdf rename to tests/files/tabula/12s0324.pdf diff --git a/tests/tabula_test_pdfs/20.pdf b/tests/files/tabula/20.pdf similarity index 100% rename from tests/tabula_test_pdfs/20.pdf rename to tests/files/tabula/20.pdf diff --git a/tests/tabula_test_pdfs/S2MNCEbirdisland.pdf b/tests/files/tabula/S2MNCEbirdisland.pdf similarity index 100% rename from tests/tabula_test_pdfs/S2MNCEbirdisland.pdf rename to tests/files/tabula/S2MNCEbirdisland.pdf diff --git a/tests/tabula_test_pdfs/arabic.pdf b/tests/files/tabula/arabic.pdf similarity index 100% rename from tests/tabula_test_pdfs/arabic.pdf rename to tests/files/tabula/arabic.pdf diff --git a/tests/tabula_test_pdfs/argentina_diputados_voting_record.pdf b/tests/files/tabula/argentina_diputados_voting_record.pdf similarity index 100% rename from tests/tabula_test_pdfs/argentina_diputados_voting_record.pdf rename to tests/files/tabula/argentina_diputados_voting_record.pdf 
diff --git a/tests/tabula_test_pdfs/campaign_donors.pdf b/tests/files/tabula/campaign_donors.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/campaign_donors.pdf
rename to tests/files/tabula/campaign_donors.pdf
diff --git a/tests/tabula_test_pdfs/china.pdf b/tests/files/tabula/china.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/china.pdf
rename to tests/files/tabula/china.pdf
diff --git a/tests/tabula_test_pdfs/eu-002.pdf b/tests/files/tabula/eu-002.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/eu-002.pdf
rename to tests/files/tabula/eu-002.pdf
diff --git a/tests/tabula_test_pdfs/eu-017.pdf b/tests/files/tabula/eu-017.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/eu-017.pdf
rename to tests/files/tabula/eu-017.pdf
diff --git a/tests/tabula_test_pdfs/failing_sort.pdf b/tests/files/tabula/failing_sort.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/failing_sort.pdf
rename to tests/files/tabula/failing_sort.pdf
diff --git a/tests/tabula_test_pdfs/frx_2012_disclosure.pdf b/tests/files/tabula/frx_2012_disclosure.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/frx_2012_disclosure.pdf
rename to tests/files/tabula/frx_2012_disclosure.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-001.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-012.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-013.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-014.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-015.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-016.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-017.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-018.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-019.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-020.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-021.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-022.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-023.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-024.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-025.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-026.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-027.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-001.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-002.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-003.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-004.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-005.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-006.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-007.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-008.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-009.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-010.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011a.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011b-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011b-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-011b-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-011b-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-012.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-013.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-014.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-015.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-016.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-017.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-018.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-019.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-020.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-021.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022-str.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022-str.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022-str.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022.json
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022.json
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022.json
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-022.pdf
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml
similarity index 100%
rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023-reg.xml
rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023-reg.xml
diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023-str.xml
b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-023.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-024.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025.json diff --git 
a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-025.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-026.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-027.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028-reg.xml rename to 
tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-028.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-029.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030.json similarity index 100% rename from 
tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031a.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031b-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031b-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031b-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-031b-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-031b-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032.json 
b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-032.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-033.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-034.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf diff --git 
a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035a.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035b-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035b-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035b-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-035b-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-035b-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-036.pdf rename 
to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-037.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-038.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml similarity index 100% rename from 
tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-039.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040-reg.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040-reg.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040-str.xml b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040-str.xml similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040-str.xml rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040-str.xml diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040.json b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040.json similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040.json rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040.json diff --git a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040.pdf b/tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf similarity index 100% rename from tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-040.pdf rename to tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf diff --git a/tests/tabula_test_pdfs/indictb1h_14.pdf b/tests/files/tabula/indictb1h_14.pdf similarity index 100% rename from tests/tabula_test_pdfs/indictb1h_14.pdf rename to tests/files/tabula/indictb1h_14.pdf diff --git a/tests/tabula_test_pdfs/labor.pdf b/tests/files/tabula/labor.pdf similarity index 100% rename from tests/tabula_test_pdfs/labor.pdf rename to tests/files/tabula/labor.pdf diff --git a/tests/tabula_test_pdfs/m27.pdf b/tests/files/tabula/m27.pdf similarity index 100% rename from tests/tabula_test_pdfs/m27.pdf rename to tests/files/tabula/m27.pdf diff --git a/tests/tabula_test_pdfs/mednine.pdf b/tests/files/tabula/mednine.pdf similarity index 100% rename from tests/tabula_test_pdfs/mednine.pdf rename to tests/files/tabula/mednine.pdf diff --git a/tests/tabula_test_pdfs/offense.pdf b/tests/files/tabula/offense.pdf similarity index 100% rename from tests/tabula_test_pdfs/offense.pdf rename to tests/files/tabula/offense.pdf diff --git a/tests/tabula_test_pdfs/puertos1.pdf b/tests/files/tabula/puertos1.pdf similarity index 100% rename from tests/tabula_test_pdfs/puertos1.pdf rename to tests/files/tabula/puertos1.pdf diff --git a/tests/tabula_test_pdfs/rotated_page.pdf b/tests/files/tabula/rotated_page.pdf 
similarity index 100% rename from tests/tabula_test_pdfs/rotated_page.pdf rename to tests/files/tabula/rotated_page.pdf diff --git a/tests/tabula_test_pdfs/schools.pdf b/tests/files/tabula/schools.pdf similarity index 100% rename from tests/tabula_test_pdfs/schools.pdf rename to tests/files/tabula/schools.pdf diff --git a/tests/tabula_test_pdfs/should_detect_rulings.pdf b/tests/files/tabula/should_detect_rulings.pdf similarity index 100% rename from tests/tabula_test_pdfs/should_detect_rulings.pdf rename to tests/files/tabula/should_detect_rulings.pdf diff --git a/tests/tabula_test_pdfs/sort_exception.pdf b/tests/files/tabula/sort_exception.pdf similarity index 100% rename from tests/tabula_test_pdfs/sort_exception.pdf rename to tests/files/tabula/sort_exception.pdf diff --git a/tests/tabula_test_pdfs/spanning_cells.pdf b/tests/files/tabula/spanning_cells.pdf similarity index 100% rename from tests/tabula_test_pdfs/spanning_cells.pdf rename to tests/files/tabula/spanning_cells.pdf diff --git a/tests/tabula_test_pdfs/spreadsheet_no_bounding_frame.pdf b/tests/files/tabula/spreadsheet_no_bounding_frame.pdf similarity index 100% rename from tests/tabula_test_pdfs/spreadsheet_no_bounding_frame.pdf rename to tests/files/tabula/spreadsheet_no_bounding_frame.pdf diff --git a/tests/tabula_test_pdfs/sydney_disclosure_contract.pdf b/tests/files/tabula/sydney_disclosure_contract.pdf similarity index 100% rename from tests/tabula_test_pdfs/sydney_disclosure_contract.pdf rename to tests/files/tabula/sydney_disclosure_contract.pdf diff --git a/tests/tabula_test_pdfs/twotables.pdf b/tests/files/tabula/twotables.pdf similarity index 100% rename from tests/tabula_test_pdfs/twotables.pdf rename to tests/files/tabula/twotables.pdf diff --git a/tests/tabula_test_pdfs/us-007.pdf b/tests/files/tabula/us-007.pdf similarity index 100% rename from tests/tabula_test_pdfs/us-007.pdf rename to tests/files/tabula/us-007.pdf diff --git a/tests/tabula_test_pdfs/us-017.pdf b/tests/files/tabula/us-017.pdf similarity index 100% rename from tests/tabula_test_pdfs/us-017.pdf rename to tests/files/tabula/us-017.pdf diff --git a/tests/tabula_test_pdfs/us-024.pdf b/tests/files/tabula/us-024.pdf similarity index 100% rename from tests/tabula_test_pdfs/us-024.pdf rename to tests/files/tabula/us-024.pdf diff --git a/examples/files/twotables_1.pdf b/tests/files/twotables_1.pdf similarity index 100% rename from examples/files/twotables_1.pdf rename to tests/files/twotables_1.pdf diff --git a/examples/files/twotables_2.pdf b/tests/files/twotables_2.pdf similarity index 100% rename from examples/files/twotables_2.pdf rename to tests/files/twotables_2.pdf diff --git a/tests/left_rotated_table_1.pdf b/tests/left_rotated_table_1.pdf deleted file mode 100644 index 8b7a615..0000000 Binary files a/tests/left_rotated_table_1.pdf and /dev/null differ diff --git a/tests/lines_in_background_1.pdf b/tests/lines_in_background_1.pdf deleted file mode 100644 index f23d6b7..0000000 Binary files a/tests/lines_in_background_1.pdf and /dev/null differ diff --git a/tests/lines_in_background_2.pdf b/tests/lines_in_background_2.pdf deleted file mode 100644 index b64b2f2..0000000 Binary files a/tests/lines_in_background_2.pdf and /dev/null differ diff --git a/tests/missing_values.pdf b/tests/missing_values.pdf deleted file mode 100644 index 90b620f..0000000 Binary files a/tests/missing_values.pdf and /dev/null differ diff --git a/tests/right_rotated_table_1.pdf b/tests/right_rotated_table_1.pdf deleted file mode 100644 index 9494465..0000000 Binary files 
a/tests/right_rotated_table_1.pdf and /dev/null differ
diff --git a/tests/row_span_1.pdf b/tests/row_span_1.pdf
deleted file mode 100644
index ef2c7ce..0000000
Binary files a/tests/row_span_1.pdf and /dev/null differ
diff --git a/tests/row_span_2.pdf b/tests/row_span_2.pdf
deleted file mode 100644
index 39bce84..0000000
Binary files a/tests/row_span_2.pdf and /dev/null differ
diff --git a/tests/test_common.py b/tests/test_common.py
new file mode 100644
index 0000000..52f966a
--- /dev/null
+++ b/tests/test_common.py
@@ -0,0 +1,80 @@
+import os
+
+import pandas as pd
+
+import camelot
+
+from test_data import *
+
+testdir = os.path.dirname(os.path.abspath(__file__))
+testdir = os.path.join(testdir, "files")
+
+
+def test_stream():
+    pass
+
+
+def test_stream_table_rotated():
+    df = pd.DataFrame(data_stream_table_rotated)
+
+    filename = os.path.join(testdir, "clockwise_table_2.pdf")
+    tables = camelot.read_pdf(filename)
+    assert df.equals(tables[0].df)
+
+    filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
+    tables = camelot.read_pdf(filename)
+    assert df.equals(tables[0].df)
+
+
+def test_stream_table_area():
+    df = pd.DataFrame(data_stream_table_area_single)
+
+    filename = os.path.join(testdir, "tabula/us-007.pdf")
+    tables = camelot.read_pdf(filename, table_area=["320,500,573,335"])
+    assert df.equals(tables[0].df)
+
+
+def test_stream_columns():
+    df = pd.DataFrame(data_stream_columns)
+
+    filename = os.path.join(testdir, "mexican_towns.pdf")
+    tables = camelot.read_pdf(
+        filename, columns=["67,180,230,425,475"], row_close_tol=10)
+    assert df.equals(tables[0].df)
+
+
+def test_lattice():
+    df = pd.DataFrame(data_lattice)
+
+    filename = os.path.join(testdir,
+        "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
+    tables = camelot.read_pdf(filename, pages="2", mesh=True)
+    assert df.equals(tables[0].df)
+
+
+def test_lattice_table_rotated():
+    df = pd.DataFrame(data_lattice_table_rotated)
+
+    filename = os.path.join(testdir, "clockwise_table_1.pdf")
+    tables = camelot.read_pdf(filename, mesh=True)
+    assert df.equals(tables[0].df)
+
+    filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
+    tables = camelot.read_pdf(filename, mesh=True)
+    assert df.equals(tables[0].df)
+
+
+def test_lattice_process_background():
+    df = pd.DataFrame(data_lattice_process_background)
+
+    filename = os.path.join(testdir, "background_lines_1.pdf")
+    tables = camelot.read_pdf(filename, mesh=True, process_background=True)
+    assert df.equals(tables[1].df)
+
+
+def test_lattice_copy_text():
+    df = pd.DataFrame(data_lattice_copy_text)
+
+    filename = os.path.join(testdir, "row_span_1.pdf")
+    tables = camelot.read_pdf(filename, mesh=True, line_size_scaling=60, copy_text="v")
+    assert df.equals(tables[0].df)
\ No newline at end of file
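Everything in the new suite goes through the single `camelot.read_pdf` entry point, which returns a `TableList` whose members expose their parsed cells as a pandas `DataFrame` via `.df`. A minimal sketch of the same calls outside the test harness (paths are the test fixtures above; keyword values are copied from `test_common.py`, and the Stream-vs-Lattice split is inferred from which tests pass `mesh=True`):

```python
import camelot

# Stream-style parsing (spaces between data) is the default; table_area
# restricts detection to four-number region strings (assumed to be
# "x1,y1,x2,y2" in PDF coordinates).
tables = camelot.read_pdf("tests/files/tabula/us-007.pdf",
                          table_area=["320,500,573,335"])
print(tables[0].df)

# mesh=True selects Lattice-style parsing (lines between data); pages
# takes a page-number string instead of the old pagenos dicts.
tables = camelot.read_pdf(
    "tests/files/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf",
    pages="2", mesh=True)
print(tables[0].df)
```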
characteristic","method","method","sterilization","sterilization","Pill","IUD","Injectables","Nirodh","method","method","Rhythm","drawal","method","using","Total","women"], + ["","Caste/tribe","","","","","","","","","","","","","","","",""], + ["","Scheduled caste","74.8","55.8","42.9","0.9","9.7","0.0","0.2","2.2","0.0","19.0","11.2","7.4","0.4","25.2","100.0","1,363"], + ["","Scheduled tribe","59.3","39.0","26.8","0.6","6.4","0.6","1.2","3.5","0.0","20.3","10.4","5.8","4.1","40.7","100.0","256"], + ["","Other backward class","71.4","51.1","34.9","0.0","8.6","1.4","0.0","6.2","0.0","20.4","12.6","7.8","0.0","28.6","100.0","211"], + ["","Other","71.1","48.8","28.2","0.8","13.3","0.9","0.3","5.2","0.1","22.3","12.9","9.1","0.3","28.9","100.0","3,319"], + ["","Wealth index","","","","","","","","","","","","","","","",""], + ["","Lowest","64.5","48.6","34.3","0.5","10.5","0.6","0.7","2.0","0.0","15.9","9.9","4.6","1.4","35.5","100.0","1,258"], + ["","Second","68.5","50.4","36.2","1.1","11.4","0.5","0.1","1.1","0.0","18.1","11.2","6.7","0.2","31.5","100.0","1,317"], + ["","Middle","75.5","52.8","33.6","0.6","14.2","0.4","0.5","3.4","0.1","22.7","13.4","8.9","0.4","24.5","100.0","1,018"], + ["","Fourth","73.9","52.3","32.0","0.5","12.5","0.6","0.2","6.3","0.2","21.6","11.5","9.9","0.2","26.1","100.0","908"], + ["","Highest","78.3","44.4","19.5","1.0","9.7","1.4","0.0","12.7","0.0","33.8","18.2","15.6","0.0","21.7","100.0","733"], + ["","Number of living children","","","","","","","","","","","","","","","",""], + ["","No children","25.1","7.6","0.3","0.5","2.0","0.0","0.0","4.8","0.0","17.5","9.0","8.5","0.0","74.9","100.0","563"], + ["","1 child","66.5","32.1","3.7","0.7","20.1","0.7","0.1","6.9","0.0","34.3","18.9","15.2","0.3","33.5","100.0","1,190"], + ["","1 son","66.8","33.2","4.1","0.7","21.1","0.5","0.3","6.6","0.0","33.5","21.2","12.3","0.0","33.2","100.0","672"], + ["","No sons","66.1","30.7","3.1","0.6","18.8","0.8","0.0","7.3","0.0","35.4","15.8","19.0","0.6","33.9","100.0","517"], + ["","2 children","81.6","60.5","41.8","0.9","11.6","0.8","0.3","4.8","0.2","21.1","12.2","8.3","0.6","18.4","100.0","1,576"], + ["","1 or more sons","83.7","64.2","46.4","0.9","10.8","0.8","0.4","4.8","0.1","19.5","11.1","7.6","0.7","16.3","100.0","1,268"], + ["","No sons","73.2","45.5","23.2","1.0","15.1","0.9","0.0","4.8","0.5","27.7","16.8","11.0","0.0","26.8","100.0","308"], + ["","3 children","83.9","71.2","57.7","0.8","9.8","0.6","0.5","1.8","0.0","12.7","8.7","3.3","0.8","16.1","100.0","961"], + ["","1 or more sons","85.0","73.2","60.3","0.9","9.4","0.5","0.5","1.6","0.0","11.8","8.1","3.0","0.7","15.0","100.0","860"], + ["","No sons","74.7","53.8","35.3","0.0","13.7","1.6","0.0","3.2","0.0","20.9","13.4","6.1","1.5","25.3","100.0","101"], + ["","4+ children","74.3","58.1","45.1","0.6","8.7","0.6","0.7","2.4","0.0","16.1","9.9","5.4","0.8","25.7","100.0","944"], + ["","1 or more sons","73.9","58.2","46.0","0.7","8.3","0.7","0.7","1.9","0.0","15.7","9.4","5.5","0.8","26.1","100.0","901"], + ["","No sons","(82.1)","(57.3)","(25.6)","(0.0)","(17.8)","(0.0)","(0.0)","(13.9)","(0.0)","(24.8)","(21.3)","(3.5)","(0.0)","(17.9)","100.0","43"], + ["","Total","71.2","49.9","32.2","0.7","11.7","0.6","0.3","4.3","0.1","21.3","12.3","8.4","0.5","28.8","100.0","5,234"], + ["","NFHS-2 (1998-99)","66.6","47.3","32.0","1.8","9.2","1.4","na","2.9","na","na","8.7","9.8","na","33.4","100.0","4,116"], + ["","NFHS-1 
(1992-93)","57.7","37.6","26.5","4.3","3.6","1.3","0.1","1.9","na","na","11.3","8.3","na","42.3","100.0","3,970"], + ["","","Note: If more than one method is used, only the most effective method is considered in this tabulation. Total includes women for whom caste/tribe was not known or is missing, who are","","","","","","","","","","","","","","",""], + ["","not shown separately.","","","","","","","","","","","","","","","",""], + ["","na = Not available","","","","","","","","","","","","","","","",""], + ["","","ns = Not shown; see table 2b, footnote 1","","","","","","","","","","","","","","",""], + ["","( ) Based on 25-49 unweighted cases.","","","","","","","","","","","","","","","",""], + ["","","","","","","","","54","","","","","","","","",""] +] + + +data_stream_table_area_single = [ + ["","One Withholding"], + ["Payroll Period","Allowance"], + ["Weekly","$71.15"], + ["Biweekly","142.31"], + ["Semimonthly","154.17"], + ["Monthly","308.33"], + ["Quarterly","925.00"], + ["Semiannually","1,850.00"], + ["Annually","3,700.00"], + ["Daily or Miscellaneous","14.23"], + ["(each day of the payroll period)",""] +] + + +data_stream_columns = [ + ["Clave","Nombre Entidad","Clave","Nombre Municipio","Clave","Nombre Localidad"], + ["Entidad","","Municipio","","Localidad",""], + ["01","Aguascalientes","001","Aguascalientes","0094","Granja Adelita"], + ["01","Aguascalientes","001","Aguascalientes","0096","Agua Azul"], + ["01","Aguascalientes","001","Aguascalientes","0100","Rancho Alegre"], + ["01","Aguascalientes","001","Aguascalientes","0102","Los Arbolitos [Rancho]"], + ["01","Aguascalientes","001","Aguascalientes","0104","Ardillas de Abajo (Las Ardillas)"], + ["01","Aguascalientes","001","Aguascalientes","0106","Arellano"], + ["01","Aguascalientes","001","Aguascalientes","0112","Bajío los Vázquez"], + ["01","Aguascalientes","001","Aguascalientes","0113","Bajío de Montoro"], + ["01","Aguascalientes","001","Aguascalientes","0114","Residencial San Nicolás [Baños la Cantera]"], + ["01","Aguascalientes","001","Aguascalientes","0120","Buenavista de Peñuelas"], + ["01","Aguascalientes","001","Aguascalientes","0121","Cabecita 3 Marías (Rancho Nuevo)"], + ["01","Aguascalientes","001","Aguascalientes","0125","Cañada Grande de Cotorina"], + ["01","Aguascalientes","001","Aguascalientes","0126","Cañada Honda [Estación]"], + ["01","Aguascalientes","001","Aguascalientes","0127","Los Caños"], + ["01","Aguascalientes","001","Aguascalientes","0128","El Cariñán"], + ["01","Aguascalientes","001","Aguascalientes","0129","El Carmen [Granja]"], + ["01","Aguascalientes","001","Aguascalientes","0135","El Cedazo (Cedazo de San Antonio)"], + ["01","Aguascalientes","001","Aguascalientes","0138","Centro de Arriba (El Taray)"], + ["01","Aguascalientes","001","Aguascalientes","0139","Cieneguilla (La Lumbrera)"], + ["01","Aguascalientes","001","Aguascalientes","0141","Cobos"], + ["01","Aguascalientes","001","Aguascalientes","0144","El Colorado (El Soyatal)"], + ["01","Aguascalientes","001","Aguascalientes","0146","El Conejal"], + ["01","Aguascalientes","001","Aguascalientes","0157","Cotorina de Abajo"], + ["01","Aguascalientes","001","Aguascalientes","0162","Coyotes"], + ["01","Aguascalientes","001","Aguascalientes","0166","La Huerta (La Cruz)"], + ["01","Aguascalientes","001","Aguascalientes","0170","Cuauhtémoc (Las Palomas)"], + ["01","Aguascalientes","001","Aguascalientes","0171","Los Cuervos (Los Ojos de Agua)"], + ["01","Aguascalientes","001","Aguascalientes","0172","San José [Granja]"], + 
["01","Aguascalientes","001","Aguascalientes","0176","La Chiripa"], + ["01","Aguascalientes","001","Aguascalientes","0182","Dolores"], + ["01","Aguascalientes","001","Aguascalientes","0183","Los Dolores"], + ["01","Aguascalientes","001","Aguascalientes","0190","El Duraznillo"], + ["01","Aguascalientes","001","Aguascalientes","0191","Los Durón"], + ["01","Aguascalientes","001","Aguascalientes","0197","La Escondida"], + ["01","Aguascalientes","001","Aguascalientes","0201","Brande Vin [Bodegas]"], + ["01","Aguascalientes","001","Aguascalientes","0207","Valle Redondo"], + ["01","Aguascalientes","001","Aguascalientes","0209","La Fortuna"], + ["01","Aguascalientes","001","Aguascalientes","0212","Lomas del Gachupín"], + ["01","Aguascalientes","001","Aguascalientes","0213","El Carmen (Gallinas Güeras) [Rancho]"], + ["01","Aguascalientes","001","Aguascalientes","0216","La Gloria"], + ["01","Aguascalientes","001","Aguascalientes","0226","Hacienda Nueva"], +] + + +data_lattice = [ + ["Cycle Name","KI (1/km)","Distance (mi)","Percent Fuel Savings","","",""], + ["","","","Improved Speed","Decreased Accel","Eliminate Stops","Decreased Idle"], + ["2012_2","3.30","1.3","5.9%","9.5%","29.2%","17.4%"], + ["2145_1","0.68","11.2","2.4%","0.1%","9.5%","2.7%"], + ["4234_1","0.59","58.7","8.5%","1.3%","8.5%","3.3%"], + ["2032_2","0.17","57.8","21.7%","0.3%","2.7%","1.2%"], + ["4171_1","0.07","173.9","58.1%","1.6%","2.1%","0.5%"] +] + + +data_lattice_table_rotated = [ + ["State","Nutritional Assessment (No. of individuals)","","","","IYCF Practices (No. of mothers: 2011-12)","Blood Pressure (No. of adults: 2011-12)","","Fasting Blood Sugar (No. of adults:2011-12)",""], + ["","1975-79","1988-90","1996-97","2011-12","","Men","Women","Men","Women"], + ["Kerala","5738","6633","8864","8297","245","2161","3195","1645","2391"], + ["Tamil Nadu","7387","10217","5813","7851","413","2134","2858","1119","1739"], + ["Karnataka","6453","8138","12606","8958","428","2467","2894","1628","2028"], + ["Andhra Pradesh","5844","9920","9545","8300","557","1899","2493","1111","1529"], + ["Maharashtra","5161","7796","6883","9525","467","2368","2648","1417","1599"], + ["Gujarat","4403","5374","4866","9645","477","2687","3021","2122","2503"], + ["Madhya Pradesh","*","*","*","7942","470","1965","2150","1579","1709"], + ["Orissa","3756","5540","12024","8473","398","2040","2624","1093","1628"], + ["West Bengal","*","*","*","8047","423","2058","2743","1413","2027"], + ["Uttar Pradesh","*","*","*","9860","581","2139","2415","1185","1366"], + ["Pooled","38742","53618","60601","86898","4459","21918","27041","14312","18519"] +] + + +data_lattice_process_background = [ + ["State","Date","Halt stations","Halt days","Persons directly reached(in lakh)","Persons trained","Persons counseled","Persons testedfor HIV"], + ["Delhi","1.12.2009","8","17","1.29","3,665","2,409","1,000"], + ["Rajasthan","2.12.2009 to 19.12.2009","","","","","",""], + ["Gujarat","20.12.2009 to 3.1.2010","6","13","6.03","3,810","2,317","1,453"], + ["Maharashtra","4.01.2010 to 1.2.2010","13","26","1.27","5,680","9,027","4,153"], + ["Karnataka","2.2.2010 to 22.2.2010","11","19","1.80","5,741","3,658","3,183"], + ["Kerala","23.2.2010 to 11.3.2010","9","17","1.42","3,559","2,173","855"], + ["Total","","47","92","11.81","22,455","19,584","10,644"] +] + + +data_lattice_copy_text = [ + ["Plan Type","County","Plan Name","Totals"], + ["GMC","Sacramento","Anthem Blue Cross","164,380"], + ["GMC","Sacramento","Health Net","126,547"], + ["GMC","Sacramento","Kaiser Foundation","74,620"], + 
["GMC","Sacramento","Molina Healthcare","59,989"], + ["GMC","San Diego","Care 1st Health Plan","71,831"], + ["GMC","San Diego","Community Health Group","264,639"], + ["GMC","San Diego","Health Net","72,404"], + ["GMC","San Diego","Kaiser","50,415"], + ["GMC","San Diego","Molina Healthcare","206,430"], + ["GMC","Total GMC Enrollment","","1,091,255"], + ["COHS","Marin","Partnership Health Plan of CA","36,006"], + ["COHS","Mendocino","Partnership Health Plan of CA","37,243"], + ["COHS","Napa","Partnership Health Plan of CA","28,398"], + ["COHS","Solano","Partnership Health Plan of CA","113,220"], + ["COHS","Sonoma","Partnership Health Plan of CA","112,271"], + ["COHS","Yolo","Partnership Health Plan of CA","52,674"], + ["COHS","Del Norte","Partnership Health Plan of CA","11,242"], + ["COHS","Humboldt","Partnership Health Plan of CA","49,911"], + ["COHS","Lake","Partnership Health Plan of CA","29,149"], + ["COHS","Lassen","Partnership Health Plan of CA","7,360"], + ["COHS","Modoc","Partnership Health Plan of CA","2,940"], + ["COHS","Shasta","Partnership Health Plan of CA","61,763"], + ["COHS","Siskiyou","Partnership Health Plan of CA","16,715"], + ["COHS","Trinity","Partnership Health Plan of CA","4,542"], + ["COHS","Merced","Central California Alliance for Health","123,907"], + ["COHS","Monterey","Central California Alliance for Health","147,397"], + ["COHS","Santa Cruz","Central California Alliance for Health","69,458"], + ["COHS","Santa Barbara","CenCal","117,609"], + ["COHS","San Luis Obispo","CenCal","55,761"], + ["COHS","Orange","CalOptima","783,079"], + ["COHS","San Mateo","Health Plan of San Mateo","113,202"], + ["COHS","Ventura","Gold Coast Health Plan","202,217"], + ["COHS","Total COHS Enrollment","","2,176,064"], + ["Subtotal for Two-Plan, Regional Model, GMC and COHS","","","10,132,022"], + ["PCCM","Los Angeles","AIDS Healthcare Foundation","828"], + ["PCCM","San Francisco","Family Mosaic","25"], + ["PCCM","Total PHP Enrollment","","853"], + ["All Models Total Enrollments","","","10,132,875"], + ["Source: Data Warehouse 12/14/15","","",""] +] \ No newline at end of file diff --git a/tests/test_lattice.py b/tests/test_lattice.py deleted file mode 100644 index 818e16a..0000000 --- a/tests/test_lattice.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding: utf8 -import os - -from nose.tools import assert_equal - -from camelot.pdf import Pdf -from camelot.lattice import Lattice - - -testdir = os.path.dirname(os.path.abspath(__file__)) - - -def test_lattice_basic(): - - data = [ - ["Cycle Name","KI (1/km)","Distance (mi)","Percent Fuel Savings","","",""], - ["","","","Improved Speed","Decreased Accel","Eliminate Stops","Decreased Idle"], - ["2012_2","3.30","1.3","5.9%","9.5%","29.2%","17.4%"], - ["2145_1","0.68","11.2","2.4%","0.1%","9.5%","2.7%"], - ["4234_1","0.59","58.7","8.5%","1.3%","8.5%","3.3%"], - ["2032_2","0.17","57.8","21.7%","0.3%","2.7%","1.2%"], - ["4171_1","0.07","173.9","58.1%","1.6%","2.1%","0.5%"] - ] - pdfname = os.path.join(testdir, - "tabula_test_pdfs/icdar2013-dataset/competition-dataset-us/us-030.pdf") - manager = Pdf(Lattice(), pdfname, pagenos=[{'start': 2, 'end': 2}], - clean=True) - tables = manager.extract() - assert_equal(tables['page-2']['table-1']['data'], data) - - -def test_lattice_fill(): - - data = [ - ["Plan Type","County","Plan Name","Totals"], - ["GMC","Sacramento","Anthem Blue Cross","164,380"], - ["GMC","Sacramento","Health Net","126,547"], - ["GMC","Sacramento","Kaiser Foundation","74,620"], - ["GMC","Sacramento","Molina Healthcare","59,989"], - 
["GMC","San Diego","Care 1st Health Plan","71,831"], - ["GMC","San Diego","Community Health Group","264,639"], - ["GMC","San Diego","Health Net","72,404"], - ["GMC","San Diego","Kaiser","50,415"], - ["GMC","San Diego","Molina Healthcare","206,430"], - ["GMC","Total GMC Enrollment","","1,091,255"], - ["COHS","Marin","Partnership Health Plan of CA","36,006"], - ["COHS","Mendocino","Partnership Health Plan of CA","37,243"], - ["COHS","Napa","Partnership Health Plan of CA","28,398"], - ["COHS","Solano","Partnership Health Plan of CA","113,220"], - ["COHS","Sonoma","Partnership Health Plan of CA","112,271"], - ["COHS","Yolo","Partnership Health Plan of CA","52,674"], - ["COHS","Del Norte","Partnership Health Plan of CA","11,242"], - ["COHS","Humboldt","Partnership Health Plan of CA","49,911"], - ["COHS","Lake","Partnership Health Plan of CA","29,149"], - ["COHS","Lassen","Partnership Health Plan of CA","7,360"], - ["COHS","Modoc","Partnership Health Plan of CA","2,940"], - ["COHS","Shasta","Partnership Health Plan of CA","61,763"], - ["COHS","Siskiyou","Partnership Health Plan of CA","16,715"], - ["COHS","Trinity","Partnership Health Plan of CA","4,542"], - ["COHS","Merced","Central California Alliance for Health","123,907"], - ["COHS","Monterey","Central California Alliance for Health","147,397"], - ["COHS","Santa Cruz","Central California Alliance for Health","69,458"], - ["COHS","Santa Barbara","CenCal","117,609"], - ["COHS","San Luis Obispo","CenCal","55,761"], - ["COHS","Orange","CalOptima","783,079"], - ["COHS","San Mateo","Health Plan of San Mateo","113,202"], - ["COHS","Ventura","Gold Coast Health Plan","202,217"], - ["COHS","Total COHS Enrollment","","2,176,064"], - ["Subtotal for Two-Plan, Regional Model, GMC and COHS","","","10,132,022"], - ["PCCM","Los Angeles","AIDS Healthcare Foundation","828"], - ["PCCM","San Francisco","Family Mosaic","25"], - ["PCCM","Total PHP Enrollment","","853"], - ["All Models Total Enrollments","","","10,132,875"], - ["Source: Data Warehouse 12/14/15","","",""] - ] - pdfname = os.path.join(testdir, 'row_span_1.pdf') - manager = Pdf(Lattice(fill='v', scale=40), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables['page-1']['table-1']['data'], data) - - -def test_lattice_invert(): - - data = [ - ["State","Date","Halt stations","Halt days","Persons directly reached(in lakh)","Persons trained","Persons counseled","Persons testedfor HIV"], - ["Delhi","1.12.2009","8","17","1.29","3,665","2,409","1,000"], - ["Rajasthan","2.12.2009 to 19.12.2009","","","","","",""], - ["Gujarat","20.12.2009 to 3.1.2010","6","13","6.03","3,810","2,317","1,453"], - ["Maharashtra","4.01.2010 to 1.2.2010","13","26","1.27","5,680","9,027","4,153"], - ["Karnataka","2.2.2010 to 22.2.2010","11","19","1.80","5,741","3,658","3,183"], - ["Kerala","23.2.2010 to 11.3.2010","9","17","1.42","3,559","2,173","855"], - ["Total","","47","92","11.81","22,455","19,584","10,644"] - ] - pdfname = os.path.join(testdir, 'lines_in_background_1.pdf') - manager = Pdf(Lattice(invert=True), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables['page-1']['table-2']['data'], data) - - -def test_lattice_table_rotation(): - - data = [ - ["State","Nutritional Assessment (No. of individuals)","","","","IYCF Practices (No. of mothers: 2011-12)","Blood Pressure (No. of adults: 2011-12)","","Fasting Blood Sugar (No. 
of adults:2011-12)",""], - ["","1975-79","1988-90","1996-97","2011-12","","Men","Women","Men","Women"], - ["Kerala","5738","6633","8864","8297","245","2161","3195","1645","2391"], - ["Tamil Nadu","7387","10217","5813","7851","413","2134","2858","1119","1739"], - ["Karnataka","6453","8138","12606","8958","428","2467","2894","1628","2028"], - ["Andhra Pradesh","5844","9920","9545","8300","557","1899","2493","1111","1529"], - ["Maharashtra","5161","7796","6883","9525","467","2368","2648","1417","1599"], - ["Gujarat","4403","5374","4866","9645","477","2687","3021","2122","2503"], - ["Madhya Pradesh","*","*","*","7942","470","1965","2150","1579","1709"], - ["Orissa","3756","5540","12024","8473","398","2040","2624","1093","1628"], - ["West Bengal","*","*","*","8047","423","2058","2743","1413","2027"], - ["Uttar Pradesh","*","*","*","9860","581","2139","2415","1185","1366"], - ["Pooled","38742","53618","60601","86898","4459","21918","27041","14312","18519"] - ] - pdfname = os.path.join(testdir, 'left_rotated_table_1.pdf') - manager = Pdf(Lattice(), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables['page-1']['table-1']['data'], data) - - pdfname = os.path.join(testdir, 'right_rotated_table_1.pdf') - manager = Pdf(Lattice(), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables['page-1']['table-1']['data'], data) \ No newline at end of file diff --git a/tests/test_stream.py b/tests/test_stream.py deleted file mode 100644 index a947b2b..0000000 --- a/tests/test_stream.py +++ /dev/null @@ -1,220 +0,0 @@ -# coding: utf8 -import os - -from nose.tools import assert_equal - -from camelot.pdf import Pdf -from camelot.stream import Stream - - -testdir = os.path.dirname(os.path.abspath(__file__)) - - -def test_stream_basic(): - - data = [ - ["", "Table 6.", ""], - ["", "U.S. Production, Imports, Exports, and Net Supply of Conventional Pesticides", ""], - ["", "at Producer Level, 1994/95 Estimates.", ""], - ["", "Active Ingredient", "Sales Value"], - ["", "(in billions of lbs.)", "(in billions of dollars)"], - ["Category", "1994/95", "1994/95"], - ["U.S. Production", "1.3", "7.0"], - ["U.S. Imports", "0.2", "2.2"], - ["Total Supply", "1.5", "9.2"], - ["U.S. 
Exports", "0.5", "2.6"], - ["Net Supply/Usage", "1.0", "6.6"], - ["SOURCE:", "EPA estimates based on ACPA Surveys, Department of Commerce Publications, tabulations and other", ""], - ["sources.", "", ""], - ["16\xe2\x80\x9494/95 Pesticides Industry Sales And Usage", "", ""] - ] - - pdfname = os.path.join(testdir, "tabula_test_pdfs/us-024.pdf") - manager = Pdf(Stream(), pdfname, pagenos=[{"start": 1, "end": 1}], - clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) - - -def test_stream_missing_value(): - - data = [ - ["Bhandara - Key Indicators","","","",""], - ["","DLHS-4 (2012-13)","","DLHS-3 (2007-08)",""], - ["Indicators","TOTAL","RURAL","TOTAL","RURAL"], - ["Reported Prevalence of Morbidity","","","",""], - ["Any Injury .....................................................................................................................................","1.9","2.1","",""], - ["Acute Illness .................................................................................................................................","4.5","5.6","",""], - ["Chronic Illness ..............................................................................................................................","5.1","4.1","",""], - ["Reported Prevalence of Chronic Illness during last one year (%)","","","",""], - ["Disease of respiratory system ......................................................................................................","11.7","15.0","",""], - ["Disease of cardiovascular system ................................................................................................","8.9","9.3","",""], - ["Persons suffering from tuberculosis .............................................................................................","2.2","1.5","",""], - ["Anaemia Status by Haemoglobin Level14 (%)","","","",""], - ["Children (6-59 months) having anaemia ......................................................................................","68.5","71.9","",""], - ["Children (6-59 months) having severe anaemia ..........................................................................","6.7","9.4","",""], - ["Children (6-9 Years) having anaemia - Male ................................................................................","67.1","71.4","",""], - ["Children (6-9 Years) having severe anaemia - Male ....................................................................","4.4","2.4","",""], - ["Children (6-9 Years) having anaemia - Female ...........................................................................","52.4","48.8","",""], - ["Children (6-9 Years) having severe anaemia - Female ................................................................","1.2","0.0","",""], - ["Children (6-14 years) having anaemia - Male .............................................................................","50.8","62.5","",""], - ["Children (6-14 years) having severe anaemia - Male ..................................................................","3.7","3.6","",""], - ["Children (6-14 years) having anaemia - Female .........................................................................","48.3","50.0","",""], - ["Children (6-14 years) having severe anaemia - Female ..............................................................","4.3","6.1","",""], - ["Children (10-19 Years15) having anaemia - Male .........................................................................","37.9","51.2","",""], - ["Children (10-19 Years15) having severe anaemia - Male 
.............................................................","3.5","4.0","",""], - ["Children (10-19 Years15) having anaemia - Female .....................................................................","46.6","52.1","",""], - ["Children (10-19 Years15) having severe anaemia - Female .........................................................","6.4","6.5","",""], - ["Adolescents (15-19 years) having anaemia ................................................................................","39.4","46.5","",""], - ["Adolescents (15-19 years) having severe anaemia .....................................................................","5.4","5.1","",""], - ["Pregnant women (15-49 aged) having anaemia ..........................................................................","48.8","51.5","",""], - ["Pregnant women (15-49 aged) having severe anaemia ..............................................................","7.1","8.8","",""], - ["Women (15-49 aged) having anaemia .........................................................................................","45.2","51.7","",""], - ["Women (15-49 aged) having severe anaemia .............................................................................","4.8","5.9","",""], - ["Persons (20 years and above) having anaemia ...........................................................................","37.8","42.1","",""], - ["Persons (20 years and above) having Severe anaemia ..............................................................","4.6","4.8","",""], - ["Blood Sugar Level (age 18 years and above) (%)","","","",""], - ["Blood Sugar Level >140 mg/dl (high) ...........................................................................................","12.9","11.1","",""], - ["Blood Sugar Level >160 mg/dl (very high) ...................................................................................","7.0","5.1","",""], - ["Hypertension (age 18 years and above) (%)","","","",""], - ["Above Normal Range (Systolic >140 mm of Hg & Diastolic >90 mm of Hg ) ..............................","23.8","22.8","",""], - ["Moderately High (Systolic >160 mm of Hg & Diastolic >100 mm of Hg ) .....................................","8.2","7.1","",""], - ["Very High (Systolic >180 mm of Hg & Diastolic >110 mm of Hg ) ...............................................","3.7","3.1","",""], - ["14 Any anaemia below 11g/dl, severe anaemia below 7g/dl. 
15 Excluding age group 19 years","","","",""], - ["Chronic Illness :Any person with symptoms persisting for longer than one month is defined as suffering from chronic illness","","","",""], - ["4","","","",""] - ] - pdfname = os.path.join(testdir, "missing_values.pdf") - manager = Pdf(Stream(flag_size=False), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) - - -def test_stream_single_table_area(): - - data = [ - ["","One Withholding"], - ["Payroll Period","Allowance"], - ["Weekly","$71.15"], - ["Biweekly","142.31"], - ["Semimonthly","154.17"], - ["Monthly","308.33"], - ["Quarterly","925.00"], - ["Semiannually","1,850.00"], - ["Annually","3,700.00"], - ["Daily or Miscellaneous","14.23"], - ["(each day of the payroll period)",""] - ] - pdfname = os.path.join(testdir, "tabula_test_pdfs/us-007.pdf") - manager = Pdf(Stream(table_area=["320,500,573,335"]), - pdfname, pagenos=[{"start": 1, "end": 1}], clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) - - -def test_stream_columns(): - - data = [ - ["Clave","Nombre Entidad","Clave","Nombre Municipio","Clave","Nombre Localidad"], - ["Entidad","","Municipio","","Localidad",""], - ["01","Aguascalientes","001","Aguascalientes","0094","Granja Adelita"], - ["01","Aguascalientes","001","Aguascalientes","0096","Agua Azul"], - ["01","Aguascalientes","001","Aguascalientes","0100","Rancho Alegre"], - ["01","Aguascalientes","001","Aguascalientes","0102","Los Arbolitos [Rancho]"], - ["01","Aguascalientes","001","Aguascalientes","0104","Ardillas de Abajo (Las Ardillas)"], - ["01","Aguascalientes","001","Aguascalientes","0106","Arellano"], - ["01","Aguascalientes","001","Aguascalientes","0112","Bajío los Vázquez"], - ["01","Aguascalientes","001","Aguascalientes","0113","Bajío de Montoro"], - ["01","Aguascalientes","001","Aguascalientes","0114","Residencial San Nicolás [Baños la Cantera]"], - ["01","Aguascalientes","001","Aguascalientes","0120","Buenavista de Peñuelas"], - ["01","Aguascalientes","001","Aguascalientes","0121","Cabecita 3 Marías (Rancho Nuevo)"], - ["01","Aguascalientes","001","Aguascalientes","0125","Cañada Grande de Cotorina"], - ["01","Aguascalientes","001","Aguascalientes","0126","Cañada Honda [Estación]"], - ["01","Aguascalientes","001","Aguascalientes","0127","Los Caños"], - ["01","Aguascalientes","001","Aguascalientes","0128","El Cariñán"], - ["01","Aguascalientes","001","Aguascalientes","0129","El Carmen [Granja]"], - ["01","Aguascalientes","001","Aguascalientes","0135","El Cedazo (Cedazo de San Antonio)"], - ["01","Aguascalientes","001","Aguascalientes","0138","Centro de Arriba (El Taray)"], - ["01","Aguascalientes","001","Aguascalientes","0139","Cieneguilla (La Lumbrera)"], - ["01","Aguascalientes","001","Aguascalientes","0141","Cobos"], - ["01","Aguascalientes","001","Aguascalientes","0144","El Colorado (El Soyatal)"], - ["01","Aguascalientes","001","Aguascalientes","0146","El Conejal"], - ["01","Aguascalientes","001","Aguascalientes","0157","Cotorina de Abajo"], - ["01","Aguascalientes","001","Aguascalientes","0162","Coyotes"], - ["01","Aguascalientes","001","Aguascalientes","0166","La Huerta (La Cruz)"], - ["01","Aguascalientes","001","Aguascalientes","0170","Cuauhtémoc (Las Palomas)"], - ["01","Aguascalientes","001","Aguascalientes","0171","Los Cuervos (Los Ojos de Agua)"], - ["01","Aguascalientes","001","Aguascalientes","0172","San José [Granja]"], - ["01","Aguascalientes","001","Aguascalientes","0176","La Chiripa"], - 
["01","Aguascalientes","001","Aguascalientes","0182","Dolores"], - ["01","Aguascalientes","001","Aguascalientes","0183","Los Dolores"], - ["01","Aguascalientes","001","Aguascalientes","0190","El Duraznillo"], - ["01","Aguascalientes","001","Aguascalientes","0191","Los Durón"], - ["01","Aguascalientes","001","Aguascalientes","0197","La Escondida"], - ["01","Aguascalientes","001","Aguascalientes","0201","Brande Vin [Bodegas]"], - ["01","Aguascalientes","001","Aguascalientes","0207","Valle Redondo"], - ["01","Aguascalientes","001","Aguascalientes","0209","La Fortuna"], - ["01","Aguascalientes","001","Aguascalientes","0212","Lomas del Gachupín"], - ["01","Aguascalientes","001","Aguascalientes","0213","El Carmen (Gallinas Güeras) [Rancho]"], - ["01","Aguascalientes","001","Aguascalientes","0216","La Gloria"], - ["01","Aguascalientes","001","Aguascalientes","0226","Hacienda Nueva"], - ] - pdfname = os.path.join(testdir, "mexican_towns.pdf") - manager = Pdf(Stream(columns=["67,180,230,425,475"], ytol=[10]), pdfname, - clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) - - -def test_stream_table_rotation(): - - data = [ - ["","","Table 21 Current use of contraception by background characteristics—Continued","","","","","","","","","","","","","","",""], - ["","","","","","","Modern method","","","","","","","Traditional method","","","",""], - ["","","","Any","","","","","","","Other","Any","","","","Not","","Number"], - ["","","Any","modern","Female","Male","","","","Condom/","modern","traditional","","With-","Folk","currently","","of"], - ["","Background characteristic","method","method","sterilization","sterilization","Pill","IUD","Injectables","Nirodh","method","method","Rhythm","drawal","method","using","Total","women"], - ["","Caste/tribe","","","","","","","","","","","","","","","",""], - ["","Scheduled caste","74.8","55.8","42.9","0.9","9.7","0.0","0.2","2.2","0.0","19.0","11.2","7.4","0.4","25.2","100.0","1,363"], - ["","Scheduled tribe","59.3","39.0","26.8","0.6","6.4","0.6","1.2","3.5","0.0","20.3","10.4","5.8","4.1","40.7","100.0","256"], - ["","Other backward class","71.4","51.1","34.9","0.0","8.6","1.4","0.0","6.2","0.0","20.4","12.6","7.8","0.0","28.6","100.0","211"], - ["","Other","71.1","48.8","28.2","0.8","13.3","0.9","0.3","5.2","0.1","22.3","12.9","9.1","0.3","28.9","100.0","3,319"], - ["","Wealth index","","","","","","","","","","","","","","","",""], - ["","Lowest","64.5","48.6","34.3","0.5","10.5","0.6","0.7","2.0","0.0","15.9","9.9","4.6","1.4","35.5","100.0","1,258"], - ["","Second","68.5","50.4","36.2","1.1","11.4","0.5","0.1","1.1","0.0","18.1","11.2","6.7","0.2","31.5","100.0","1,317"], - ["","Middle","75.5","52.8","33.6","0.6","14.2","0.4","0.5","3.4","0.1","22.7","13.4","8.9","0.4","24.5","100.0","1,018"], - ["","Fourth","73.9","52.3","32.0","0.5","12.5","0.6","0.2","6.3","0.2","21.6","11.5","9.9","0.2","26.1","100.0","908"], - ["","Highest","78.3","44.4","19.5","1.0","9.7","1.4","0.0","12.7","0.0","33.8","18.2","15.6","0.0","21.7","100.0","733"], - ["","Number of living children","","","","","","","","","","","","","","","",""], - ["","No children","25.1","7.6","0.3","0.5","2.0","0.0","0.0","4.8","0.0","17.5","9.0","8.5","0.0","74.9","100.0","563"], - ["","1 child","66.5","32.1","3.7","0.7","20.1","0.7","0.1","6.9","0.0","34.3","18.9","15.2","0.3","33.5","100.0","1,190"], - ["","1 son","66.8","33.2","4.1","0.7","21.1","0.5","0.3","6.6","0.0","33.5","21.2","12.3","0.0","33.2","100.0","672"], - ["","No 
sons","66.1","30.7","3.1","0.6","18.8","0.8","0.0","7.3","0.0","35.4","15.8","19.0","0.6","33.9","100.0","517"], - ["","2 children","81.6","60.5","41.8","0.9","11.6","0.8","0.3","4.8","0.2","21.1","12.2","8.3","0.6","18.4","100.0","1,576"], - ["","1 or more sons","83.7","64.2","46.4","0.9","10.8","0.8","0.4","4.8","0.1","19.5","11.1","7.6","0.7","16.3","100.0","1,268"], - ["","No sons","73.2","45.5","23.2","1.0","15.1","0.9","0.0","4.8","0.5","27.7","16.8","11.0","0.0","26.8","100.0","308"], - ["","3 children","83.9","71.2","57.7","0.8","9.8","0.6","0.5","1.8","0.0","12.7","8.7","3.3","0.8","16.1","100.0","961"], - ["","1 or more sons","85.0","73.2","60.3","0.9","9.4","0.5","0.5","1.6","0.0","11.8","8.1","3.0","0.7","15.0","100.0","860"], - ["","No sons","74.7","53.8","35.3","0.0","13.7","1.6","0.0","3.2","0.0","20.9","13.4","6.1","1.5","25.3","100.0","101"], - ["","4+ children","74.3","58.1","45.1","0.6","8.7","0.6","0.7","2.4","0.0","16.1","9.9","5.4","0.8","25.7","100.0","944"], - ["","1 or more sons","73.9","58.2","46.0","0.7","8.3","0.7","0.7","1.9","0.0","15.7","9.4","5.5","0.8","26.1","100.0","901"], - ["","No sons","(82.1)","(57.3)","(25.6)","(0.0)","(17.8)","(0.0)","(0.0)","(13.9)","(0.0)","(24.8)","(21.3)","(3.5)","(0.0)","(17.9)","100.0","43"], - ["","Total","71.2","49.9","32.2","0.7","11.7","0.6","0.3","4.3","0.1","21.3","12.3","8.4","0.5","28.8","100.0","5,234"], - ["","NFHS-2 (1998-99)","66.6","47.3","32.0","1.8","9.2","1.4","na","2.9","na","na","8.7","9.8","na","33.4","100.0","4,116"], - ["","NFHS-1 (1992-93)","57.7","37.6","26.5","4.3","3.6","1.3","0.1","1.9","na","na","11.3","8.3","na","42.3","100.0","3,970"], - ["","","Note: If more than one method is used, only the most effective method is considered in this tabulation. Total includes women for whom caste/tribe was not known or is missing, who are","","","","","","","","","","","","","","",""], - ["","not shown separately.","","","","","","","","","","","","","","","",""], - ["","na = Not available","","","","","","","","","","","","","","","",""], - ["","","ns = Not shown; see table 2b, footnote 1","","","","","","","","","","","","","","",""], - ["","( ) Based on 25-49 unweighted cases.","","","","","","","","","","","","","","","",""], - ["","","","","","","","","54","","","","","","","","",""] - ] - pdfname = os.path.join(testdir, "left_rotated_table_2.pdf") - manager = Pdf(Stream(flag_size=False), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) - - pdfname = os.path.join(testdir, "right_rotated_table_2.pdf") - manager = Pdf(Stream(flag_size=False), pdfname, clean=True) - tables = manager.extract() - assert_equal(tables["page-1"]["table-1"]["data"], data) \ No newline at end of file diff --git a/tests/twotables_1.pdf b/tests/twotables_1.pdf deleted file mode 100644 index cbbeeda..0000000 Binary files a/tests/twotables_1.pdf and /dev/null differ diff --git a/tests/twotables_2.pdf b/tests/twotables_2.pdf deleted file mode 100644 index 5249887..0000000 Binary files a/tests/twotables_2.pdf and /dev/null differ diff --git a/tools/camelot b/tools/camelot deleted file mode 100755 index 0918fe1..0000000 --- a/tools/camelot +++ /dev/null @@ -1,694 +0,0 @@ -#!/usr/bin/env python2 -from __future__ import print_function -import os -import csv -import sys -import glob -import time -import zipfile -import warnings -import cStringIO - -import numpy as np -from docopt import docopt -from collections import Counter -import matplotlib.pyplot as plt -from PyPDF2 import 
PdfFileReader
-
-from camelot.pdf import Pdf
-from camelot.lattice import Lattice
-from camelot.stream import Stream
-from camelot.ocr import OCRLattice, OCRStream
-from camelot import utils
-
-
-doc = """
-Camelot: PDF parsing made simpler!
-
-usage:
- camelot [options] <method> [<args>...]
-
-options:
- -h, --help                Show this screen.
- -v, --version             Show version.
- -p, --pages <pageno>      Comma-separated list of page numbers.
-                           Example: -p 1,3-6,10  [default: 1]
- -P, --parallel            Parallelize the parsing process.
- -f, --format <format>     Output format. (csv,tsv,zip,html,json,xlsx) [default: csv]
- -l, --log                 Log to file.
- -o, --output <directory>  Output directory.
- -M, --cmargin <cmargin>   Char margin. Chars closer than cmargin are
-                           grouped together to form a word. [default: 1.0]
- -L, --lmargin <lmargin>   Line margin. Lines closer than lmargin are
-                           grouped together to form a textbox. [default: 0.5]
- -W, --wmargin <wmargin>   Word margin. Insert blank spaces between chars
-                           if distance between words is greater than word
-                           margin. [default: 0.1]
- -J, --split_text          Split text lines if they span across multiple cells.
- -K, --flag_size           Flag substring if its size differs from the whole string.
-                           Useful for super and subscripts.
- -X, --print-stats         List stats on the parsing process.
- -Y, --save-stats          Save stats to a file.
- -Z, --plot <dist>         Plot distributions. (page,all,rc)
-
-camelot methods:
- lattice  Looks for lines between data.
- stream   Looks for spaces between data.
- ocrl     Lattice, but for images.
- ocrs     Stream, but for images.
-
-See 'camelot <method> -h' for more information on a specific method.
-"""
-
-lattice_doc = """
-Lattice method looks for lines between text to form a table.
-
-usage:
- camelot lattice [-t <tarea>...] [-F <fill>...] [-m <mtol>...]
-                 [-j <jtol>...] [options] [--] <file>
-
-options:
- -t, --tarea <tarea>            Specific table areas to analyze.
- -F, --fill <fill>              Fill data in horizontal and/or vertical spanning
-                                cells. Example: -F h, -F v, -F hv
- -m, --mtol <mtol>              Tolerance to account for when merging lines
-                                which are very close. [default: 2]
- -j, --jtol <jtol>              Tolerance to account for when matching line endings
-                                with intersections. [default: 2]
- -b, --blocksize <blocksize>    See adaptive threshold doc. [default: 15]
- -C, --constant <constant>      See adaptive threshold doc. [default: -2]
- -s, --scale <scale>            Scaling factor. Large scaling factor leads to
-                                smaller lines being detected. [default: 15]
- -I, --iterations <iterations>  Number of iterations for dilation. [default: 0]
- -i, --invert                   Invert pdf image to make sure that lines are
-                                in foreground.
- -T, --shift_text <shift_text>  Specify where the text in a spanning cell
-                                should flow, order-sensitive. [default: lt]
- -d, --debug <debug>            Debug by visualizing pdf geometry.
-                                (contour,line,joint,table) Example: -d table
-"""
-
-stream_doc = """
-Stream method looks for whitespaces between text to form a table.
-
-usage:
- camelot stream [-t <tarea>...] [-c <columns>...] [-m <mtol>...]
-                [-y <ytol>...] [options] [--] <file>
-
-options:
- -t, --tarea <tarea>      Specific table areas to analyze.
- -c, --columns <columns>  Comma-separated list of column x-coordinates.
-                          Example: -c 10.1,20.2,30.3
- -m, --mtol <mtol>        Tolerance to account for when merging columns
-                          together. [default: 0]
- -y, --ytol <ytol>        Tolerance to account for when grouping rows
-                          together. [default: 2]
- -d, --debug              Debug by visualizing textboxes.
-"""
-
-
-ocrl_doc = """
-Lattice, but for images.
-
-usage:
- camelot ocrl [-t <tarea>...] [-m <mtol>...] [options] [--] <file>
-
-options:
- -t, --tarea <tarea>            Specific table areas to analyze.
- -m, --mtol <mtol>              Tolerance to account for when merging lines
-                                which are very close. [default: 2]
- -b, --blocksize <blocksize>    See adaptive threshold doc. [default: 15]
- -C, --constant <constant>      See adaptive threshold doc. [default: -2]
- -D, --dpi <dpi>                Dots per inch, specify image quality to be used for OCR.
-                                [default: 300]
- -g, --layout <layout>          Tesseract page segmentation mode. [default: 7]
- -l, --lang <lang>              Specify language to be used for OCR. [default: eng]
- -s, --scale <scale>            Scaling factor. Large scaling factor leads to
-                                smaller lines being detected. [default: 15]
- -I, --iterations <iterations>  Number of iterations for dilation. [default: 0]
- -d, --debug <debug>            Debug by visualizing pdf geometry.
-                                (contour,line,joint,table) Example: -d table
-"""
-
-ocrs_doc = """
-Stream, but for images.
-
-usage:
- camelot ocrs [-t <tarea>...] [-c <columns>...] [options] [--] <file>
-
-options:
- -t, --tarea <tarea>            Specific table areas to analyze.
- -c, --columns <columns>        Comma-separated list of column x-coordinates.
-                                Example: -c 10.1,20.2,30.3
- -b, --blocksize <blocksize>    See adaptive threshold doc. [default: 15]
- -C, --constant <constant>      See adaptive threshold doc. [default: -2]
- -D, --dpi <dpi>                Dots per inch, specify image quality to be used for OCR.
-                                [default: 300]
- -g, --layout <layout>          Tesseract page segmentation mode. [default: 7]
- -l, --lang <lang>              Specify language to be used for OCR. [default: eng]
- -G, --line-scale <line_scale>  Line scaling factor. [default: 15]
- -S, --char-scale <char_scale>  Char scaling factor. [default: 200]
- -d, --debug                    Debug by visualizing image.
-"""
-
-
-def plot_table_barchart(r, c, p, pno, tno):
-    row_idx = [i + 1 for i, row in enumerate(r)]
-    col_idx = [i + 1 for i, col in enumerate(c)]
-    r_index = np.arange(len(r))
-    c_index = np.arange(len(c))
-    width = 0.7
-
-    plt.figure(figsize=(8, 6))
-    plt.subplot(2, 1, 1)
-    plt.title('Percentage of empty cells in table: {0:.2f}'.format(p))
-    plt.xlabel('row index')
-    plt.ylabel('number of non-empty cells in row')
-    plt.bar(r_index, r)
-    plt.xticks(r_index + width * 0.5, row_idx)
-    plt.ylim(0, len(c))
-
-    plt.subplot(2, 1, 2)
-    plt.xlabel('column index')
-    plt.ylabel('number of non-empty cells in column')
-    plt.bar(c_index, c)
-    plt.xticks(c_index + width * 0.5, col_idx)
-    plt.ylim(0, len(r))
-    plt.savefig(''.join([pno, '_', tno, '.png']), dpi=300)
-
-
-def plot_all_barchart(data, output):
-    r_empty_cells = []
-    for page_number in data.keys():
-        page = data[page_number]
-        for table_number in page.keys():
-            table = page[table_number]
-            r_empty_cells.extend([r / float(table['ncols']) for r in table['r_nempty_cells']])
-    c = Counter(r_empty_cells)
-    if 0.0 not in c:
-        c.update({0.0: 0})
-    if 1.0 not in c:
-        c.update({1.0: 0})
-
-    plt.figure(figsize=(8, 6))
-    plt.xlabel('percentage of non-empty cells in a row')
-    plt.ylabel('percentage of rows processed')
-    row_p = [count / float(sum(c.values())) for count in c.values()]
-    plt.bar(c.keys(), row_p, align='center', width=0.05)
-    plt.ylim(0, 1.0)
-    plt.savefig(''.join([output, '_all.png']), dpi=300)
-
-
-def plot_rc_piechart(data, output):
-    from matplotlib import cm
-
-    tables = 0
-    rows, cols = [], []
-    for page_number in data.keys():
-        page = data[page_number]
-        for table_number in page.keys():
-            table = page[table_number]
-            tables += 1
-            rows.append(table['nrows'])
-            cols.append(table['ncols'])
-
-    r = Counter(rows)
-    c = Counter(cols)
-
-    plt.figure(figsize=(8, 6))
-    cs1 = cm.Set1(np.arange(len(r)) / float(len(r)))
-    ax1 = plt.subplot(211, aspect='equal')
-    ax1.pie(r.values(), colors=cs1, labels=r.keys(), startangle=90)
-    ax1.set_title('row distribution across tables')
-
-    cs2 = cm.Set1(np.arange(len(c)) / float(len(c)))
-    ax2 = plt.subplot(212, aspect='equal')
-    ax2.pie(c.values(), colors=cs2, labels=c.keys(), startangle=90)
-    ax2.set_title('column distribution across tables')
-    plt.savefig(''.join([output, '_rc.png']), dpi=300)
-
-
-def print_stats(data, p_time):
-    from operator import itemgetter
-    from itertools import groupby
-
-    scores = []
-    continuous_tables = []
-    total_tables = 0
-    for page_number in data.keys():
-        page = data[page_number]
-        total_tables += len(page.keys())
-        for table_number in page.keys():
-            table = page[table_number]
-            continuous_tables.append((page_number, table_number, table['ncols']))
-            scores.append(table['score'])
-    avg_score = np.mean(scores)
-
-    ct_pages = []
-    header_string = ""
-    if len(continuous_tables) > 1:
-        tables = sorted(continuous_tables, key=lambda x: (int(x[0][5:]), int(x[1][6:])))
-        for k, g in groupby(tables, key=itemgetter(2)):
-            g = list(g)
-            tables_same_ncols = set([int(t[0][5:]) for t in g])
-            tables_same_ncols = sorted(list(tables_same_ncols))
-            for K, G in groupby(enumerate(tables_same_ncols), key=lambda (i, x): i - x):
-                G = list(G)
-                ct_pages.append((str(G[0][1]), str(G[-1][1])))
-
-        result_headers = []
-        for ct in ct_pages:
-            header_idx = {}
-            possible_headers = []
-            ncols = 0
-            for page_number in range(int(ct[0]), int(ct[1]) + 1):
-                page = data['page-{0}'.format(page_number)]
-                for table_number in page.keys():
-                    table = page[table_number]
-                    ncols = table['ncols']
-                    for i, row in enumerate(table['data']):
-                        try:
-                            header_idx[tuple(row)].append(i)
-                        except KeyError:
-                            header_idx[tuple(row)] = [i]
-            possible_headers = sorted(header_idx, key=lambda k: len(header_idx[k]), reverse=True)[:10]
-            possible_headers = filter(lambda z: len(z) == ncols,
-                                      [filter(lambda x: x != '', p_h) for p_h in possible_headers])
-            modes = []
-            for p_h in possible_headers:
-                try:
-                    modes.append((p_h, max(set(header_idx[p_h]), key=header_idx[p_h].count)))
-                except KeyError:
-                    pass
-            header = modes[modes.index(min(modes, key=lambda x: x[1]))][0]
-            result_headers.append(header)
-
-        header_string = "Multi-page table headers*:\n"
-        header_string = ''.join([header_string, '\n'.join(['pages {0} -> {1}{2}{3}'.format(
-            '-'.join([cr[0][0], cr[0][1]]), '"', '","'.join(cr[1]), '"') for cr in zip(
-                ct_pages, result_headers)])])
-
-    avg_time = "Time taken per page: {0:.2f} seconds\n".format(
-        p_time / float(len(data))) if len(data) not in [0, 1] else ""
-    equal_ncols = "\nMulti-page tables on*: {0}\n".format(
-        ', '.join(['-'.join(ct) for ct in ct_pages])) if len(data) not in [0, 1] else ""
-    stats = [len(data), p_time, avg_time, total_tables, avg_score, equal_ncols]
-    stat_string = ("Pages processed: {0}\nTime taken: {1:.2f} seconds\n"
-                   "{2}Tables found: {3}\nAverage score: {4:.2f}{5}".format(*stats))
-
-    print(''.join([stat_string, header_string]))
-
-
-def convert_to_html(table):
-    html = ''
-    html = ''.join([html, '<table>\n'])
-    for row in table:
-        html = ''.join([html, '<tr>\n'])
-        for data in row:
-            html = ''.join([html, '<td>', data, '</td>\n'])
-        html = ''.join([html, '</tr>\n'])
-    html = ''.join([html, '</table>\n'])
-    return html
-
-
-def write_to_disk(data, f='csv', output=None, filename=None):
-    # raise something if filename and/or output are None
-    fname = os.path.basename(filename)
-    froot, __ = os.path.splitext(fname)
-    if f in ['csv', 'tsv']:
-        delimiter = ',' if f == 'csv' else '\t'
-        for page_number in sorted(data.keys()):
-            if data[page_number] is not None:
-                for table_number in sorted(data[page_number].keys()):
-                    dsvname = '{0}.{1}'.format(''.join([page_number, '_', table_number]), f)
-                    with open(os.path.join(output, dsvname), 'w') as outfile:
-                        writer = csv.writer(
-                            outfile, delimiter=delimiter, quoting=csv.QUOTE_ALL)
-                        for row in data[page_number][table_number]['data']:
-                            writer.writerow(row)
-    elif f == 'zip':
-        csv_zip = os.path.join(output, '{0}.zip'.format(froot))
-        with zipfile.ZipFile(csv_zip, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) \
-                as zfile:
-            for page_number in sorted(data.keys()):
-                if data[page_number] is not None:
-                    for table_number in sorted(data[page_number].keys()):
-                        csvname = '{0}.{1}'.format(''.join([page_number, '_', table_number]), 'csv')
-                        outfile = cStringIO.StringIO()
-                        writer = csv.writer(
-                            outfile, delimiter=',', quoting=csv.QUOTE_ALL)
-                        for row in data[page_number][table_number]['data']:
-                            writer.writerow(row)
-                        zfile.writestr(csvname, outfile.getvalue())
-                        outfile.close()
-    elif f == 'html':
-        htmlname = '{0}.html'.format(froot)
-        for page_number in sorted(data.keys()):
-            for table_number in sorted(data[page_number].keys()):
-                with open(os.path.join(output, htmlname), 'a') as htmlfile:
-                    htmlfile.write(convert_to_html(data[page_number][table_number]['data']))
-    elif f == 'json':
-        import json
-        with open(os.path.join(output, '{0}.json'.format(froot)), 'w') \
-                as jsonfile:
-            json.dump(data, jsonfile)
-    elif f == 'xlsx':
-        try:
-            from pyexcel_xlsx import save_data
-            from collections import OrderedDict
-            xlsx_data = OrderedDict()
-            for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                for table_number in sorted(data[page_number].keys(), key=lambda x: int(x[6:])):
-                    sheet_name = ''.join([page_number, '_', table_number])
-                    xlsx_data.update({sheet_name:
-                        [row for row in data[page_number][table_number]['data']]})
-            save_data(os.path.join(output, '{0}.xlsx'.format(froot)), xlsx_data)
-        except ImportError:
-            print("link to install docs")
-
-
-if __name__ == '__main__':
-    start_time = time.time()
-
-    args = docopt(doc, version='0.1', options_first=True)
-    argv = [args['<method>']] + args['<args>']
-    if args['<method>'] == 'lattice':
-        args.update(docopt(lattice_doc, argv=argv))
-    elif args['<method>'] == 'stream':
-        args.update(docopt(stream_doc, argv=argv))
-    elif args['<method>'] == 'ocrl':
-        args.update(docopt(ocrl_doc, argv=argv))
-    elif args['<method>'] == 'ocrs':
-        args.update(docopt(ocrs_doc, argv=argv))
-
-    filename = args['<file>']
-    filedir = os.path.dirname(args['<file>'])
-    logname, __ = os.path.splitext(filename)
-    logname = ''.join([logname, '.log'])
-    scorename, __ = os.path.splitext(filename)
-    scorename = ''.join([scorename, '_info.csv'])
-    pngname, __ = os.path.splitext(filename)
-
-    FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
-    if args['--log'] is not None:
-        logger = utils.setup_logging(args['--log'])
-    else:
-        logger = utils.setup_logging(os.path.join(os.getcwd(), 'camelot.log'))
-
-    p = []
-    if args['--pages'] == '1':
-        p.append({'start': 1, 'end': 1})
-    else:
-        infile = PdfFileReader(open(filename, 'rb'), strict=False)
-        if args['--pages'] == 'all':
-            p.append({'start': 1, 'end': infile.getNumPages()})
-        else:
-            for r in args['--pages'].split(','):
-                if '-' in r:
-                    a, b = r.split('-')
-                    if b == 'end':
-                        b = infile.getNumPages()
-                    p.append({'start': int(a), 'end': int(b)})
-                else:
-                    p.append({'start': int(r), 'end': int(r)})
-
-    logger.info('Applying {0} method on {1}'.format(args['<method>'],
-                os.path.basename(filename)))
-    margins = (float(args['--cmargin']), float(args['--lmargin']),
-               float(args['--wmargin']))
-    if args['<method>'] == 'lattice':
-        try:
-            kwargs = {
-                'table_area': args['--tarea'] if args['--tarea'] else None,
-                'fill': args['--fill'] if args['--fill'] else None,
-                'mtol': [int(m) for m in args['--mtol']],
-                'jtol': [int(j) for j in args['--jtol']],
-                'blocksize': int(args['--blocksize']),
-                'threshold_constant': float(args['--constant']),
-                'scale': int(args['--scale']),
-                'iterations': int(args['--iterations']),
-                'invert': args['--invert'],
-                'margins': margins,
-                'split_text': args['--split_text'],
-                'flag_size': args['--flag_size'],
-                'shift_text': list(args['--shift_text']) if args['--shift_text'] else ['l', 't'],
-                'debug': args['--debug']
-            }
-            manager = Pdf(Lattice(**kwargs), filename, pagenos=p, clean=True,
-                          parallel=args['--parallel'])
-            data = manager.extract()
-
-            processing_time = time.time() - start_time
-            logger.info("Finished processing in " + str(processing_time) + " seconds")
-
-            if args['--plot']:
-                if args['--output']:
-                    pngname = os.path.join(args['--output'], os.path.basename(pngname))
-                plot_type = args['--plot'].split(',')
-                if 'page' in plot_type:
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            plot_table_barchart(table['r_nempty_cells'],
-                                                table['c_nempty_cells'],
-                                                table['empty_p'],
-                                                page_number,
-                                                table_number)
-
-                if 'all' in plot_type:
-                    plot_all_barchart(data, pngname)
-
-                if 'rc' in plot_type:
-                    plot_rc_piechart(data, pngname)
-
-            if args['--print-stats']:
-                print_stats(data, processing_time)
-
-            if args['--save-stats']:
-                if args['--output']:
-                    scorename = os.path.join(args['--output'], os.path.basename(scorename))
-                with open(scorename, 'w') as score_file:
-                    score_file.write('table,nrows,ncols,empty_p,line_p,text_p,score\n')
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            score_file.write('{0},{1},{2},{3},{4},{5},{6}\n'.format(
-                                ''.join([page_number, '_', table_number]),
-                                table['nrows'],
-                                table['ncols'],
-                                table['empty_p'],
-                                table['line_p'],
-                                table['text_p'],
-                                table['score']))
-            if args['--debug']:
-                manager.debug_plot()
-        except Exception as e:
-            logger.exception(e.message, exc_info=True)
-            sys.exit()
-    elif args['<method>'] == 'stream':
-        try:
-            kwargs = {
-                'table_area': args['--tarea'] if args['--tarea'] else None,
-                'columns': args['--columns'] if args['--columns'] else None,
-                'ytol': [int(y) for y in args['--ytol']],
-                'mtol': [int(m) for m in args['--mtol']],
-                'margins': margins,
-                'split_text': args['--split_text'],
-                'flag_size': args['--flag_size'],
-                'debug': args['--debug']
-            }
-            manager = Pdf(Stream(**kwargs), filename, pagenos=p, clean=True,
-                          parallel=args['--parallel'])
-            data = manager.extract()
-
-            processing_time = time.time() - start_time
-            logger.info("Finished processing in " + str(processing_time) + " seconds")
-
-            if args['--plot']:
-                if args['--output']:
-                    pngname = os.path.join(args['--output'], os.path.basename(pngname))
-                plot_type = args['--plot'].split(',')
-                if 'page' in plot_type:
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            plot_table_barchart(table['r_nempty_cells'],
-                                                table['c_nempty_cells'],
-                                                table['empty_p'],
-                                                page_number,
-                                                table_number)
-
-                if 'all' in plot_type:
-                    plot_all_barchart(data, pngname)
-
-                if 'rc' in plot_type:
-                    plot_rc_piechart(data, pngname)
-
-            if args['--print-stats']:
-                print_stats(data, processing_time)
-
-            if args['--save-stats']:
-                if args['--output']:
-                    scorename = os.path.join(args['--output'], os.path.basename(scorename))
-                with open(scorename, 'w') as score_file:
-                    score_file.write('table,nrows,ncols,empty_p,score\n')
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            score_file.write('{0},{1},{2},{3},{4}\n'.format(
-                                ''.join([page_number, '_', table_number]),
-                                table['nrows'],
-                                table['ncols'],
-                                table['empty_p'],
-                                table['score']))
-
-            if args['--debug']:
-                manager.debug_plot()
-        except Exception as e:
-            logger.exception(e.message, exc_info=True)
-            sys.exit()
-    elif args['<method>'] == 'ocrl':
-        try:
-            kwargs = {
-                'table_area': args['--tarea'] if args['--tarea'] else None,
-                'mtol': [int(m) for m in args['--mtol']],
-                'blocksize': int(args['--blocksize']),
-                'threshold_constant': float(args['--constant']),
-                'dpi': int(args['--dpi']),
-                'layout': int(args['--layout']),
-                'lang': args['--lang'],
-                'scale': int(args['--scale']),
-                'iterations': int(args['--iterations']),
-                'debug': args['--debug']
-            }
-            manager = Pdf(OCRLattice(**kwargs), filename, pagenos=p, clean=True,
-                          parallel=args['--parallel'])
-            data = manager.extract()
-
-            processing_time = time.time() - start_time
-            logger.info("Finished processing in " + str(processing_time) + " seconds")
-
-            if args['--plot']:
-                if args['--output']:
-                    pngname = os.path.join(args['--output'], os.path.basename(pngname))
-                plot_type = args['--plot'].split(',')
-                if 'page' in plot_type:
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            plot_table_barchart(table['r_nempty_cells'],
-                                                table['c_nempty_cells'],
-                                                table['empty_p'],
-                                                page_number,
-                                                table_number)
-
-                if 'all' in plot_type:
-                    plot_all_barchart(data, pngname)
-
-                if 'rc' in plot_type:
-                    plot_rc_piechart(data, pngname)
-
-            if args['--print-stats']:
-                print_stats(data, processing_time)
-
-            if args['--save-stats']:
-                if args['--output']:
-                    scorename = os.path.join(args['--output'], os.path.basename(scorename))
-                with open(scorename, 'w') as score_file:
-                    score_file.write('table,nrows,ncols,empty_p,line_p,text_p,score\n')
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            score_file.write('{0},{1},{2},{3},{4},{5},{6}\n'.format(
-                                ''.join([page_number, '_', table_number]),
-                                table['nrows'],
-                                table['ncols'],
-                                table['empty_p'],
-                                table['line_p'],
-                                table['text_p'],
-                                table['score']))
-            if args['--debug']:
-                manager.debug_plot()
-        except Exception as e:
-            logger.exception(e.message, exc_info=True)
-            sys.exit()
-    elif args['<method>'] == 'ocrs':
-        try:
-            kwargs = {
-                'table_area': args['--tarea'] if args['--tarea'] else None,
-                'columns': args['--columns'] if args['--columns'] else None,
-                'blocksize': int(args['--blocksize']),
-                'threshold_constant': float(args['--constant']),
-                'dpi': int(args['--dpi']),
-                'layout': int(args['--layout']),
-                'lang': args['--lang'],
-                'line_scale': int(args['--line-scale']),
-                'char_scale': int(args['--char-scale']),
-                'debug': args['--debug']
-            }
-            manager = Pdf(OCRStream(**kwargs), filename, pagenos=p, clean=True,
-                          parallel=args['--parallel'])
-            data = manager.extract()
-
-            processing_time = time.time() - start_time
-            logger.info("Finished processing in " + str(processing_time) + " seconds")
-
-            if args['--plot']:
-                if args['--output']:
-                    pngname = os.path.join(args['--output'], os.path.basename(pngname))
-                plot_type = args['--plot'].split(',')
-                if 'page' in plot_type:
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            plot_table_barchart(table['r_nempty_cells'],
-                                                table['c_nempty_cells'],
-                                                table['empty_p'],
-                                                page_number,
-                                                table_number)
-
-                if 'all' in plot_type:
-                    plot_all_barchart(data, pngname)
-
-                if 'rc' in plot_type:
-                    plot_rc_piechart(data, pngname)
-
-            if args['--print-stats']:
-                print_stats(data, processing_time)
-
-            if args['--save-stats']:
-                if args['--output']:
-                    scorename = os.path.join(args['--output'], os.path.basename(scorename))
-                with open(scorename, 'w') as score_file:
-                    score_file.write('table,nrows,ncols,empty_p,line_p,text_p,score\n')
-                    for page_number in sorted(data.keys(), key=lambda x: int(x[5:])):
-                        page = data[page_number]
-                        for table_number in sorted(page.keys(), key=lambda x: int(x[6:])):
-                            table = page[table_number]
-                            score_file.write('{0},{1},{2},{3},{4},{5},{6}\n'.format(
-                                ''.join([page_number, '_', table_number]),
-                                table['nrows'],
-                                table['ncols'],
-                                table['empty_p'],
-                                table['line_p'],
-                                table['text_p'],
-                                table['score']))
-            if args['--debug']:
-                manager.debug_plot()
-        except Exception as e:
-            logger.exception(e.message, exc_info=True)
-            sys.exit()
-
-    if args.get('--debug') is not None and args['--debug']:
-        print("See 'camelot <method> -h' for various parameters you can tweak.")
-    else:
-        output = filedir if args['--output'] is None else args['--output']
-        write_to_disk(data, f=args['--format'],
-                      output=output, filename=filename)