diff --git a/camelot/core.py b/camelot/core.py
index 5fff3c6..1ce71ab 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -10,6 +10,15 @@ from operator import itemgetter
 import numpy as np
 import pandas as pd
 
+from cv2 import cv2
+
+from .utils import (
+    build_file_path_in_temp_dir,
+    compute_accuracy,
+    compute_whitespace,
+    export_pdf_as_png
+)
+
 
 # minimum number of vertical textline intersections for a textedge
 # to be considered valid
@@ -159,7 +168,10 @@ class TextEdges(object):
         # get vertical textedges that intersect maximum number of
         # times with horizontal textlines
         relevant_align = max(intersections_sum.items(), key=itemgetter(1))[0]
-        return self._textedges[relevant_align]
+        return list(filter(
+            lambda te: te.is_valid,
+            self._textedges[relevant_align])
+        )
 
     def get_table_areas(self, textlines, relevant_textedges):
         """Returns a dict of interesting table areas on the PDF page
@@ -179,7 +191,6 @@ class TextEdges(object):
 
         table_areas = {}
         for te in relevant_textedges:
-            if te.is_valid:
                 if not table_areas:
                     table_areas[(te.x, te.y0, te.x, te.y1)] = None
                 else:
@@ -225,7 +236,8 @@ class TextEdges(object):
                     max(found[3], tl.y1),
                 )
                 table_areas[updated_area] = None
-        average_textline_height = sum_textline_height / float(len(textlines))
+        average_textline_height = sum_textline_height / \
+            float(len(textlines))
 
         # add some padding to table areas
         table_areas_padded = {}
@@ -339,6 +351,8 @@ class Table(object):
         Accuracy with which text was assigned to the cell.
     whitespace : float
         Percentage of whitespace in the table.
+    filename : str
+        Path of the original PDF
     order : int
         Table number on PDF page.
     page : int
@@ -356,8 +370,15 @@ class Table(object):
         self.shape = (0, 0)
         self.accuracy = 0
         self.whitespace = 0
+        self.filename = None
         self.order = None
         self.page = None
+        self.flavor = None      # Flavor of the parser that generated the table
+        self.pdf_size = None    # Dimensions of the original PDF page
+        self.debug_info = None  # Field holding debug data
+
+        self._image = None
+        self._image_path = None  # Temporary file to hold an image of the pdf
 
     def __repr__(self):
         return "<{} shape={}>".format(self.__class__.__name__, self.shape)
@@ -392,6 +413,32 @@ class Table(object):
         }
         return report
 
+    def record_metadata(self, parser):
+        """Copy provenance from ``parser`` and derive df/shape/whitespace.
+        """
+        self.flavor = parser.id
+        self.filename = parser.filename
+        self.debug_info = parser.debug_info
+        self.pdf_size = (parser.pdf_width, parser.pdf_height)
+
+        cell_rows = self.data
+        self.df = pd.DataFrame(cell_rows)
+        self.shape = self.df.shape
+        self.whitespace = compute_whitespace(cell_rows)
+
+    def get_pdf_image(self):
+        """Return the page image, rendering and caching it on first use.
+        """
+        if self._image is not None:
+            return self._image
+        if self._image_path is None:
+            # Export the PDF to a temporary PNG the first time only
+            png_name = os.path.basename(self.filename)
+            self._image_path = build_file_path_in_temp_dir(png_name, ".png")
+            export_pdf_as_png(self.filename, self._image_path)
+        self._image = cv2.imread(self._image_path)
+        return self._image
+
     def set_all_edges(self):
         """Sets all table edges to True.
         """
diff --git a/camelot/handlers.py b/camelot/handlers.py
index a689ee5..7a9f2ff 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -8,7 +8,7 @@ from PyPDF2 import PdfFileReader, PdfFileWriter
 from .core import TableList
 from .parsers import Stream, Lattice
 from .utils import (
-    TemporaryDirectory,
+    build_file_path_in_temp_dir,
     get_page_layout,
     get_text_objects,
     get_rotation,
@@ -16,6 +16,11 @@ from .utils import (
     download_url,
 )
 
+PARSERS = {
+    "lattice": Lattice,
+    "stream": Stream
+}
+
 
 class PDFHandler(object):
     """Handles all operations like temp directory creation, splitting
@@ -89,31 +94,47 @@ class PDFHandler(object):
             P.extend(range(p["start"], p["end"] + 1))
         return sorted(set(P))
 
-    def _save_page(self, filepath, page, temp):
-        """Saves specified page from PDF into a temporary directory.
+    def _read_pdf_page(self, page=1, layout_kwargs=None):
+        """Saves specified page from PDF into a temporary directory. Removes
+        password protection and normalizes rotation.
 
         Parameters
         ----------
-        filepath : str
-            Filepath or URL of the PDF file.
         page : int
             Page number.
-        temp : str
-            Tmp directory.
+        layout_kwargs : dict, optional (default: {})
+            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.  # noqa
+
+
+        Returns
+        -------
+        layout : object
+
+        dimensions : tuple
+            The dimensions of the pdf page
+
+        filepath : str
+            The path of the single page PDF - either the original, or a
+            normalized version.
 
         """
-        with open(filepath, "rb") as fileobj:
+        layout_kwargs = layout_kwargs or {}
+        with open(self.filepath, "rb") as fileobj:
+            # Copy the requested page into its own temporary single-page
+            # PDF, decrypting the source first when it is encrypted.
             infile = PdfFileReader(fileobj, strict=False)
             if infile.isEncrypted:
                 infile.decrypt(self.password)
-            fpath = os.path.join(temp, "page-{0}.pdf".format(page))
+            fpath = build_file_path_in_temp_dir(
+                "page-{page}.pdf".format(page=page))
             froot, fext = os.path.splitext(fpath)
             p = infile.getPage(page - 1)
             outfile = PdfFileWriter()
             outfile.addPage(p)
             with open(fpath, "wb") as f:
                 outfile.write(f)
-            layout, __ = get_page_layout(fpath)
+            layout, dimensions = get_page_layout(
+                fpath, **layout_kwargs)
             # fix rotated PDF
             chars = get_text_objects(layout, ltype="char")
             horizontal_text = get_text_objects(layout, ltype="horizontal_text")
@@ -121,12 +142,7 @@ class PDFHandler(object):
             rotation = get_rotation(chars, horizontal_text, vertical_text)
             if rotation != "":
                 fpath_new = "".join(
-                    [
-                        froot.replace("page", "p"),
-                        "_rotated",
-                        fext
-                    ]
-                )
+                    [froot.replace("page", "p"), "_rotated", fext])
                 os.rename(fpath, fpath_new)
                 infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
                 if infile.isEncrypted:
@@ -140,10 +156,13 @@ class PDFHandler(object):
                 outfile.addPage(p)
                 with open(fpath, "wb") as f:
                     outfile.write(f)
+                layout, dimensions = get_page_layout(
+                    fpath, **layout_kwargs)
+        return layout, dimensions, fpath
 
     def parse(
-        self, flavor="lattice", suppress_stdout=False, layout_kwargs=None,
-        **kwargs
+        self, flavor="lattice", suppress_stdout=False,
+        layout_kwargs=None, **kwargs
     ):
         """Extracts tables by calling parser.get_tables on all single
         page PDFs.
@@ -168,19 +187,22 @@ class PDFHandler(object):
         """
         layout_kwargs = layout_kwargs or {}
         tables = []
-        with TemporaryDirectory() as tempdir:
-            for p in self.pages:
-                self._save_page(self.filepath, p, tempdir)
-            pages = [
-                os.path.join(tempdir, "page-{0}.pdf".format(p))
-                for p in self.pages
-            ]
-            parser = Lattice(**kwargs) \
-                if flavor == "lattice" else Stream(**kwargs)
-            for p in pages:
-                t = parser.extract_tables(
-                    p, suppress_stdout=suppress_stdout,
-                    layout_kwargs=layout_kwargs
-                )
-                tables.extend(t)
+
+        parser_obj = PARSERS[flavor]
+        parser = parser_obj(**kwargs)
+
+        # Read the layouts/dimensions of each of the pages we need to
+        # parse. This might require creating a temporary .pdf.
+        for page_idx in self.pages:
+            layout, dimensions, source_file = self._read_pdf_page(
+                page_idx,
+                layout_kwargs=layout_kwargs
+            )
+            parser._generate_layout(source_file, layout, dimensions,
+                                page_idx, layout_kwargs)
+            t = parser.extract_tables(
+                source_file,
+                suppress_stdout=suppress_stdout
+            )
+            tables.extend(t)
         return TableList(sorted(tables))
diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py
index 5713625..bd41fc3 100644
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@@ -2,20 +2,28 @@
 
 import os
 
-from ..utils import get_page_layout, get_text_objects
+from ..utils import (
+    get_text_objects
+)
+from ..core import Table
 
 
 class BaseParser(object):
     """Defines a base parser.
     """
+    def __init__(self, parser_id):
+        self.id = parser_id
 
-    def _generate_layout(self, filename, layout_kwargs):
+        # For plotting details of parsing algorithms
+        self.debug_info = {}
+
+    def _generate_layout(self, filename, layout, dimensions,
+                         page_idx, layout_kwargs):
         self.filename = filename
         self.layout_kwargs = layout_kwargs
-        self.layout, self.dimensions = get_page_layout(
-            filename,
-            **layout_kwargs
-        )
+        self.layout = layout
+        self.dimensions = dimensions
+        self.page = page_idx
         self.images = get_text_objects(self.layout, ltype="image")
         self.horizontal_text = get_text_objects(
             self.layout,
@@ -27,3 +35,25 @@ class BaseParser(object):
         )
         self.pdf_width, self.pdf_height = self.dimensions
         self.rootname, __ = os.path.splitext(self.filename)
+
+    def _initialize_new_table(self, table_idx, cols, rows):
+        """Initialize new table object, ready to be populated
+
+        Parameters
+        ----------
+        table_idx : int
+            Index of this table within the pdf page analyzed
+        cols : list
+            list of coordinate boundaries tuples (left, right)
+        rows : list
+            list of coordinate boundaries tuples (bottom, top)
+
+        Returns
+        -------
+        table : camelot.core.Table
+
+        """
+        table = Table(cols, rows)
+        table.page = self.page
+        table.order = table_idx + 1
+        return table
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index 3a40f47..5bb130b 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -2,15 +2,20 @@
 
 from __future__ import division
 import os
+import sys
 import copy
+import locale
 import logging
 import warnings
+import subprocess
 
+import numpy as np
 import pandas as pd
 
 from .base import BaseParser
-from ..core import Table
 from ..utils import (
+    build_file_path_in_temp_dir,
+    export_pdf_as_png,
     scale_image,
     scale_pdf,
     segments_in_bbox,
@@ -18,7 +23,6 @@ from ..utils import (
     merge_close_lines,
     get_table_index,
     compute_accuracy,
-    compute_whitespace,
 )
 from ..image_processing import (
     adaptive_threshold,
@@ -110,13 +114,13 @@ class Lattice(BaseParser):
         resolution=300,
         **kwargs
     ):
-        shift_text = shift_text or ["l", "t"]
+        super().__init__("lattice")
         self.table_regions = table_regions
         self.table_areas = table_areas
         self.process_background = process_background
         self.line_scale = line_scale
         self.copy_text = copy_text
-        self.shift_text = shift_text
+        self.shift_text = shift_text or ["l", "t"]
         self.split_text = split_text
         self.flag_size = flag_size
         self.strip_text = strip_text
@@ -126,6 +130,8 @@ class Lattice(BaseParser):
         self.threshold_constant = threshold_constant
         self.iterations = iterations
         self.resolution = resolution
+        self.image_path = None
+        self.pdf_image = None
 
     @staticmethod
     def _reduce_index(t, idx, shift_text):
@@ -205,18 +211,6 @@ class Lattice(BaseParser):
                                 t.cells[i][j].text = t.cells[i - 1][j].text
         return t
 
-    def _generate_image(self):
-        from ..ext.ghostscript import Ghostscript
-
-        self.imagename = "".join([self.rootname, ".png"])
-        gs_call = "-q -sDEVICE=png16m -o {} -r300 {}".format(
-            self.imagename, self.filename
-        )
-        gs_call = gs_call.encode().split()
-        null = open(os.devnull, "wb")
-        Ghostscript(*gs_call, stdout=null)
-        null.close()
-
     def _generate_table_bbox(self):
         def scale_areas(areas):
             scaled_areas = []
@@ -230,15 +224,20 @@ class Lattice(BaseParser):
                 scaled_areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1)))
             return scaled_areas
 
-        self.image, self.threshold = adaptive_threshold(
-            self.imagename,
+        self.image_path = build_file_path_in_temp_dir(
+            os.path.basename(self.filename),
+            ".png"
+        )
+        export_pdf_as_png(self.filename, self.image_path)
+        self.pdf_image, self.threshold = adaptive_threshold(
+            self.image_path,
             process_background=self.process_background,
             blocksize=self.threshold_blocksize,
             c=self.threshold_constant,
         )
 
-        image_width = self.image.shape[1]
-        image_height = self.image.shape[0]
+        image_width = self.pdf_image.shape[1]
+        image_height = self.pdf_image.shape[0]
         image_width_scaler = image_width / float(self.pdf_width)
         image_height_scaler = image_height / float(self.pdf_height)
         pdf_width_scaler = self.pdf_width / float(image_width)
@@ -332,7 +331,7 @@ class Lattice(BaseParser):
         if v_s is None or h_s is None:
             raise ValueError("No segments found on {}".format(self.rootname))
 
-        table = Table(cols, rows)
+        table = self._initialize_new_table(table_idx, cols, rows)
         # set table edges to True using ver+hor lines
         table = table.set_edges(v_s, h_s, joint_tol=self.joint_tol)
         # set table border edges to True
@@ -360,6 +359,7 @@ class Lattice(BaseParser):
                     )
                     for r_idx, c_idx, text in indices:
                         table.cells[r_idx][c_idx].text = text
+        # FRHTODO
         accuracy = compute_accuracy([[100, pos_errors]])
 
         if self.copy_text is not None:
@@ -368,39 +368,27 @@ class Lattice(BaseParser):
                 copy_text=self.copy_text
             )
 
-        data = table.data
-        table.df = pd.DataFrame(data)
-        table.shape = table.df.shape
-
-        whitespace = compute_whitespace(data)
-        table.flavor = "lattice"
+        table.record_metadata(self)
         table.accuracy = accuracy
-        table.whitespace = whitespace
-        table.order = table_idx + 1
-        table.page = int(os.path.basename(self.rootname).replace("page-", ""))
 
         # for plotting
         _text = []
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
         table._text = _text
-        table._image = (self.image, self.table_bbox_unscaled)
+        table._image = self.pdf_image  # Reuse the image used for calc
+        table._bbox_unscaled = self.table_bbox_unscaled
         table._segments = (self.vertical_segments, self.horizontal_segments)
         table._textedges = None
 
         return table
 
-    def extract_tables(
-        self,
-        filename,
-        suppress_stdout=False,
-        layout_kwargs=None
-    ):
-        layout_kwargs = layout_kwargs or {}
-        self._generate_layout(filename, layout_kwargs)
+    def extract_tables(self, filename, suppress_stdout=False):
+        # FRHTODO: move extract table core to the base class
         rootname = os.path.basename(self.rootname)
         if not suppress_stdout:
-            logger.info("Processing {rootname}".format(rootname=rootname))
+            logger.info(
+                "Processing {rootname}".format(rootname=rootname))
 
         if not self.horizontal_text:
             if self.images:
@@ -415,7 +403,6 @@ class Lattice(BaseParser):
                 )
             return []
 
-        self._generate_image()
         self._generate_table_bbox()
 
         _tables = []
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 4af0a0e..554e2f8 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -9,7 +9,7 @@ import numpy as np
 import pandas as pd
 
 from .base import BaseParser
-from ..core import TextEdges, Table
+from ..core import TextEdges
 from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
                      compute_whitespace)
 
@@ -69,11 +69,9 @@ class Stream(BaseParser):
         column_tol=0,
         **kwargs
     ):
+        super().__init__("stream")
         self.table_regions = table_regions
         self.table_areas = table_areas
-        self.table_bbox = None
-        self.t_bbox = None
-        self.textedges = []
         self.columns = columns
         self._validate_columns()
         self.split_text = split_text
@@ -191,7 +189,8 @@ class Stream(BaseParser):
 
     @staticmethod
     def _join_rows(rows_grouped, text_y_max, text_y_min):
-        """Makes row coordinates continuous.
+        """Makes row coordinates continuous. For the row to "touch"
+        we split the existing gap between them in half.
 
         Parameters
         ----------
@@ -206,18 +205,20 @@ class Stream(BaseParser):
             List of continuous row y-coordinate tuples.
 
         """
-        row_mids = [
-            sum((t.y0 + t.y1) / 2 for t in r) / len(r) if len(r) > 0 else 0
+        row_boundaries = [
+            [
+                max(t.y1 for t in r),
+                min(t.y0 for t in r)
+            ]
             for r in rows_grouped
         ]
-        rows = [
-            (row_mids[i] + row_mids[i - 1]) / 2
-            for i in range(1, len(row_mids))
-        ]
-        rows.insert(0, text_y_max)
-        rows.append(text_y_min)
-        rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
-        return rows
+        for i in range(0, len(row_boundaries)-1):
+            top_row = row_boundaries[i]
+            bottom_row = row_boundaries[i+1]
+            top_row[1] = bottom_row[0] = (top_row[1] + bottom_row[0]) / 2
+        row_boundaries[0][0] = text_y_max
+        row_boundaries[-1][1] = text_y_min
+        return row_boundaries
 
     @staticmethod
     def _add_columns(cols, text, row_tol):
@@ -414,7 +415,7 @@ class Stream(BaseParser):
         return cols, rows
 
     def _generate_table(self, table_idx, cols, rows, **kwargs):
-        table = Table(cols, rows)
+        table = self._initialize_new_table(table_idx, cols, rows)
         table = table.set_all_edges()
 
         pos_errors = []
@@ -436,32 +437,22 @@ class Stream(BaseParser):
                         table.cells[r_idx][c_idx].text = text
         accuracy = compute_accuracy([[100, pos_errors]])
 
-        data = table.data
-        table.df = pd.DataFrame(data)
-        table.shape = table.df.shape
+        table.record_metadata(self)
 
-        whitespace = compute_whitespace(data)
-        table.flavor = "stream"
         table.accuracy = accuracy
-        table.whitespace = whitespace
-        table.order = table_idx + 1
-        table.page = int(os.path.basename(self.rootname).replace("page-", ""))
 
         # for plotting
         _text = []
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
         table._text = _text
-        table._image = None
+        table._bbox = self.table_bbox
         table._segments = None
         table._textedges = self.textedges
 
         return table
 
-    def extract_tables(self, filename, suppress_stdout=False,
-                       layout_kwargs=None):
-        layout_kwargs = layout_kwargs or {}
-        self._generate_layout(filename, layout_kwargs)
+    def extract_tables(self, filename, suppress_stdout=False):
         if not suppress_stdout:
             logger.info("Processing {}".format(
                 os.path.basename(self.rootname)))
diff --git a/camelot/plotting.py b/camelot/plotting.py
index 51928e9..0782bb1 100644
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@@ -68,11 +68,14 @@ class PlotMethods(object):
                 patches.Rectangle(
                         (t[0], t[1]),
                         t[2] - t[0],
-                        t[3] - t[1]
+                        t[3] - t[1],
+                        alpha=0.5
                     )
                 )
         ax.set_xlim(min(xs) - 10, max(xs) + 10)
         ax.set_ylim(min(ys) - 10, max(ys) + 10)
+        img = table.get_pdf_image()
+        ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
         return fig
 
     def grid(self, table):
@@ -100,6 +103,9 @@ class PlotMethods(object):
                     ax.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]])
                 if cell.bottom:
                     ax.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]])
+
+        img = table.get_pdf_image()
+        ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
         return fig
 
     def contour(self, table):
@@ -115,12 +121,13 @@ class PlotMethods(object):
         fig : matplotlib.fig.Figure
 
         """
-        try:
-            img, table_bbox = table._image
-            _FOR_LATTICE = True
-        except TypeError:
-            img, table_bbox = (None, {table._bbox: None})
-            _FOR_LATTICE = False
+
+        img = table.get_pdf_image()
+        _FOR_LATTICE = table.flavor == "lattice"
+        if _FOR_LATTICE:
+            table_bbox = table._bbox_unscaled
+        else:
+            table_bbox = {table._bbox: None}
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
 
@@ -150,6 +157,8 @@ class PlotMethods(object):
 
         if _FOR_LATTICE:
             ax.imshow(img)
+        else:
+            ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
         return fig
 
     def textedge(self, table):
@@ -173,7 +182,8 @@ class PlotMethods(object):
             ax.add_patch(
                 patches.Rectangle(
                     (t[0], t[1]), t[2] - t[0], t[3] - t[1],
-                    color="blue"
+                    color="blue",
+                    alpha=0.5
                 )
             )
         ax.set_xlim(min(xs) - 10, max(xs) + 10)
@@ -182,6 +192,8 @@ class PlotMethods(object):
         for te in table._textedges:
             ax.plot([te.x, te.x], [te.y0, te.y1])
 
+        img = table.get_pdf_image()
+        ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
         return fig
 
     def joint(self, table):
@@ -197,7 +209,8 @@ class PlotMethods(object):
         fig : matplotlib.fig.Figure
 
         """
-        img, table_bbox = table._image
+        img = table.get_pdf_image()
+        table_bbox = table._bbox_unscaled
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         x_coord = []
@@ -230,4 +243,7 @@ class PlotMethods(object):
             ax.plot([v[0], v[2]], [v[1], v[3]])
         for h in horizontal:
             ax.plot([h[0], h[2]], [h[1], h[3]])
+
+        img = table.get_pdf_image()
+        ax.imshow(img, extent=(0, table.pdf_size[0], 0, table.pdf_size[1]))
         return fig
diff --git a/camelot/utils.py b/camelot/utils.py
index c3bf723..89b6eee 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -3,6 +3,7 @@ from __future__ import division
 
 import re
 import os
+import atexit
 import sys
 import random
 import shutil
@@ -13,6 +14,7 @@ from itertools import groupby
 from operator import itemgetter
 
 import numpy as np
+import pandas as pd
 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdfpage import PDFPage
@@ -29,6 +31,7 @@ from pdfminer.layout import (
     LTImage,
 )
 
+from .ext.ghostscript import Ghostscript
 
 # pylint: disable=import-error
 # PyLint will evaluate both branches, and will necessarily complain about one
@@ -150,13 +153,40 @@ def remove_extra(kwargs, flavor="lattice"):
 
 
 # https://stackoverflow.com/a/22726782
+# and https://stackoverflow.com/questions/10965479
 class TemporaryDirectory(object):
     def __enter__(self):
         self.name = tempfile.mkdtemp()
+        # Deletion is deferred to interpreter exit (atexit) so that paths
+        # inside the directory stay usable after the ``with`` block ends.
+        atexit.register(shutil.rmtree, self.name)
         return self.name
 
     def __exit__(self, exc_type, exc_value, traceback):
-        shutil.rmtree(self.name)
+        pass
+
+
+def build_file_path_in_temp_dir(filename, extension=None):
+    """Build a path for ``filename`` inside a freshly created temp directory.
+
+    Parameters
+    ----------
+    filename : str
+        Base name of the file to place in the temporary directory.
+    extension : str, optional
+        Suffix appended verbatim to ``filename`` when truthy.
+
+    Returns
+    -------
+    file_path_in_temporary_dir : str
+        Path inside a new directory that is removed at interpreter exit.
+
+    """
+    target_name = filename + extension if extension else filename
+    with TemporaryDirectory() as temp_dir:
+        # __exit__ is a no-op: the directory survives this block and is
+        # only removed by the atexit hook registered in __enter__.
+        return os.path.join(temp_dir, target_name)
 
 
 def translate(x1, x2):
@@ -387,6 +417,117 @@ def text_in_bbox(bbox, text):
     return t_bbox
 
 
+def bbox_from_text(textlines):
+    """Returns the smallest bbox containing all the text objects passed as
+    a parameter.
+
+    Parameters
+    ----------
+    textlines : List of PDFMiner text objects.
+        Each object is read through its ``x0``, ``y0``, ``x1``, ``y1``.
+
+    Returns
+    -------
+    bbox : tuple or None
+        Tuple (x1, y1, x2, y2) representing a bounding box where
+        (x1, y1) -> lb and (x2, y2) -> rt in the PDF coordinate
+        space. ``None`` when ``textlines`` is empty.
+
+    """
+    if len(textlines) == 0:
+        return None
+    # Seed the extents with the first object, then grow them as needed.
+    first, *others = textlines
+    left, bottom, right, top = first.x0, first.y0, first.x1, first.y1
+    for tl in others:
+        left = min(left, tl.x0)
+        bottom = min(bottom, tl.y0)
+        right = max(right, tl.x1)
+        top = max(top, tl.y1)
+    return (
+        left,
+        bottom,
+        right,
+        top
+    )
+
+
+def find_columns_coordinates(tls):
+    """Given a list of text objects, guess columns boundaries and returns a
+    list of x-coordinates for split points between columns.
+
+    Parameters
+    ----------
+    tls : list of PDFMiner text object.
+
+    Returns
+    -------
+    cols_anchors : list
+        List of x-coordinates for columns; empty when ``tls`` is empty.
+
+    """
+    # Make a list of disjunct cols boundaries across the textlines
+    # that comprise the table.
+    # [(1st col left, 1st col right), (2nd col left, 2nd col right), ...]
+    cols_bounds = []
+    # Work on a sorted copy so the caller's list is left untouched.
+    for tl in sorted(tls, key=lambda tl: tl.x0):
+        if (not cols_bounds) or cols_bounds[-1][1] < tl.x0:
+            cols_bounds.append([tl.x0, tl.x1])
+        else:
+            cols_bounds[-1][1] = max(cols_bounds[-1][1], tl.x1)
+    if not cols_bounds:
+        return []
+
+    # From the column boundaries, identify splits by getting the mid points
+    # between each pair of neighbouring columns (including the last pair).
+    # Col boundaries: [ a ]        [b]    [   c   ]
+    # Splits:         |        |        |         |
+    cols_anchors = [cols_bounds[0][0]]
+    for prev, cur in zip(cols_bounds, cols_bounds[1:]):
+        cols_anchors.append((prev[1] + cur[0]) / 2.0)
+    cols_anchors.append(cols_bounds[-1][1])
+    return cols_anchors
+
+
+def distance_tl_to_bbox(tl, bbox):
+    """Returns a tuple corresponding to the horizontal and vertical gaps
+    between a textline and a bbox.
+
+    Parameters
+    ----------
+    tl : PDFMiner text object.
+        Read through its ``x0``, ``y0``, ``x1`` and ``y1`` attributes.
+    bbox : tuple (x0, y0, x1, y1)
+        Read through the indices ``bbox[0]`` to ``bbox[3]``.
+
+    Returns
+    -------
+    distance : tuple
+        Tuple (horizontal distance, vertical distance). A component
+        is 0 when the textline and the bbox overlap along the
+        corresponding axis.
+
+    """
+    def axis_gap(tl_low, tl_high, box_low, box_high):
+        # Distance between the intervals [tl_low, tl_high] and
+        # [box_low, box_high] along one axis.
+        if tl_high <= box_low:
+            # textline ends before the bbox starts (left of / below it)
+            return box_low - tl_high
+        if box_high <= tl_low:
+            # textline starts after the bbox ends (right of / above it)
+            return tl_low - box_high
+        # the two intervals overlap
+        return 0
+
+    # x extents give the horizontal gap, y extents the vertical one
+    return (
+        axis_gap(tl.x0, tl.x1, bbox[0], bbox[2]),
+        axis_gap(tl.y0, tl.y1, bbox[1], bbox[3])
+    )
+
+
 def merge_close_lines(ar, line_tol=2):
     """Merges lines which are within a tolerance by calculating a
     moving mean, based on their x or y axis projections.
@@ -867,3 +1008,94 @@ def get_text_objects(layout, ltype="char", t=None):
     except AttributeError:
         pass
     return t
+
+
+def export_pdf_as_png(pdf_path, destination_path):
+    """Render a PDF to a 300 dpi PNG with Ghostscript.
+
+    Parameters
+    ----------
+    pdf_path : str
+    destination_path : str
+    """
+    # One list entry per argument, so paths containing spaces stay intact.
+    gs_args = ["-q", "-sDEVICE=png16m", "-o", destination_path, "-r300",
+               pdf_path]
+    with open(os.devnull, "wb") as null:
+        Ghostscript(*[arg.encode() for arg in gs_args], stdout=null)
+
+
+def compare_tables(left, right):
+    """Compare two tables and print differences in a human readable form.
+
+    Only the first differing column (or row) is reported.
+
+    Parameters
+    ----------
+    left : data frame
+        Reported under the name "left".
+    right : data frame
+        Reported under the name "right".
+    """
+    diff_cols = right.shape[1] - left.shape[1]
+    diff_rows = right.shape[0] - left.shape[0]
+    differences = []
+    if diff_rows:
+        direction = 'more' if diff_rows > 0 else 'fewer'
+        differences.append(f"{abs(diff_rows)} {direction} rows")
+    if diff_cols:
+        direction = 'more' if diff_cols > 0 else 'fewer'
+        differences.append(f"{abs(diff_cols)} {direction} columns")
+    if differences:
+        differences_str = " and ".join(differences)
+        print(f"Right has {differences_str} than left "
+              f"[{right.shape[0]},{right.shape[1]}] vs "
+              f"[{left.shape[0]},{left.shape[1]}]")
+
+    table1, table2 = left, right
+    name_table1, name_table2 = "left", "right"
+    if not diff_rows:
+        # Same number of rows: compare columns since they're of the same length
+        if diff_cols > 0:
+            # Use the longest table as a reference
+            table1, table2 = table2, table1
+            name_table1, name_table2 = name_table2, name_table1
+        for i, col in enumerate(table1.columns):
+            lcol = table1.iloc[:, i]
+            if col in table2:
+                scol = table2.iloc[:, i]
+                if not lcol.equals(scol):
+                    # Label each series with the table it came from
+                    diff_df = pd.DataFrame(
+                        {name_table1: lcol, name_table2: scol})
+                    diff_df["Match"] = lcol == scol
+                    print(
+                        f"Column {i} different:\n"
+                        f"{diff_df}"
+                    )
+                    break
+            else:
+                print(f"Column {i} unique to {name_table1}: {lcol}")
+                break
+    elif not diff_cols:
+        # Same number of cols: compare rows since they're of the same length
+        if diff_rows > 0:
+            # Use the longest table as a reference
+            table1, table2 = table2, table1
+            name_table1, name_table2 = name_table2, name_table1
+        for index, lrow in table1.iterrows():
+            if index < table2.shape[0]:
+                srow = table2.loc[index, :]
+                if not lrow.equals(srow):
+                    # DataFrame.append was removed in pandas 2.0
+                    diff_df = pd.DataFrame([lrow, srow])
+                    diff_df = diff_df.reset_index(drop=True)
+                    diff_df.insert(0, 'Table', [name_table1, name_table2])
+                    print(f"Row {index} differs:")
+                    print(diff_df.values)
+                    break
+            else:
+                print(f"Row {index} unique to {name_table1}: {lrow}")
+                break
+    else:
+        print("Tables have different shapes")
diff --git a/tests/files/baseline_plots/test_grid_plot.png b/tests/files/baseline_plots/test_grid_plot.png
index 0607d15..87fe2aa 100644
Binary files a/tests/files/baseline_plots/test_grid_plot.png and b/tests/files/baseline_plots/test_grid_plot.png differ
diff --git a/tests/files/baseline_plots/test_line_plot.png b/tests/files/baseline_plots/test_line_plot.png
index 12c44c0..6ddeace 100644
Binary files a/tests/files/baseline_plots/test_line_plot.png and b/tests/files/baseline_plots/test_line_plot.png differ
diff --git a/tests/files/baseline_plots/test_stream_contour_plot.png b/tests/files/baseline_plots/test_stream_contour_plot.png
index 958ea0a..d781439 100644
Binary files a/tests/files/baseline_plots/test_stream_contour_plot.png and b/tests/files/baseline_plots/test_stream_contour_plot.png differ
diff --git a/tests/files/baseline_plots/test_stream_grid_plot.png b/tests/files/baseline_plots/test_stream_grid_plot.png
index 818958c..b04a2f1 100644
Binary files a/tests/files/baseline_plots/test_stream_grid_plot.png and b/tests/files/baseline_plots/test_stream_grid_plot.png differ
diff --git a/tests/files/baseline_plots/test_text_plot.png b/tests/files/baseline_plots/test_text_plot.png
index 63b5520..497af37 100644
Binary files a/tests/files/baseline_plots/test_text_plot.png and b/tests/files/baseline_plots/test_text_plot.png differ
diff --git a/tests/files/baseline_plots/test_textedge_plot.png b/tests/files/baseline_plots/test_textedge_plot.png
index 1de4e9c..1c04473 100644
Binary files a/tests/files/baseline_plots/test_textedge_plot.png and b/tests/files/baseline_plots/test_textedge_plot.png differ