Blacken code

pull/1/head
Vinayak Mehta 2019-07-03 22:04:19 +05:30
parent 27d55d056c
commit 2115a0e177
15 changed files with 892 additions and 551 deletions
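
The change is purely mechanical: Black normalizes string quotes to double quotes and rewraps calls that exceed its line length, with no behavioral change. A representative before/after pair taken from the hunks below:

# before
tables = read_pdf(filepath, pages=pages, flavor='lattice',
                  suppress_stdout=quiet, **kwargs)

# after
tables = read_pdf(
    filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs
)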

View File

@@ -9,8 +9,8 @@ from .io import read_pdf
from .plotting import PlotMethods


def _write_usage(self, prog, args="", prefix="Usage: "):
    return self._write_usage("camelot", args, prefix=prefix)


# monkey patch click.HelpFormatter

@@ -18,10 +18,10 @@ HelpFormatter._write_usage = HelpFormatter.write_usage
HelpFormatter.write_usage = _write_usage

# set up logging
logger = logging.getLogger("camelot")
format_string = "%(asctime)s - %(levelname)s - %(message)s"
formatter = logging.Formatter(format_string, datefmt="%Y-%m-%dT%H:%M:%S")
handler = logging.StreamHandler()
handler.setFormatter(formatter)

View File

@@ -3,7 +3,7 @@
from __future__ import absolute_import

__all__ = ("main",)


def main():

View File

@@ -1,23 +1,23 @@
# -*- coding: utf-8 -*-

VERSION = (0, 7, 2)
PRERELEASE = None  # alpha, beta or rc
REVISION = None


def generate_version(version, prerelease=None, revision=None):
    version_parts = [".".join(map(str, version))]
    if prerelease is not None:
        version_parts.append("-{}".format(prerelease))
    if revision is not None:
        version_parts.append(".{}".format(revision))
    return "".join(version_parts)


__title__ = "camelot-py"
__description__ = "PDF Table Extraction for Humans."
__url__ = "http://camelot-py.readthedocs.io/"
__version__ = generate_version(VERSION, prerelease=PRERELEASE, revision=REVISION)
__author__ = "Vinayak Mehta"
__author_email__ = "vmehta94@gmail.com"
__license__ = "MIT License"
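
As a quick check of the version-string logic above (the prerelease and revision values here are hypothetical):

assert generate_version((0, 7, 2)) == "0.7.2"
assert generate_version((0, 7, 2), prerelease="alpha", revision=1) == "0.7.2-alpha.1"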

View File

@@ -3,6 +3,7 @@
import logging

import click

try:
    import matplotlib.pyplot as plt
except ImportError:

@@ -13,7 +14,7 @@ else:
from . import __version__, read_pdf, plot

logger = logging.getLogger("camelot")
logger.setLevel(logging.INFO)

@@ -30,23 +31,47 @@ pass_config = click.make_pass_decorator(Config)
@click.group()
@click.version_option(version=__version__)
@click.option("-q", "--quiet", is_flag=False, help="Suppress logs and warnings.")
@click.option(
    "-p",
    "--pages",
    default="1",
    help="Comma-separated page numbers." " Example: 1,3,4 or 1,4-end or all.",
)
@click.option("-pw", "--password", help="Password for decryption.")
@click.option("-o", "--output", help="Output file path.")
@click.option(
    "-f",
    "--format",
    type=click.Choice(["csv", "json", "excel", "html", "sqlite"]),
    help="Output file format.",
)
@click.option("-z", "--zip", is_flag=True, help="Create ZIP archive.")
@click.option(
    "-split",
    "--split_text",
    is_flag=True,
    help="Split text that spans across multiple cells.",
)
@click.option(
    "-flag",
    "--flag_size",
    is_flag=True,
    help="Flag text based on" " font size. Useful to detect super/subscripts.",
)
@click.option(
    "-strip",
    "--strip_text",
    help="Characters that should be stripped from a string before"
    " assigning it to a cell.",
)
@click.option(
    "-M",
    "--margins",
    nargs=3,
    default=(1.0, 0.5, 0.1),
    help="PDFMiner char_margin, line_margin and word_margin.",
)
@click.pass_context
def cli(ctx, *args, **kwargs):
    """Camelot: PDF Table Extraction for Humans"""

@@ -55,79 +80,131 @@ def cli(ctx, *args, **kwargs):
        ctx.obj.set_config(key, value)


@cli.command("lattice")
@click.option(
    "-R",
    "--table_regions",
    default=[],
    multiple=True,
    help="Page regions to analyze. Example: x1,y1,x2,y2"
    " where x1, y1 -> left-top and x2, y2 -> right-bottom.",
)
@click.option(
    "-T",
    "--table_areas",
    default=[],
    multiple=True,
    help="Table areas to process. Example: x1,y1,x2,y2"
    " where x1, y1 -> left-top and x2, y2 -> right-bottom.",
)
@click.option(
    "-back", "--process_background", is_flag=True, help="Process background lines."
)
@click.option(
    "-scale",
    "--line_scale",
    default=15,
    help="Line size scaling factor. The larger the value,"
    " the smaller the detected lines.",
)
@click.option(
    "-copy",
    "--copy_text",
    default=[],
    type=click.Choice(["h", "v"]),
    multiple=True,
    help="Direction in which text in a spanning cell" " will be copied over.",
)
@click.option(
    "-shift",
    "--shift_text",
    default=["l", "t"],
    type=click.Choice(["", "l", "r", "t", "b"]),
    multiple=True,
    help="Direction in which text in a spanning cell will flow.",
)
@click.option(
    "-l",
    "--line_tol",
    default=2,
    help="Tolerance parameter used to merge close vertical" " and horizontal lines.",
)
@click.option(
    "-j",
    "--joint_tol",
    default=2,
    help="Tolerance parameter used to decide whether"
    " the detected lines and points lie close to each other.",
)
@click.option(
    "-block",
    "--threshold_blocksize",
    default=15,
    help="For adaptive thresholding, size of a pixel"
    " neighborhood that is used to calculate a threshold value for"
    " the pixel. Example: 3, 5, 7, and so on.",
)
@click.option(
    "-const",
    "--threshold_constant",
    default=-2,
    help="For adaptive thresholding, constant subtracted"
    " from the mean or weighted mean. Normally, it is positive but"
    " may be zero or negative as well.",
)
@click.option(
    "-I",
    "--iterations",
    default=0,
    help="Number of times for erosion/dilation will be applied.",
)
@click.option(
    "-res",
    "--resolution",
    default=300,
    help="Resolution used for PDF to PNG conversion.",
)
@click.option(
    "-plot",
    "--plot_type",
    type=click.Choice(["text", "grid", "contour", "joint", "line"]),
    help="Plot elements found on PDF page for visual debugging.",
)
@click.argument("filepath", type=click.Path(exists=True))
@pass_config
def lattice(c, *args, **kwargs):
    """Use lines between text to parse the table."""
    conf = c.config
    pages = conf.pop("pages")
    output = conf.pop("output")
    f = conf.pop("format")
    compress = conf.pop("zip")
    quiet = conf.pop("quiet")
    plot_type = kwargs.pop("plot_type")
    filepath = kwargs.pop("filepath")
    kwargs.update(conf)

    table_regions = list(kwargs["table_regions"])
    kwargs["table_regions"] = None if not table_regions else table_regions
    table_areas = list(kwargs["table_areas"])
    kwargs["table_areas"] = None if not table_areas else table_areas
    copy_text = list(kwargs["copy_text"])
    kwargs["copy_text"] = None if not copy_text else copy_text
    kwargs["shift_text"] = list(kwargs["shift_text"])

    if plot_type is not None:
        if not _HAS_MPL:
            raise ImportError("matplotlib is required for plotting.")
    else:
        if output is None:
            raise click.UsageError("Please specify output file path using --output")
        if f is None:
            raise click.UsageError("Please specify output file format using --format")

    tables = read_pdf(
        filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs
    )
    click.echo("Found {} tables".format(tables.n))
    if plot_type is not None:
        for table in tables:
            plot(table, kind=plot_type)

@@ -136,57 +213,89 @@ def lattice(c, *args, **kwargs):
        tables.export(output, f=f, compress=compress)


@cli.command("stream")
@click.option(
    "-R",
    "--table_regions",
    default=[],
    multiple=True,
    help="Page regions to analyze. Example: x1,y1,x2,y2"
    " where x1, y1 -> left-top and x2, y2 -> right-bottom.",
)
@click.option(
    "-T",
    "--table_areas",
    default=[],
    multiple=True,
    help="Table areas to process. Example: x1,y1,x2,y2"
    " where x1, y1 -> left-top and x2, y2 -> right-bottom.",
)
@click.option(
    "-C",
    "--columns",
    default=[],
    multiple=True,
    help="X coordinates of column separators.",
)
@click.option(
    "-e",
    "--edge_tol",
    default=50,
    help="Tolerance parameter" " for extending textedges vertically.",
)
@click.option(
    "-r",
    "--row_tol",
    default=2,
    help="Tolerance parameter" " used to combine text vertically, to generate rows.",
)
@click.option(
    "-c",
    "--column_tol",
    default=0,
    help="Tolerance parameter"
    " used to combine text horizontally, to generate columns.",
)
@click.option(
    "-plot",
    "--plot_type",
    type=click.Choice(["text", "grid", "contour", "textedge"]),
    help="Plot elements found on PDF page for visual debugging.",
)
@click.argument("filepath", type=click.Path(exists=True))
@pass_config
def stream(c, *args, **kwargs):
    """Use spaces between text to parse the table."""
    conf = c.config
    pages = conf.pop("pages")
    output = conf.pop("output")
    f = conf.pop("format")
    compress = conf.pop("zip")
    quiet = conf.pop("quiet")
    plot_type = kwargs.pop("plot_type")
    filepath = kwargs.pop("filepath")
    kwargs.update(conf)

    table_regions = list(kwargs["table_regions"])
    kwargs["table_regions"] = None if not table_regions else table_regions
    table_areas = list(kwargs["table_areas"])
    kwargs["table_areas"] = None if not table_areas else table_areas
    columns = list(kwargs["columns"])
    kwargs["columns"] = None if not columns else columns

    if plot_type is not None:
        if not _HAS_MPL:
            raise ImportError("matplotlib is required for plotting.")
    else:
        if output is None:
            raise click.UsageError("Please specify output file path using --output")
        if f is None:
            raise click.UsageError("Please specify output file format using --format")

    tables = read_pdf(
        filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
    )
    click.echo("Found {} tables".format(tables.n))
    if plot_type is not None:
        for table in tables:
            plot(table, kind=plot_type)
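
For reference, the two subcommands above are thin wrappers over the library API. A rough Python equivalent of `camelot -p 1,2 -o tables.csv -f csv lattice report.pdf` (the file names are placeholders) would be:

import camelot

tables = camelot.read_pdf("report.pdf", pages="1,2", flavor="lattice")
print("Found {} tables".format(tables.n))
tables.export("tables.csv", f="csv", compress=False)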

View File

@@ -42,7 +42,8 @@ class TextEdge(object):
    TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.

    """

    def __init__(self, x, y0, y1, align="left"):
        self.x = x
        self.y0 = y0
        self.y1 = y1

@@ -51,8 +52,13 @@ class TextEdge(object):
        self.is_valid = False

    def __repr__(self):
        return "<TextEdge x={} y0={} y1={} align={} valid={}>".format(
            round(self.x, 2),
            round(self.y0, 2),
            round(self.y1, 2),
            self.align,
            self.is_valid,
        )

    def update_coords(self, x, y0, edge_tol=50):
        """Updates the text edge's x and bottom y coordinates and sets

@@ -73,9 +79,10 @@ class TextEdges(object):
        the PDF page. The dict has three keys based on the alignments,
        and each key's value is a list of camelot.core.TextEdge objects.
    """

    def __init__(self, edge_tol=50):
        self.edge_tol = edge_tol
        self._textedges = {"left": [], "right": [], "middle": []}

    @staticmethod
    def get_x_coord(textline, align):

@@ -85,7 +92,7 @@ class TextEdges(object):
        x_left = textline.x0
        x_right = textline.x1
        x_middle = x_left + (x_right - x_left) / 2.0
        x_coord = {"left": x_left, "middle": x_middle, "right": x_right}
        return x_coord[align]

    def find(self, x_coord, align):

@@ -109,21 +116,22 @@ class TextEdges(object):
    def update(self, textline):
        """Updates an existing text edge in the current dict.
        """
        for align in ["left", "right", "middle"]:
            x_coord = self.get_x_coord(textline, align)
            idx = self.find(x_coord, align)
            if idx is None:
                self.add(textline, align)
            else:
                self._textedges[align][idx].update_coords(
                    x_coord, textline.y0, edge_tol=self.edge_tol
                )

    def generate(self, textlines):
        """Generates the text edges dict based on horizontal text
        rows.
        """
        for tl in textlines:
            if len(tl.get_text().strip()) > 1:  # TODO: hacky
                self.update(tl)

    def get_relevant(self):

@@ -132,9 +140,15 @@ class TextEdges(object):
        the most.
        """
        intersections_sum = {
            "left": sum(
                te.intersections for te in self._textedges["left"] if te.is_valid
            ),
            "right": sum(
                te.intersections for te in self._textedges["right"] if te.is_valid
            ),
            "middle": sum(
                te.intersections for te in self._textedges["middle"] if te.is_valid
            ),
        }

        # TODO: naive

@@ -147,6 +161,7 @@ class TextEdges(object):
        """Returns a dict of interesting table areas on the PDF page
        calculated using relevant text edges.
        """

        def pad(area, average_row_height):
            x0 = area[0] - TABLE_AREA_PADDING
            y0 = area[1] - TABLE_AREA_PADDING

@@ -175,7 +190,11 @@ class TextEdges(object):
                else:
                    table_areas.pop(found)
                    updated_area = (
                        found[0],
                        min(te.y0, found[1]),
                        max(found[2], te.x),
                        max(found[3], te.y1),
                    )
                    table_areas[updated_area] = None

        # extend table areas based on textlines that overlap

@@ -196,7 +215,11 @@ class TextEdges(object):
            if found is not None:
                table_areas.pop(found)
                updated_area = (
                    min(tl.x0, found[0]),
                    min(tl.y0, found[1]),
                    max(found[2], tl.x1),
                    max(found[3], tl.y1),
                )
                table_areas[updated_area] = None

        average_textline_height = sum_textline_height / float(len(textlines))

@@ -265,11 +288,12 @@ class Cell(object):
        self.bottom = False
        self.hspan = False
        self.vspan = False
        self._text = ""

    def __repr__(self):
        return "<Cell x1={} y1={} x2={} y2={}>".format(
            round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
        )

    @property
    def text(self):

@@ -277,7 +301,7 @@ class Cell(object):
    @text.setter
    def text(self, t):
        self._text = "".join([self._text, t])

    @property
    def bound(self):

@@ -314,11 +338,11 @@ class Table(object):
        PDF page number.

    """

    def __init__(self, cols, rows):
        self.cols = cols
        self.rows = rows
        self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
        self.df = None
        self.shape = (0, 0)
        self.accuracy = 0

@@ -327,7 +351,7 @@ class Table(object):
        self.page = None

    def __repr__(self):
        return "<{} shape={}>".format(self.__class__.__name__, self.shape)

    def __lt__(self, other):
        if self.page == other.page:

@@ -352,10 +376,10 @@ class Table(object):
        """
        # pretty?
        report = {
            "accuracy": round(self.accuracy, 2),
            "whitespace": round(self.whitespace, 2),
            "order": self.order,
            "page": self.page,
        }
        return report

@@ -383,12 +407,21 @@ class Table(object):
        for v in vertical:
            # find closest x coord
            # iterate over y coords and find closest start and end points
            i = [
                i
                for i, t in enumerate(self.cols)
                if np.isclose(v[0], t[0], atol=joint_tol)
            ]
            j = [
                j
                for j, t in enumerate(self.rows)
                if np.isclose(v[3], t[0], atol=joint_tol)
            ]
            k = [
                k
                for k, t in enumerate(self.rows)
                if np.isclose(v[1], t[0], atol=joint_tol)
            ]
            if not j:
                continue
            J = j[0]

@@ -434,12 +467,21 @@ class Table(object):
        for h in horizontal:
            # find closest y coord
            # iterate over x coords and find closest start and end points
            i = [
                i
                for i, t in enumerate(self.rows)
                if np.isclose(h[1], t[0], atol=joint_tol)
            ]
            j = [
                j
                for j, t in enumerate(self.cols)
                if np.isclose(h[0], t[0], atol=joint_tol)
            ]
            k = [
                k
                for k, t in enumerate(self.cols)
                if np.isclose(h[2], t[0], atol=joint_tol)
            ]
            if not j:
                continue
            J = j[0]

@@ -537,12 +579,7 @@ class Table(object):
            Output filepath.

        """
        kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1}
        kw.update(kwargs)
        self.df.to_csv(path, **kw)

@@ -557,12 +594,10 @@ class Table(object):
            Output filepath.

        """
        kw = {"orient": "records"}
        kw.update(kwargs)
        json_string = self.df.to_json(**kw)
        with open(path, "w") as f:
            f.write(json_string)

    def to_excel(self, path, **kwargs):

@@ -577,8 +612,8 @@ class Table(object):
        """
        kw = {
            "sheet_name": "page-{}-table-{}".format(self.page, self.order),
            "encoding": "utf-8",
        }
        kw.update(kwargs)
        writer = pd.ExcelWriter(path)

@@ -597,7 +632,7 @@ class Table(object):
        """
        html_string = self.df.to_html(**kwargs)
        with open(path, "w") as f:
            f.write(html_string)

    def to_sqlite(self, path, **kwargs):

@@ -611,13 +646,10 @@ class Table(object):
            Output filepath.

        """
        kw = {"if_exists": "replace", "index": False}
        kw.update(kwargs)
        conn = sqlite3.connect(path)
        table_name = "page-{}-table-{}".format(self.page, self.order)
        self.df.to_sql(table_name, conn, **kw)
        conn.commit()
        conn.close()

@@ -633,12 +665,12 @@ class TableList(object):
        Number of tables in the list.

    """

    def __init__(self, tables):
        self._tables = tables

    def __repr__(self):
        return "<{} n={}>".format(self.__class__.__name__, self.n)

    def __len__(self):
        return len(self._tables)

@@ -648,37 +680,39 @@ class TableList(object):
    @staticmethod
    def _format_func(table, f):
        return getattr(table, "to_{}".format(f))

    @property
    def n(self):
        return len(self)

    def _write_file(self, f=None, **kwargs):
        dirname = kwargs.get("dirname")
        root = kwargs.get("root")
        ext = kwargs.get("ext")
        for table in self._tables:
            filename = os.path.join(
                "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
            )
            filepath = os.path.join(dirname, filename)
            to_format = self._format_func(table, f)
            to_format(filepath)

    def _compress_dir(self, **kwargs):
        path = kwargs.get("path")
        dirname = kwargs.get("dirname")
        root = kwargs.get("root")
        ext = kwargs.get("ext")
        zipname = os.path.join(os.path.dirname(path), root) + ".zip"
        with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
            for table in self._tables:
                filename = os.path.join(
                    "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
                )
                filepath = os.path.join(dirname, filename)
                z.write(filepath, os.path.basename(filepath))

    def export(self, path, f="csv", compress=False):
        """Exports the list of tables to specified file format.

        Parameters

@@ -697,33 +731,28 @@ class TableList(object):
        if compress:
            dirname = tempfile.mkdtemp()

        kwargs = {"path": path, "dirname": dirname, "root": root, "ext": ext}

        if f in ["csv", "json", "html"]:
            self._write_file(f=f, **kwargs)
            if compress:
                self._compress_dir(**kwargs)
        elif f == "excel":
            filepath = os.path.join(dirname, basename)
            writer = pd.ExcelWriter(filepath)
            for table in self._tables:
                sheet_name = "page-{}-table-{}".format(table.page, table.order)
                table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
            writer.save()
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + ".zip"
                with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
                    z.write(filepath, os.path.basename(filepath))
        elif f == "sqlite":
            filepath = os.path.join(dirname, basename)
            for table in self._tables:
                table.to_sqlite(filepath)
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + ".zip"
                with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
                    z.write(filepath, os.path.basename(filepath))
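
A brief usage sketch of the TableList and Table API touched above; the file names are placeholders, and iterating over the list mirrors the CLI code earlier in this diff:

# "tables" is the TableList returned by read_pdf()
tables.export("extracted.xlsx", f="excel", compress=True)  # csv, json, html, sqlite also work
for table in tables:
    table.to_csv("page-{}-table-{}.csv".format(table.page, table.order))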

View File

@@ -24,10 +24,10 @@ ghostscript - A Python interface for the Ghostscript interpreter C-API
from . import _gsprint as gs

__author__ = "Hartmut Goebel <h.goebel@crazy-compilers.com>"
__copyright__ = "Copyright 2010-2018 by Hartmut Goebel <h.goebel@crazy-compilers.com>"
__license__ = "GNU General Public License version 3 (GPL v3)"
__version__ = "0.6"


class __Ghostscript(object):

@@ -87,10 +87,13 @@ def Ghostscript(*args, **kwargs):
    # Ghostscript only supports a single instance
    if __instance__ is None:
        __instance__ = gs.new_instance()
    return __Ghostscript(
        __instance__,
        args,
        stdin=kwargs.get("stdin", None),
        stdout=kwargs.get("stdout", None),
        stderr=kwargs.get("stderr", None),
    )


__instance__ = None

View File

@@ -42,10 +42,10 @@ e_Info = -110
#
e_Quit = -101

__author__ = "Hartmut Goebel <h.goebel@crazy-compilers.com>"
__copyright__ = "Copyright 2010-2018 by Hartmut Goebel <h.goebel@crazy-compilers.com>"
__license__ = "GNU General Public License version 3 (GPL v3)"
__version__ = "0.6"

gs_main_instance = c_void_p
display_callback = c_void_p

@@ -55,7 +55,7 @@ display_callback = c_void_p
class GhostscriptError(Exception):
    def __init__(self, ecode):
        self.code = ecode


def new_instance():

@@ -89,6 +89,7 @@ def _wrap_stdin(infp):
    """Wrap a filehandle into a C function to be used as `stdin` callback
    for ``set_stdio``. The filehandle has to support the readline() method.
    """

    def _wrap(instance, dest, count):
        try:
            data = infp.readline(count)

@@ -110,6 +111,7 @@ def _wrap_stdout(outfp):
    `stderr` callback for ``set_stdio``. The filehandle has to support the
    write() and flush() methods.
    """

    def _wrap(instance, str, count):
        outfp.write(str[:count])
        outfp.flush()

@@ -187,11 +189,23 @@ def __win32_finddll():
        import winreg
    except ImportError:
        # assume Python 2
        from _winreg import (
            OpenKey,
            CloseKey,
            EnumKey,
            QueryValueEx,
            QueryInfoKey,
            HKEY_LOCAL_MACHINE,
        )
    else:
        from winreg import (
            OpenKey,
            CloseKey,
            EnumKey,
            QueryValueEx,
            QueryInfoKey,
            HKEY_LOCAL_MACHINE,
        )

    from distutils.version import LooseVersion
    import os

@@ -199,15 +213,19 @@ def __win32_finddll():
    dlls = []
    # Look up different variants of Ghostscript and take the highest
    # version for which the DLL is to be found in the filesystem.
    for key_name in (
        "AFPL Ghostscript",
        "Aladdin Ghostscript",
        "GNU Ghostscript",
        "GPL Ghostscript",
    ):
        try:
            k1 = OpenKey(HKEY_LOCAL_MACHINE, "Software\\%s" % key_name)
            for num in range(0, QueryInfoKey(k1)[0]):
                version = EnumKey(k1, num)
                try:
                    k2 = OpenKey(k1, version)
                    dll_path = QueryValueEx(k2, "GS_DLL")[0]
                    CloseKey(k2)
                    if os.path.exists(dll_path):
                        dlls.append((LooseVersion(version), dll_path))

@@ -223,21 +241,21 @@ def __win32_finddll():
    return None


if sys.platform == "win32":
    libgs = __win32_finddll()
    if not libgs:
        raise RuntimeError("Please make sure that Ghostscript is installed")
    libgs = windll.LoadLibrary(libgs)
else:
    try:
        libgs = cdll.LoadLibrary("libgs.so")
    except OSError:
        # shared object file not found
        import ctypes.util

        libgs = ctypes.util.find_library("gs")
        if not libgs:
            raise RuntimeError("Please make sure that Ghostscript is installed")
        libgs = cdll.LoadLibrary(libgs)

del __win32_finddll

View File

@@ -7,8 +7,14 @@ from PyPDF2 import PdfFileReader, PdfFileWriter
from .core import TableList
from .parsers import Stream, Lattice
from .utils import (
    TemporaryDirectory,
    get_page_layout,
    get_text_objects,
    get_rotation,
    is_url,
    download_url,
)


class PDFHandler(object):

@@ -27,19 +33,20 @@ class PDFHandler(object):
        Password for decryption.

    """

    def __init__(self, filepath, pages="1", password=None):
        if is_url(filepath):
            filepath = download_url(filepath)
        self.filepath = filepath
        if not filepath.lower().endswith(".pdf"):
            raise NotImplementedError("File format not supported")
        if password is None:
            self.password = ""
        else:
            self.password = password
            if sys.version_info[0] < 3:
                self.password = self.password.encode("ascii")
        self.pages = self._get_pages(self.filepath, pages)

    def _get_pages(self, filepath, pages):

@@ -60,26 +67,26 @@ class PDFHandler(object):
        """
        page_numbers = []
        if pages == "1":
            page_numbers.append({"start": 1, "end": 1})
        else:
            infile = PdfFileReader(open(filepath, "rb"), strict=False)
            if infile.isEncrypted:
                infile.decrypt(self.password)
            if pages == "all":
                page_numbers.append({"start": 1, "end": infile.getNumPages()})
            else:
                for r in pages.split(","):
                    if "-" in r:
                        a, b = r.split("-")
                        if b == "end":
                            b = infile.getNumPages()
                        page_numbers.append({"start": int(a), "end": int(b)})
                    else:
                        page_numbers.append({"start": int(r), "end": int(r)})
        P = []
        for p in page_numbers:
            P.extend(range(p["start"], p["end"] + 1))
        return sorted(set(P))

    def _save_page(self, filepath, page, temp):

@@ -95,16 +102,16 @@ class PDFHandler(object):
            Tmp directory.

        """
        with open(filepath, "rb") as fileobj:
            infile = PdfFileReader(fileobj, strict=False)
            if infile.isEncrypted:
                infile.decrypt(self.password)
            fpath = os.path.join(temp, "page-{0}.pdf".format(page))
            froot, fext = os.path.splitext(fpath)
            p = infile.getPage(page - 1)
            outfile = PdfFileWriter()
            outfile.addPage(p)
            with open(fpath, "wb") as f:
                outfile.write(f)
            layout, dim = get_page_layout(fpath)
            # fix rotated PDF

@@ -112,23 +119,25 @@ class PDFHandler(object):
            horizontal_text = get_text_objects(layout, ltype="horizontal_text")
            vertical_text = get_text_objects(layout, ltype="vertical_text")
            rotation = get_rotation(chars, horizontal_text, vertical_text)
            if rotation != "":
                fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
                os.rename(fpath, fpath_new)
                infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
                if infile.isEncrypted:
                    infile.decrypt(self.password)
                outfile = PdfFileWriter()
                p = infile.getPage(0)
                if rotation == "anticlockwise":
                    p.rotateClockwise(90)
                elif rotation == "clockwise":
                    p.rotateCounterClockwise(90)
                outfile.addPage(p)
                with open(fpath, "wb") as f:
                    outfile.write(f)

    def parse(
        self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs
    ):
        """Extracts tables by calling parser.get_tables on all single
        page PDFs.

@@ -154,11 +163,13 @@ class PDFHandler(object):
        with TemporaryDirectory() as tempdir:
            for p in self.pages:
                self._save_page(self.filepath, p, tempdir)
            pages = [
                os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
            ]
            parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
            for p in pages:
                t = parser.extract_tables(
                    p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
                )
                tables.extend(t)
        return TableList(sorted(tables))
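
To make the page-spec handling in _get_pages above concrete, a small worked example (the file name and page count are hypothetical):

handler = PDFHandler("report.pdf", pages="1,3-end")
# "1"     -> {"start": 1, "end": 1}
# "3-end" -> {"start": 3, "end": infile.getNumPages()}
# for a three-page file this expands and de-duplicates to:
print(handler.pages)  # [1, 3]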

View File

@@ -39,17 +39,23 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
    if process_background:
        threshold = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c
        )
    else:
        threshold = cv2.adaptiveThreshold(
            np.invert(gray),
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            blocksize,
            c,
        )
    return img, threshold


def find_lines(
    threshold, regions=None, direction="horizontal", line_scale=15, iterations=0
):
    """Finds horizontal and vertical lines by applying morphological
    transformations on an image.

@@ -87,15 +93,14 @@ def find_lines(threshold, regions=None, direction='horizontal',
    """
    lines = []

    if direction == "vertical":
        size = threshold.shape[0] // line_scale
        el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
    elif direction == "horizontal":
        size = threshold.shape[1] // line_scale
        el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
    elif direction is None:
        raise ValueError("Specify direction as either 'vertical' or" " 'horizontal'")

    if regions is not None:
        region_mask = np.zeros(threshold.shape)

@@ -110,19 +115,21 @@ def find_lines(threshold, regions=None, direction='horizontal',
    try:
        _, contours, _ = cv2.findContours(
            threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
    except ValueError:
        # for opencv backward compatibility
        contours, _ = cv2.findContours(
            threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        x1, x2 = x, x + w
        y1, y2 = y, y + h
        if direction == "vertical":
            lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1))
        elif direction == "horizontal":
            lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2))

    return dmask, lines

@@ -150,11 +157,13 @@ def find_contours(vertical, horizontal):
    try:
        __, contours, __ = cv2.findContours(
            mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
    except ValueError:
        # for opencv backward compatibility
        contours, __ = cv2.findContours(
            mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
    # sort in reverse based on contour area and use first 10 contours
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

@@ -196,11 +205,13 @@ def find_joints(contours, vertical, horizontal):
        roi = joints[y : y + h, x : x + w]
        try:
            __, jc, __ = cv2.findContours(
                roi.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE
            )
        except ValueError:
            # for opencv backward compatibility
            jc, __ = cv2.findContours(
                roi.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE
            )
        if len(jc) <= 4:  # remove contours with less than 4 joints
            continue
        joint_coords = []
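
find_lines isolates ruling lines with a long, thin structuring element; a minimal self-contained sketch of that idea (not the library's exact code, which also honors regions and extra dilation iterations):

import cv2

def horizontal_line_mask(threshold, line_scale=15):
    # kernel spanning 1/line_scale of the image width: erosion keeps only pixel
    # runs at least that long, dilation then restores their original extent
    size = threshold.shape[1] // line_scale
    el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
    return cv2.dilate(cv2.erode(threshold, el), el)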

View File

@@ -6,8 +6,15 @@ from .handlers import PDFHandler
from .utils import validate_input, remove_extra


def read_pdf(
    filepath,
    pages="1",
    password=None,
    flavor="lattice",
    suppress_stdout=False,
    layout_kwargs={},
    **kwargs
):
    """Read PDF and return extracted tables.

    Note: kwargs annotated with ^ can only be used with flavor='stream'

@@ -91,9 +98,10 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
    tables : camelot.core.TableList

    """
    if flavor not in ["lattice", "stream"]:
        raise NotImplementedError(
            "Unknown flavor specified." " Use either 'lattice' or 'stream'"
        )

    with warnings.catch_warnings():
        if suppress_stdout:

@@ -102,6 +110,10 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
        validate_input(kwargs, flavor=flavor)
        p = PDFHandler(filepath, pages=pages, password=password)
        kwargs = remove_extra(kwargs, flavor=flavor)
        tables = p.parse(
            flavor=flavor,
            suppress_stdout=suppress_stdout,
            layout_kwargs=layout_kwargs,
            **kwargs
        )
        return tables

View File

@@ -8,13 +8,13 @@ from ..utils import get_page_layout, get_text_objects
class BaseParser(object):
    """Defines a base parser.
    """

    def _generate_layout(self, filename, layout_kwargs):
        self.filename = filename
        self.layout_kwargs = layout_kwargs
        self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs)
        self.images = get_text_objects(self.layout, ltype="image")
        self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text")
        self.vertical_text = get_text_objects(self.layout, ltype="vertical_text")
        self.pdf_width, self.pdf_height = self.dimensions
        self.rootname, __ = os.path.splitext(self.filename)

View File

@ -14,14 +14,25 @@ import pandas as pd
from .base import BaseParser from .base import BaseParser
from ..core import Table from ..core import Table
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox, from ..utils import (
merge_close_lines, get_table_index, compute_accuracy, scale_image,
compute_whitespace) scale_pdf,
from ..image_processing import (adaptive_threshold, find_lines, segments_in_bbox,
find_contours, find_joints) text_in_bbox,
merge_close_lines,
get_table_index,
compute_accuracy,
compute_whitespace,
)
from ..image_processing import (
adaptive_threshold,
find_lines,
find_contours,
find_joints,
)
logger = logging.getLogger('camelot') logger = logging.getLogger("camelot")
class Lattice(BaseParser): class Lattice(BaseParser):
@ -83,11 +94,26 @@ class Lattice(BaseParser):
Resolution used for PDF to PNG conversion. Resolution used for PDF to PNG conversion.
""" """
def __init__(self, table_regions=None, table_areas=None, process_background=False,
line_scale=15, copy_text=None, shift_text=['l', 't'], def __init__(
split_text=False, flag_size=False, strip_text='', line_tol=2, self,
joint_tol=2, threshold_blocksize=15, threshold_constant=-2, table_regions=None,
iterations=0, resolution=300, **kwargs): table_areas=None,
process_background=False,
line_scale=15,
copy_text=None,
shift_text=["l", "t"],
split_text=False,
flag_size=False,
strip_text="",
line_tol=2,
joint_tol=2,
threshold_blocksize=15,
threshold_constant=-2,
iterations=0,
resolution=300,
**kwargs
):
self.table_regions = table_regions self.table_regions = table_regions
self.table_areas = table_areas self.table_areas = table_areas
self.process_background = process_background self.process_background = process_background
@ -130,19 +156,19 @@ class Lattice(BaseParser):
indices = [] indices = []
for r_idx, c_idx, text in idx: for r_idx, c_idx, text in idx:
for d in shift_text: for d in shift_text:
if d == 'l': if d == "l":
if t.cells[r_idx][c_idx].hspan: if t.cells[r_idx][c_idx].hspan:
while not t.cells[r_idx][c_idx].left: while not t.cells[r_idx][c_idx].left:
c_idx -= 1 c_idx -= 1
if d == 'r': if d == "r":
if t.cells[r_idx][c_idx].hspan: if t.cells[r_idx][c_idx].hspan:
while not t.cells[r_idx][c_idx].right: while not t.cells[r_idx][c_idx].right:
c_idx += 1 c_idx += 1
if d == 't': if d == "t":
if t.cells[r_idx][c_idx].vspan: if t.cells[r_idx][c_idx].vspan:
while not t.cells[r_idx][c_idx].top: while not t.cells[r_idx][c_idx].top:
r_idx -= 1 r_idx -= 1
if d == 'b': if d == "b":
if t.cells[r_idx][c_idx].vspan: if t.cells[r_idx][c_idx].vspan:
while not t.cells[r_idx][c_idx].bottom: while not t.cells[r_idx][c_idx].bottom:
r_idx += 1 r_idx += 1
@ -171,13 +197,13 @@ class Lattice(BaseParser):
if f == "h": if f == "h":
for i in range(len(t.cells)): for i in range(len(t.cells)):
for j in range(len(t.cells[i])): for j in range(len(t.cells[i])):
if t.cells[i][j].text.strip() == '': if t.cells[i][j].text.strip() == "":
if t.cells[i][j].hspan and not t.cells[i][j].left: if t.cells[i][j].hspan and not t.cells[i][j].left:
t.cells[i][j].text = t.cells[i][j - 1].text t.cells[i][j].text = t.cells[i][j - 1].text
elif f == "v": elif f == "v":
for i in range(len(t.cells)): for i in range(len(t.cells)):
for j in range(len(t.cells[i])): for j in range(len(t.cells[i])):
if t.cells[i][j].text.strip() == '': if t.cells[i][j].text.strip() == "":
if t.cells[i][j].vspan and not t.cells[i][j].top: if t.cells[i][j].vspan and not t.cells[i][j].top:
t.cells[i][j].text = t.cells[i - 1][j].text t.cells[i][j].text = t.cells[i - 1][j].text
return t return t
@ -185,11 +211,12 @@ class Lattice(BaseParser):
def _generate_image(self): def _generate_image(self):
from ..ext.ghostscript import Ghostscript from ..ext.ghostscript import Ghostscript
self.imagename = ''.join([self.rootname, '.png']) self.imagename = "".join([self.rootname, ".png"])
gs_call = '-q -sDEVICE=png16m -o {} -r300 {}'.format( gs_call = "-q -sDEVICE=png16m -o {} -r300 {}".format(
self.imagename, self.filename) self.imagename, self.filename
)
gs_call = gs_call.encode().split() gs_call = gs_call.encode().split()
null = open(os.devnull, 'wb') null = open(os.devnull, "wb")
with Ghostscript(*gs_call, stdout=null) as gs: with Ghostscript(*gs_call, stdout=null) as gs:
pass pass
null.close() null.close()
@ -208,8 +235,11 @@ class Lattice(BaseParser):
return scaled_areas return scaled_areas
self.image, self.threshold = adaptive_threshold( self.image, self.threshold = adaptive_threshold(
self.imagename, process_background=self.process_background, self.imagename,
blocksize=self.threshold_blocksize, c=self.threshold_constant) process_background=self.process_background,
blocksize=self.threshold_blocksize,
c=self.threshold_constant,
)
image_width = self.image.shape[1] image_width = self.image.shape[1]
image_height = self.image.shape[0] image_height = self.image.shape[0]
@@ -226,21 +256,35 @@ class Lattice(BaseParser):
regions = scale_areas(self.table_regions) regions = scale_areas(self.table_regions)
vertical_mask, vertical_segments = find_lines( vertical_mask, vertical_segments = find_lines(
self.threshold, regions=regions, direction='vertical', self.threshold,
line_scale=self.line_scale, iterations=self.iterations) regions=regions,
direction="vertical",
line_scale=self.line_scale,
iterations=self.iterations,
)
horizontal_mask, horizontal_segments = find_lines( horizontal_mask, horizontal_segments = find_lines(
self.threshold, regions=regions, direction='horizontal', self.threshold,
line_scale=self.line_scale, iterations=self.iterations) regions=regions,
direction="horizontal",
line_scale=self.line_scale,
iterations=self.iterations,
)
contours = find_contours(vertical_mask, horizontal_mask) contours = find_contours(vertical_mask, horizontal_mask)
table_bbox = find_joints(contours, vertical_mask, horizontal_mask) table_bbox = find_joints(contours, vertical_mask, horizontal_mask)
else: else:
vertical_mask, vertical_segments = find_lines( vertical_mask, vertical_segments = find_lines(
self.threshold, direction='vertical', line_scale=self.line_scale, self.threshold,
iterations=self.iterations) direction="vertical",
line_scale=self.line_scale,
iterations=self.iterations,
)
horizontal_mask, horizontal_segments = find_lines( horizontal_mask, horizontal_segments = find_lines(
self.threshold, direction='horizontal', line_scale=self.line_scale, self.threshold,
iterations=self.iterations) direction="horizontal",
line_scale=self.line_scale,
iterations=self.iterations,
)
areas = scale_areas(self.table_areas) areas = scale_areas(self.table_areas)
table_bbox = find_joints(areas, vertical_mask, horizontal_mask) table_bbox = find_joints(areas, vertical_mask, horizontal_mask)
@@ -248,18 +292,20 @@ class Lattice(BaseParser):
self.table_bbox_unscaled = copy.deepcopy(table_bbox) self.table_bbox_unscaled = copy.deepcopy(table_bbox)
self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image( self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image(
table_bbox, vertical_segments, horizontal_segments, pdf_scalers) table_bbox, vertical_segments, horizontal_segments, pdf_scalers
)
def _generate_columns_and_rows(self, table_idx, tk): def _generate_columns_and_rows(self, table_idx, tk):
# select elements which lie within table_bbox # select elements which lie within table_bbox
t_bbox = {} t_bbox = {}
v_s, h_s = segments_in_bbox( v_s, h_s = segments_in_bbox(
tk, self.vertical_segments, self.horizontal_segments) tk, self.vertical_segments, self.horizontal_segments
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) )
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text)
t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text)
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) t_bbox["horizontal"].sort(key=lambda x: (-x.y0, x.x0))
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) t_bbox["vertical"].sort(key=lambda x: (x.x0, -x.y0))
self.t_bbox = t_bbox self.t_bbox = t_bbox
@@ -268,23 +314,19 @@ class Lattice(BaseParser):
cols.extend([tk[0], tk[2]]) cols.extend([tk[0], tk[2]])
rows.extend([tk[1], tk[3]]) rows.extend([tk[1], tk[3]])
# sort horizontal and vertical segments # sort horizontal and vertical segments
cols = merge_close_lines( cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
sorted(cols), line_tol=self.line_tol) rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
rows = merge_close_lines(
sorted(rows, reverse=True), line_tol=self.line_tol)
# make grid using x and y coord of shortlisted rows and cols # make grid using x and y coord of shortlisted rows and cols
cols = [(cols[i], cols[i + 1]) cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
for i in range(0, len(cols) - 1)] rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
rows = [(rows[i], rows[i + 1])
for i in range(0, len(rows) - 1)]
return cols, rows, v_s, h_s return cols, rows, v_s, h_s
def _generate_table(self, table_idx, cols, rows, **kwargs): def _generate_table(self, table_idx, cols, rows, **kwargs):
v_s = kwargs.get('v_s') v_s = kwargs.get("v_s")
h_s = kwargs.get('h_s') h_s = kwargs.get("h_s")
if v_s is None or h_s is None: if v_s is None or h_s is None:
raise ValueError('No segments found on {}'.format(self.rootname)) raise ValueError("No segments found on {}".format(self.rootname))
table = Table(cols, rows) table = Table(cols, rows)
# set table edges to True using ver+hor lines # set table edges to True using ver+hor lines
@@ -297,14 +339,21 @@ class Lattice(BaseParser):
pos_errors = [] pos_errors = []
# TODO: have a single list in place of two directional ones? # TODO: have a single list in place of two directional ones?
# sorted on x-coordinate based on reading order i.e. LTR or RTL # sorted on x-coordinate based on reading order i.e. LTR or RTL
for direction in ['vertical', 'horizontal']: for direction in ["vertical", "horizontal"]:
for t in self.t_bbox[direction]: for t in self.t_bbox[direction]:
indices, error = get_table_index( indices, error = get_table_index(
table, t, direction, split_text=self.split_text, table,
flag_size=self.flag_size, strip_text=self.strip_text) t,
direction,
split_text=self.split_text,
flag_size=self.flag_size,
strip_text=self.strip_text,
)
if indices[:2] != (-1, -1): if indices[:2] != (-1, -1):
pos_errors.append(error) pos_errors.append(error)
indices = Lattice._reduce_index(table, indices, shift_text=self.shift_text) indices = Lattice._reduce_index(
table, indices, shift_text=self.shift_text
)
for r_idx, c_idx, text in indices: for r_idx, c_idx, text in indices:
table.cells[r_idx][c_idx].text = text table.cells[r_idx][c_idx].text = text
accuracy = compute_accuracy([[100, pos_errors]]) accuracy = compute_accuracy([[100, pos_errors]])
@@ -317,11 +366,11 @@ class Lattice(BaseParser):
table.shape = table.df.shape table.shape = table.df.shape
whitespace = compute_whitespace(data) whitespace = compute_whitespace(data)
table.flavor = 'lattice' table.flavor = "lattice"
table.accuracy = accuracy table.accuracy = accuracy
table.whitespace = whitespace table.whitespace = whitespace
table.order = table_idx + 1 table.order = table_idx + 1
table.page = int(os.path.basename(self.rootname).replace('page-', '')) table.page = int(os.path.basename(self.rootname).replace("page-", ""))
# for plotting # for plotting
_text = [] _text = []
@@ -337,15 +386,18 @@ class Lattice(BaseParser):
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
self._generate_layout(filename, layout_kwargs) self._generate_layout(filename, layout_kwargs)
if not suppress_stdout: if not suppress_stdout:
logger.info('Processing {}'.format(os.path.basename(self.rootname))) logger.info("Processing {}".format(os.path.basename(self.rootname)))
if not self.horizontal_text: if not self.horizontal_text:
if self.images: if self.images:
warnings.warn('{} is image-based, camelot only works on' warnings.warn(
' text-based pages.'.format(os.path.basename(self.rootname))) "{} is image-based, camelot only works on"
" text-based pages.".format(os.path.basename(self.rootname))
)
else: else:
warnings.warn('No tables found on {}'.format( warnings.warn(
os.path.basename(self.rootname))) "No tables found on {}".format(os.path.basename(self.rootname))
)
return [] return []
self._generate_image() self._generate_image()
@@ -353,8 +405,9 @@ class Lattice(BaseParser):
_tables = [] _tables = []
# sort tables based on y-coord # sort tables based on y-coord
for table_idx, tk in enumerate(sorted( for table_idx, tk in enumerate(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)): sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
):
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk) cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s) table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
table._bbox = tk table._bbox = tk
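Note on the Lattice hunks above: only quoting and line wrapping change; the keyword arguments themselves (line_scale, copy_text, shift_text, line_tol, joint_tol, the threshold options, iterations) are normally passed through camelot.read_pdf rather than by constructing the parser directly. A minimal usage sketch, assuming a text-based PDF at "foo.pdf" (the path and the line_scale value are illustrative, not part of this diff):

    import camelot

    # lattice is the default flavor; line_scale controls how short a ruling
    # line may be and still be detected as a table edge
    tables = camelot.read_pdf("foo.pdf", flavor="lattice", line_scale=40)
    print(tables[0].df)  # each parsed table exposes a pandas DataFrame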

View File

@@ -10,11 +10,10 @@ import pandas as pd
from .base import BaseParser from .base import BaseParser
from ..core import TextEdges, Table from ..core import TextEdges, Table
from ..utils import (text_in_bbox, get_table_index, compute_accuracy, from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace
compute_whitespace)
logger = logging.getLogger('camelot') logger = logging.getLogger("camelot")
class Stream(BaseParser): class Stream(BaseParser):
@@ -55,9 +54,20 @@ class Stream(BaseParser):
to generate columns. to generate columns.
""" """
def __init__(self, table_regions=None, table_areas=None, columns=None, split_text=False,
flag_size=False, strip_text='', edge_tol=50, row_tol=2, def __init__(
column_tol=0, **kwargs): self,
table_regions=None,
table_areas=None,
columns=None,
split_text=False,
flag_size=False,
strip_text="",
edge_tol=50,
row_tol=2,
column_tol=0,
**kwargs
):
self.table_regions = table_regions self.table_regions = table_regions
self.table_areas = table_areas self.table_areas = table_areas
self.columns = columns self.columns = columns
@@ -150,8 +160,9 @@ class Stream(BaseParser):
else: else:
lower = merged[-1] lower = merged[-1]
if column_tol >= 0: if column_tol >= 0:
if (higher[0] <= lower[1] or if higher[0] <= lower[1] or np.isclose(
np.isclose(higher[0], lower[1], atol=column_tol)): higher[0], lower[1], atol=column_tol
):
upper_bound = max(lower[1], higher[1]) upper_bound = max(lower[1], higher[1])
lower_bound = min(lower[0], higher[0]) lower_bound = min(lower[0], higher[0])
merged[-1] = (lower_bound, upper_bound) merged[-1] = (lower_bound, upper_bound)
@@ -186,13 +197,14 @@ class Stream(BaseParser):
List of continuous row y-coordinate tuples. List of continuous row y-coordinate tuples.
""" """
row_mids = [sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) row_mids = [
if len(r) > 0 else 0 for r in rows_grouped] sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0
for r in rows_grouped
]
rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
rows.insert(0, text_y_max) rows.insert(0, text_y_max)
rows.append(text_y_min) rows.append(text_y_min)
rows = [(rows[i], rows[i + 1]) rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
for i in range(0, len(rows) - 1)]
return rows return rows
@staticmethod @staticmethod
@@ -217,8 +229,9 @@ class Stream(BaseParser):
if text: if text:
text = Stream._group_rows(text, row_tol=row_tol) text = Stream._group_rows(text, row_tol=row_tol)
elements = [len(r) for r in text] elements = [len(r) for r in text]
new_cols = [(t.x0, t.x1) new_cols = [
for r in text if len(r) == max(elements) for t in r] (t.x0, t.x1) for r in text if len(r) == max(elements) for t in r
]
cols.extend(Stream._merge_columns(sorted(new_cols))) cols.extend(Stream._merge_columns(sorted(new_cols)))
return cols return cols
@@ -243,15 +256,13 @@ class Stream(BaseParser):
cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))]
cols.insert(0, text_x_min) cols.insert(0, text_x_min)
cols.append(text_x_max) cols.append(text_x_max)
cols = [(cols[i], cols[i + 1]) cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
for i in range(0, len(cols) - 1)]
return cols return cols
def _validate_columns(self): def _validate_columns(self):
if self.table_areas is not None and self.columns is not None: if self.table_areas is not None and self.columns is not None:
if len(self.table_areas) != len(self.columns): if len(self.table_areas) != len(self.columns):
raise ValueError("Length of table_areas and columns" raise ValueError("Length of table_areas and columns" " should be equal")
" should be equal")
def _nurminen_table_detection(self, textlines): def _nurminen_table_detection(self, textlines):
"""A general implementation of the table detection algorithm """A general implementation of the table detection algorithm
@@ -309,16 +320,16 @@ class Stream(BaseParser):
def _generate_columns_and_rows(self, table_idx, tk): def _generate_columns_and_rows(self, table_idx, tk):
# select elements which lie within table_bbox # select elements which lie within table_bbox
t_bbox = {} t_bbox = {}
t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text)
t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text)
t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) t_bbox["horizontal"].sort(key=lambda x: (-x.y0, x.x0))
t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) t_bbox["vertical"].sort(key=lambda x: (x.x0, -x.y0))
self.t_bbox = t_bbox self.t_bbox = t_bbox
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_tol=self.row_tol) rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
rows = self._join_rows(rows_grouped, text_y_max, text_y_min) rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
elements = [len(r) for r in rows_grouped] elements = [len(r) for r in rows_grouped]
@@ -327,7 +338,7 @@ class Stream(BaseParser):
# take (0, pdf_width) by default # take (0, pdf_width) by default
# similar to else condition # similar to else condition
# len can't be 1 # len can't be 1
cols = self.columns[table_idx].split(',') cols = self.columns[table_idx].split(",")
cols = [float(c) for c in cols] cols = [float(c) for c in cols]
cols.insert(0, text_x_min) cols.insert(0, text_x_min)
cols.append(text_x_max) cols.append(text_x_max)
@@ -346,20 +357,29 @@ class Stream(BaseParser):
if len(elements): if len(elements):
ncols = max(set(elements), key=elements.count) ncols = max(set(elements), key=elements.count)
else: else:
warnings.warn("No tables found in table area {}".format( warnings.warn(
table_idx + 1)) "No tables found in table area {}".format(table_idx + 1)
)
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r] cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol) cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
inner_text = [] inner_text = []
for i in range(1, len(cols)): for i in range(1, len(cols)):
left = cols[i - 1][1] left = cols[i - 1][1]
right = cols[i][0] right = cols[i][0]
inner_text.extend([t for direction in self.t_bbox inner_text.extend(
for t in self.t_bbox[direction] [
if t.x0 > left and t.x1 < right]) t
outer_text = [t for direction in self.t_bbox for direction in self.t_bbox
for t in self.t_bbox[direction] for t in self.t_bbox[direction]
if t.x0 > cols[-1][1] or t.x1 < cols[0][0]] if t.x0 > left and t.x1 < right
]
)
outer_text = [
t
for direction in self.t_bbox
for t in self.t_bbox[direction]
if t.x0 > cols[-1][1] or t.x1 < cols[0][0]
]
inner_text.extend(outer_text) inner_text.extend(outer_text)
cols = self._add_columns(cols, inner_text, self.row_tol) cols = self._add_columns(cols, inner_text, self.row_tol)
cols = self._join_columns(cols, text_x_min, text_x_max) cols = self._join_columns(cols, text_x_min, text_x_max)
@@ -373,11 +393,16 @@ class Stream(BaseParser):
pos_errors = [] pos_errors = []
# TODO: have a single list in place of two directional ones? # TODO: have a single list in place of two directional ones?
# sorted on x-coordinate based on reading order i.e. LTR or RTL # sorted on x-coordinate based on reading order i.e. LTR or RTL
for direction in ['vertical', 'horizontal']: for direction in ["vertical", "horizontal"]:
for t in self.t_bbox[direction]: for t in self.t_bbox[direction]:
indices, error = get_table_index( indices, error = get_table_index(
table, t, direction, split_text=self.split_text, table,
flag_size=self.flag_size, strip_text=self.strip_text) t,
direction,
split_text=self.split_text,
flag_size=self.flag_size,
strip_text=self.strip_text,
)
if indices[:2] != (-1, -1): if indices[:2] != (-1, -1):
pos_errors.append(error) pos_errors.append(error)
for r_idx, c_idx, text in indices: for r_idx, c_idx, text in indices:
@@ -389,11 +414,11 @@ class Stream(BaseParser):
table.shape = table.df.shape table.shape = table.df.shape
whitespace = compute_whitespace(data) whitespace = compute_whitespace(data)
table.flavor = 'stream' table.flavor = "stream"
table.accuracy = accuracy table.accuracy = accuracy
table.whitespace = whitespace table.whitespace = whitespace
table.order = table_idx + 1 table.order = table_idx + 1
table.page = int(os.path.basename(self.rootname).replace('page-', '')) table.page = int(os.path.basename(self.rootname).replace("page-", ""))
# for plotting # for plotting
_text = [] _text = []
@@ -409,23 +434,27 @@ class Stream(BaseParser):
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
self._generate_layout(filename, layout_kwargs) self._generate_layout(filename, layout_kwargs)
if not suppress_stdout: if not suppress_stdout:
logger.info('Processing {}'.format(os.path.basename(self.rootname))) logger.info("Processing {}".format(os.path.basename(self.rootname)))
if not self.horizontal_text: if not self.horizontal_text:
if self.images: if self.images:
warnings.warn('{} is image-based, camelot only works on' warnings.warn(
' text-based pages.'.format(os.path.basename(self.rootname))) "{} is image-based, camelot only works on"
" text-based pages.".format(os.path.basename(self.rootname))
)
else: else:
warnings.warn('No tables found on {}'.format( warnings.warn(
os.path.basename(self.rootname))) "No tables found on {}".format(os.path.basename(self.rootname))
)
return [] return []
self._generate_table_bbox() self._generate_table_bbox()
_tables = [] _tables = []
# sort tables based on y-coord # sort tables based on y-coord
for table_idx, tk in enumerate(sorted( for table_idx, tk in enumerate(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)): sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
):
cols, rows = self._generate_columns_and_rows(table_idx, tk) cols, rows = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows) table = self._generate_table(table_idx, cols, rows)
table._bbox = tk table._bbox = tk
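As with Lattice, the Stream keyword arguments re-wrapped above (columns, edge_tol, row_tol, column_tol, split_text, flag_size, strip_text) reach the parser through camelot.read_pdf. A short sketch under the same assumptions (illustrative path and tolerance values):

    import camelot

    # stream infers structure from text alignment; row_tol groups nearby
    # textlines into one row, edge_tol relaxes textedge detection
    tables = camelot.read_pdf("foo.pdf", flavor="stream", row_tol=10, edge_tol=500)
    print(tables[0].accuracy, tables[0].whitespace)  # metrics set in _generate_table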

View File

@@ -10,7 +10,7 @@ else:
class PlotMethods(object): class PlotMethods(object):
def __call__(self, table, kind='text', filename=None): def __call__(self, table, kind="text", filename=None):
"""Plot elements found on PDF page based on kind """Plot elements found on PDF page based on kind
specified, useful for debugging and playing with different specified, useful for debugging and playing with different
parameters to get the best output. parameters to get the best output.
@@ -31,14 +31,16 @@ class PlotMethods(object):
""" """
if not _HAS_MPL: if not _HAS_MPL:
raise ImportError('matplotlib is required for plotting.') raise ImportError("matplotlib is required for plotting.")
if table.flavor == 'lattice' and kind in ['textedge']: if table.flavor == "lattice" and kind in ["textedge"]:
raise NotImplementedError("Lattice flavor does not support kind='{}'".format( raise NotImplementedError(
kind)) "Lattice flavor does not support kind='{}'".format(kind)
elif table.flavor == 'stream' and kind in ['joint', 'line']: )
raise NotImplementedError("Stream flavor does not support kind='{}'".format( elif table.flavor == "stream" and kind in ["joint", "line"]:
kind)) raise NotImplementedError(
"Stream flavor does not support kind='{}'".format(kind)
)
plot_method = getattr(self, kind) plot_method = getattr(self, kind)
return plot_method(table) return plot_method(table)
@@ -57,18 +59,12 @@ class PlotMethods(object):
""" """
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
xs, ys = [], [] xs, ys = [], []
for t in table._text: for t in table._text:
xs.extend([t[0], t[2]]) xs.extend([t[0], t[2]])
ys.extend([t[1], t[3]]) ys.extend([t[1], t[3]])
ax.add_patch( ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
patches.Rectangle(
(t[0], t[1]),
t[2] - t[0],
t[3] - t[1]
)
)
ax.set_xlim(min(xs) - 10, max(xs) + 10) ax.set_xlim(min(xs) - 10, max(xs) + 10)
ax.set_ylim(min(ys) - 10, max(ys) + 10) ax.set_ylim(min(ys) - 10, max(ys) + 10)
return fig return fig
@@ -87,21 +83,17 @@ class PlotMethods(object):
""" """
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
for row in table.cells: for row in table.cells:
for cell in row: for cell in row:
if cell.left: if cell.left:
ax.plot([cell.lb[0], cell.lt[0]], ax.plot([cell.lb[0], cell.lt[0]], [cell.lb[1], cell.lt[1]])
[cell.lb[1], cell.lt[1]])
if cell.right: if cell.right:
ax.plot([cell.rb[0], cell.rt[0]], ax.plot([cell.rb[0], cell.rt[0]], [cell.rb[1], cell.rt[1]])
[cell.rb[1], cell.rt[1]])
if cell.top: if cell.top:
ax.plot([cell.lt[0], cell.rt[0]], ax.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]])
[cell.lt[1], cell.rt[1]])
if cell.bottom: if cell.bottom:
ax.plot([cell.lb[0], cell.rb[0]], ax.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]])
[cell.lb[1], cell.rb[1]])
return fig return fig
def contour(self, table): def contour(self, table):
@@ -124,7 +116,7 @@ class PlotMethods(object):
img, table_bbox = (None, {table._bbox: None}) img, table_bbox = (None, {table._bbox: None})
_FOR_LATTICE = False _FOR_LATTICE = False
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
xs, ys = [], [] xs, ys = [], []
if not _FOR_LATTICE: if not _FOR_LATTICE:
@@ -133,21 +125,14 @@ class PlotMethods(object):
ys.extend([t[1], t[3]]) ys.extend([t[1], t[3]])
ax.add_patch( ax.add_patch(
patches.Rectangle( patches.Rectangle(
(t[0], t[1]), (t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue"
t[2] - t[0],
t[3] - t[1],
color='blue'
) )
) )
for t in table_bbox.keys(): for t in table_bbox.keys():
ax.add_patch( ax.add_patch(
patches.Rectangle( patches.Rectangle(
(t[0], t[1]), (t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red"
t[2] - t[0],
t[3] - t[1],
fill=False,
color='red'
) )
) )
if not _FOR_LATTICE: if not _FOR_LATTICE:
@@ -173,25 +158,19 @@ class PlotMethods(object):
""" """
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
xs, ys = [], [] xs, ys = [], []
for t in table._text: for t in table._text:
xs.extend([t[0], t[2]]) xs.extend([t[0], t[2]])
ys.extend([t[1], t[3]]) ys.extend([t[1], t[3]])
ax.add_patch( ax.add_patch(
patches.Rectangle( patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue")
(t[0], t[1]),
t[2] - t[0],
t[3] - t[1],
color='blue'
)
) )
ax.set_xlim(min(xs) - 10, max(xs) + 10) ax.set_xlim(min(xs) - 10, max(xs) + 10)
ax.set_ylim(min(ys) - 10, max(ys) + 10) ax.set_ylim(min(ys) - 10, max(ys) + 10)
for te in table._textedges: for te in table._textedges:
ax.plot([te.x, te.x], ax.plot([te.x, te.x], [te.y0, te.y1])
[te.y0, te.y1])
return fig return fig
@@ -210,14 +189,14 @@ class PlotMethods(object):
""" """
img, table_bbox = table._image img, table_bbox = table._image
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
x_coord = [] x_coord = []
y_coord = [] y_coord = []
for k in table_bbox.keys(): for k in table_bbox.keys():
for coord in table_bbox[k]: for coord in table_bbox[k]:
x_coord.append(coord[0]) x_coord.append(coord[0])
y_coord.append(coord[1]) y_coord.append(coord[1])
ax.plot(x_coord, y_coord, 'ro') ax.plot(x_coord, y_coord, "ro")
ax.imshow(img) ax.imshow(img)
return fig return fig
@@ -235,7 +214,7 @@ class PlotMethods(object):
""" """
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect="equal")
vertical, horizontal = table._segments vertical, horizontal = table._segments
for v in vertical: for v in vertical:
ax.plot([v[0], v[2]], [v[1], v[3]]) ax.plot([v[0], v[2]], [v[1], v[3]])
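The PlotMethods hunks above only re-wrap existing calls; the plotting entry point itself is unchanged. A sketch of how it is typically called (requires matplotlib; the file path is again illustrative, and kind must be one the table's flavor supports, as checked in __call__ above):

    import camelot
    import matplotlib.pyplot as plt

    tables = camelot.read_pdf("foo.pdf", flavor="lattice")
    fig = camelot.plot(tables[0], kind="contour")  # returns a matplotlib Figure
    plt.show()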

View File

@@ -19,8 +19,14 @@ from pdfminer.pdfpage import PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFResourceManager from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal, from pdfminer.layout import (
LTTextLineVertical, LTImage) LAParams,
LTAnno,
LTChar,
LTTextLineHorizontal,
LTTextLineVertical,
LTImage,
)
PY3 = sys.version_info[0] >= 3 PY3 = sys.version_info[0] >= 3
@@ -35,7 +41,7 @@ else:
_VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard('') _VALID_URLS.discard("")
# https://github.com/pandas-dev/pandas/blob/master/pandas/io/common.py # https://github.com/pandas-dev/pandas/blob/master/pandas/io/common.py
@@ -59,9 +65,11 @@ def is_url(url):
def random_string(length): def random_string(length):
ret = '' ret = ""
while length: while length:
ret += random.choice(string.digits + string.ascii_lowercase + string.ascii_uppercase) ret += random.choice(
string.digits + string.ascii_lowercase + string.ascii_uppercase
)
length -= 1 length -= 1
return ret return ret
@@ -79,14 +87,14 @@ def download_url(url):
Temporary filepath. Temporary filepath.
""" """
filename = '{}.pdf'.format(random_string(6)) filename = "{}.pdf".format(random_string(6))
with tempfile.NamedTemporaryFile('wb', delete=False) as f: with tempfile.NamedTemporaryFile("wb", delete=False) as f:
obj = urlopen(url) obj = urlopen(url)
if PY3: if PY3:
content_type = obj.info().get_content_type() content_type = obj.info().get_content_type()
else: else:
content_type = obj.info().getheader('Content-Type') content_type = obj.info().getheader("Content-Type")
if content_type != 'application/pdf': if content_type != "application/pdf":
raise NotImplementedError("File format not supported") raise NotImplementedError("File format not supported")
f.write(obj.read()) f.write(obj.read())
filepath = os.path.join(os.path.dirname(f.name), filename) filepath = os.path.join(os.path.dirname(f.name), filename)
@@ -94,39 +102,38 @@ def download_url(url):
return filepath return filepath
stream_kwargs = [ stream_kwargs = ["columns", "row_tol", "column_tol"]
'columns',
'row_tol',
'column_tol'
]
lattice_kwargs = [ lattice_kwargs = [
'process_background', "process_background",
'line_scale', "line_scale",
'copy_text', "copy_text",
'shift_text', "shift_text",
'line_tol', "line_tol",
'joint_tol', "joint_tol",
'threshold_blocksize', "threshold_blocksize",
'threshold_constant', "threshold_constant",
'iterations' "iterations",
] ]
def validate_input(kwargs, flavor='lattice'): def validate_input(kwargs, flavor="lattice"):
def check_intersection(parser_kwargs, input_kwargs): def check_intersection(parser_kwargs, input_kwargs):
isec = set(parser_kwargs).intersection(set(input_kwargs.keys())) isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
if isec: if isec:
raise ValueError("{} cannot be used with flavor='{}'".format( raise ValueError(
",".join(sorted(isec)), flavor)) "{} cannot be used with flavor='{}'".format(
",".join(sorted(isec)), flavor
)
)
if flavor == 'lattice': if flavor == "lattice":
check_intersection(stream_kwargs, kwargs) check_intersection(stream_kwargs, kwargs)
else: else:
check_intersection(lattice_kwargs, kwargs) check_intersection(lattice_kwargs, kwargs)
def remove_extra(kwargs, flavor='lattice'): def remove_extra(kwargs, flavor="lattice"):
if flavor == 'lattice': if flavor == "lattice":
for key in kwargs.keys(): for key in kwargs.keys():
if key in stream_kwargs: if key in stream_kwargs:
kwargs.pop(key) kwargs.pop(key)
@@ -256,15 +263,19 @@ def scale_image(tables, v_segments, h_segments, factors):
v_segments_new = [] v_segments_new = []
for v in v_segments: for v in v_segments:
x1, x2 = scale(v[0], scaling_factor_x), scale(v[2], scaling_factor_x) x1, x2 = scale(v[0], scaling_factor_x), scale(v[2], scaling_factor_x)
y1, y2 = scale(abs(translate(-img_y, v[1])), scaling_factor_y), scale( y1, y2 = (
abs(translate(-img_y, v[3])), scaling_factor_y) scale(abs(translate(-img_y, v[1])), scaling_factor_y),
scale(abs(translate(-img_y, v[3])), scaling_factor_y),
)
v_segments_new.append((x1, y1, x2, y2)) v_segments_new.append((x1, y1, x2, y2))
h_segments_new = [] h_segments_new = []
for h in h_segments: for h in h_segments:
x1, x2 = scale(h[0], scaling_factor_x), scale(h[2], scaling_factor_x) x1, x2 = scale(h[0], scaling_factor_x), scale(h[2], scaling_factor_x)
y1, y2 = scale(abs(translate(-img_y, h[1])), scaling_factor_y), scale( y1, y2 = (
abs(translate(-img_y, h[3])), scaling_factor_y) scale(abs(translate(-img_y, h[1])), scaling_factor_y),
scale(abs(translate(-img_y, h[3])), scaling_factor_y),
)
h_segments_new.append((x1, y1, x2, y2)) h_segments_new.append((x1, y1, x2, y2))
return tables_new, v_segments_new, h_segments_new return tables_new, v_segments_new, h_segments_new
@@ -291,13 +302,13 @@ def get_rotation(chars, horizontal_text, vertical_text):
rotated 90 degree clockwise. rotated 90 degree clockwise.
""" """
rotation = '' rotation = ""
hlen = len([t for t in horizontal_text if t.get_text().strip()]) hlen = len([t for t in horizontal_text if t.get_text().strip()])
vlen = len([t for t in vertical_text if t.get_text().strip()]) vlen = len([t for t in vertical_text if t.get_text().strip()])
if hlen < vlen: if hlen < vlen:
clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars) clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars) anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
rotation = 'anticlockwise' if clockwise < anticlockwise else 'clockwise' rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise"
return rotation return rotation
@@ -325,10 +336,16 @@ def segments_in_bbox(bbox, v_segments, h_segments):
""" """
lb = (bbox[0], bbox[1]) lb = (bbox[0], bbox[1])
rt = (bbox[2], bbox[3]) rt = (bbox[2], bbox[3])
v_s = [v for v in v_segments if v[1] > lb[1] - 2 and v_s = [
v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2] v
h_s = [h for h in h_segments if h[0] > lb[0] - 2 and for v in v_segments
h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2] if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2
]
h_s = [
h
for h in h_segments
if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2
]
return v_s, h_s return v_s, h_s
@@ -351,9 +368,12 @@ def text_in_bbox(bbox, text):
""" """
lb = (bbox[0], bbox[1]) lb = (bbox[0], bbox[1])
rt = (bbox[2], bbox[3]) rt = (bbox[2], bbox[3])
t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0 t_bbox = [
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 t
<= rt[1] + 2] for t in text
if lb[0] - 2 <= (t.x0 + t.x1) / 2.0 <= rt[0] + 2
and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 <= rt[1] + 2
]
return t_bbox return t_bbox
@@ -390,7 +410,7 @@ def merge_close_lines(ar, line_tol=2):
# (inspired from sklearn.pipeline.Pipeline) # (inspired from sklearn.pipeline.Pipeline)
def flag_font_size(textline, direction, strip_text=''): def flag_font_size(textline, direction, strip_text=""):
"""Flags super/subscripts in text by enclosing them with <s></s>. """Flags super/subscripts in text by enclosing them with <s></s>.
May give false positives. May give false positives.
@@ -409,10 +429,18 @@ def flag_font_size(textline, direction, strip_text=''):
fstring : string fstring : string
""" """
if direction == 'horizontal': if direction == "horizontal":
d = [(t.get_text(), np.round(t.height, decimals=6)) for t in textline if not isinstance(t, LTAnno)] d = [
elif direction == 'vertical': (t.get_text(), np.round(t.height, decimals=6))
d = [(t.get_text(), np.round(t.width, decimals=6)) for t in textline if not isinstance(t, LTAnno)] for t in textline
if not isinstance(t, LTAnno)
]
elif direction == "vertical":
d = [
(t.get_text(), np.round(t.width, decimals=6))
for t in textline
if not isinstance(t, LTAnno)
]
l = [np.round(size, decimals=6) for text, size in d] l = [np.round(size, decimals=6) for text, size in d]
if len(set(l)) > 1: if len(set(l)) > 1:
flist = [] flist = []
@@ -420,21 +448,21 @@ def flag_font_size(textline, direction, strip_text=''):
for key, chars in groupby(d, itemgetter(1)): for key, chars in groupby(d, itemgetter(1)):
if key == min_size: if key == min_size:
fchars = [t[0] for t in chars] fchars = [t[0] for t in chars]
if ''.join(fchars).strip(): if "".join(fchars).strip():
fchars.insert(0, '<s>') fchars.insert(0, "<s>")
fchars.append('</s>') fchars.append("</s>")
flist.append(''.join(fchars)) flist.append("".join(fchars))
else: else:
fchars = [t[0] for t in chars] fchars = [t[0] for t in chars]
if ''.join(fchars).strip(): if "".join(fchars).strip():
flist.append(''.join(fchars)) flist.append("".join(fchars))
fstring = ''.join(flist).strip(strip_text) fstring = "".join(flist).strip(strip_text)
else: else:
fstring = ''.join([t.get_text() for t in textline]).strip(strip_text) fstring = "".join([t.get_text() for t in textline]).strip(strip_text)
return fstring return fstring
def split_textline(table, textline, direction, flag_size=False, strip_text=''): def split_textline(table, textline, direction, flag_size=False, strip_text=""):
"""Splits PDFMiner LTTextLine into substrings if it spans across """Splits PDFMiner LTTextLine into substrings if it spans across
multiple rows/columns. multiple rows/columns.
@@ -464,19 +492,31 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=''):
cut_text = [] cut_text = []
bbox = textline.bbox bbox = textline.bbox
try: try:
if direction == 'horizontal' and not textline.is_empty(): if direction == "horizontal" and not textline.is_empty():
x_overlap = [i for i, x in enumerate(table.cols) if x[0] <= bbox[2] and bbox[0] <= x[1]] x_overlap = [
r_idx = [j for j, r in enumerate(table.rows) if r[1] <= (bbox[1] + bbox[3]) / 2 <= r[0]] i
for i, x in enumerate(table.cols)
if x[0] <= bbox[2] and bbox[0] <= x[1]
]
r_idx = [
j
for j, r in enumerate(table.rows)
if r[1] <= (bbox[1] + bbox[3]) / 2 <= r[0]
]
r = r_idx[0] r = r_idx[0]
x_cuts = [(c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right] x_cuts = [
(c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right
]
if not x_cuts: if not x_cuts:
x_cuts = [(x_overlap[0], table.cells[r][-1].x2)] x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
for obj in textline._objs: for obj in textline._objs:
row = table.rows[r] row = table.rows[r]
for cut in x_cuts: for cut in x_cuts:
if isinstance(obj, LTChar): if isinstance(obj, LTChar):
if (row[1] <= (obj.y0 + obj.y1) / 2 <= row[0] and if (
(obj.x0 + obj.x1) / 2 <= cut[1]): row[1] <= (obj.y0 + obj.y1) / 2 <= row[0]
and (obj.x0 + obj.x1) / 2 <= cut[1]
):
cut_text.append((r, cut[0], obj)) cut_text.append((r, cut[0], obj))
break break
else: else:
@@ -485,19 +525,31 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=''):
cut_text.append((r, cut[0] + 1, obj)) cut_text.append((r, cut[0] + 1, obj))
elif isinstance(obj, LTAnno): elif isinstance(obj, LTAnno):
cut_text.append((r, cut[0], obj)) cut_text.append((r, cut[0], obj))
elif direction == 'vertical' and not textline.is_empty(): elif direction == "vertical" and not textline.is_empty():
y_overlap = [j for j, y in enumerate(table.rows) if y[1] <= bbox[3] and bbox[1] <= y[0]] y_overlap = [
c_idx = [i for i, c in enumerate(table.cols) if c[0] <= (bbox[0] + bbox[2]) / 2 <= c[1]] j
for j, y in enumerate(table.rows)
if y[1] <= bbox[3] and bbox[1] <= y[0]
]
c_idx = [
i
for i, c in enumerate(table.cols)
if c[0] <= (bbox[0] + bbox[2]) / 2 <= c[1]
]
c = c_idx[0] c = c_idx[0]
y_cuts = [(r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom] y_cuts = [
(r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom
]
if not y_cuts: if not y_cuts:
y_cuts = [(y_overlap[0], table.cells[-1][c].y1)] y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
for obj in textline._objs: for obj in textline._objs:
col = table.cols[c] col = table.cols[c]
for cut in y_cuts: for cut in y_cuts:
if isinstance(obj, LTChar): if isinstance(obj, LTChar):
if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and if (
(obj.y0 + obj.y1) / 2 >= cut[1]): col[0] <= (obj.x0 + obj.x1) / 2 <= col[1]
and (obj.y0 + obj.y1) / 2 >= cut[1]
):
cut_text.append((cut[0], c, obj)) cut_text.append((cut[0], c, obj))
break break
else: else:
@@ -511,15 +563,24 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=''):
grouped_chars = [] grouped_chars = []
for key, chars in groupby(cut_text, itemgetter(0, 1)): for key, chars in groupby(cut_text, itemgetter(0, 1)):
if flag_size: if flag_size:
grouped_chars.append((key[0], key[1], grouped_chars.append(
flag_font_size([t[2] for t in chars], direction, strip_text=strip_text))) (
key[0],
key[1],
flag_font_size(
[t[2] for t in chars], direction, strip_text=strip_text
),
)
)
else: else:
gchars = [t[2].get_text() for t in chars] gchars = [t[2].get_text() for t in chars]
grouped_chars.append((key[0], key[1], ''.join(gchars).strip(strip_text))) grouped_chars.append((key[0], key[1], "".join(gchars).strip(strip_text)))
return grouped_chars return grouped_chars
def get_table_index(table, t, direction, split_text=False, flag_size=False, strip_text='',): def get_table_index(
table, t, direction, split_text=False, flag_size=False, strip_text=""
):
"""Gets indices of the table cell where given text object lies by """Gets indices of the table cell where given text object lies by
comparing their y and x-coordinates. comparing their y and x-coordinates.
@@ -558,8 +619,9 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False, stri
""" """
r_idx, c_idx = [-1] * 2 r_idx, c_idx = [-1] * 2
for r in range(len(table.rows)): for r in range(len(table.rows)):
if ((t.y0 + t.y1) / 2.0 < table.rows[r][0] and if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[
(t.y0 + t.y1) / 2.0 > table.rows[r][1]): r
][1]:
lt_col_overlap = [] lt_col_overlap = []
for c in table.cols: for c in table.cols:
if c[0] <= t.x1 and c[1] >= t.x0: if c[0] <= t.x1 and c[1] >= t.x0:
@@ -569,11 +631,14 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False, stri
else: else:
lt_col_overlap.append(-1) lt_col_overlap.append(-1)
if len(list(filter(lambda x: x != -1, lt_col_overlap))) == 0: if len(list(filter(lambda x: x != -1, lt_col_overlap))) == 0:
text = t.get_text().strip('\n') text = t.get_text().strip("\n")
text_range = (t.x0, t.x1) text_range = (t.x0, t.x1)
col_range = (table.cols[0][0], table.cols[-1][1]) col_range = (table.cols[0][0], table.cols[-1][1])
warnings.warn("{} {} does not lie in column range {}".format( warnings.warn(
text, text_range, col_range)) "{} {} does not lie in column range {}".format(
text, text_range, col_range
)
)
r_idx = r r_idx = r
c_idx = lt_col_overlap.index(max(lt_col_overlap)) c_idx = lt_col_overlap.index(max(lt_col_overlap))
break break
@@ -594,10 +659,24 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False, stri
error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
if split_text: if split_text:
return split_textline(table, t, direction, flag_size=flag_size, strip_text=strip_text), error return (
split_textline(
table, t, direction, flag_size=flag_size, strip_text=strip_text
),
error,
)
else: else:
if flag_size: if flag_size:
return [(r_idx, c_idx, flag_font_size(t._objs, direction, strip_text=strip_text))], error return (
[
(
r_idx,
c_idx,
flag_font_size(t._objs, direction, strip_text=strip_text),
)
],
error,
)
else: else:
return [(r_idx, c_idx, t.get_text().strip(strip_text))], error return [(r_idx, c_idx, t.get_text().strip(strip_text))], error
@@ -650,14 +729,20 @@ def compute_whitespace(d):
r_nempty_cells, c_nempty_cells = [], [] r_nempty_cells, c_nempty_cells = [], []
for i in d: for i in d:
for j in i: for j in i:
if j.strip() == '': if j.strip() == "":
whitespace += 1 whitespace += 1
whitespace = 100 * (whitespace / float(len(d) * len(d[0]))) whitespace = 100 * (whitespace / float(len(d) * len(d[0])))
return whitespace return whitespace
def get_page_layout(filename, char_margin=1.0, line_margin=0.5, word_margin=0.1, def get_page_layout(
detect_vertical=True, all_texts=True): filename,
char_margin=1.0,
line_margin=0.5,
word_margin=0.1,
detect_vertical=True,
all_texts=True,
):
"""Returns a PDFMiner LTPage object and page dimension of a single """Returns a PDFMiner LTPage object and page dimension of a single
page pdf. See https://euske.github.io/pdfminer/ to get definitions page pdf. See https://euske.github.io/pdfminer/ to get definitions
of kwargs. of kwargs.
@@ -680,16 +765,18 @@ def get_page_layout(filename, char_margin=1.0, line_margin=0.5, word_margin=0.1,
Dimension of pdf page in the form (width, height). Dimension of pdf page in the form (width, height).
""" """
with open(filename, 'rb') as f: with open(filename, "rb") as f:
parser = PDFParser(f) parser = PDFParser(f)
document = PDFDocument(parser) document = PDFDocument(parser)
if not document.is_extractable: if not document.is_extractable:
raise PDFTextExtractionNotAllowed raise PDFTextExtractionNotAllowed
laparams = LAParams(char_margin=char_margin, laparams = LAParams(
line_margin=line_margin, char_margin=char_margin,
word_margin=word_margin, line_margin=line_margin,
detect_vertical=detect_vertical, word_margin=word_margin,
all_texts=all_texts) detect_vertical=detect_vertical,
all_texts=all_texts,
)
rsrcmgr = PDFResourceManager() rsrcmgr = PDFResourceManager()
device = PDFPageAggregator(rsrcmgr, laparams=laparams) device = PDFPageAggregator(rsrcmgr, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device) interpreter = PDFPageInterpreter(rsrcmgr, device)
@@ -721,13 +808,13 @@ def get_text_objects(layout, ltype="char", t=None):
List of PDFMiner text objects. List of PDFMiner text objects.
""" """
if ltype == 'char': if ltype == "char":
LTObject = LTChar LTObject = LTChar
elif ltype == 'image': elif ltype == "image":
LTObject = LTImage LTObject = LTImage
elif ltype == 'horizontal_text': elif ltype == "horizontal_text":
LTObject = LTTextLineHorizontal LTObject = LTTextLineHorizontal
elif ltype == 'vertical_text': elif ltype == "vertical_text":
LTObject = LTTextLineVertical LTObject = LTTextLineVertical
if t is None: if t is None:
t = [] t = []