Fix unit tests, lint, drop Python 2 support

Drop EOL Python 2 support. Resolve unit test discrepancies. Update unit tests to pass in Travis across all supported Python versions. Fix lint warnings.
2020-04-18 17:25:47 -07:00 · 2020-04-18 17:25:47 -07:00 · bd2aab5b2d
parent 7d4c9e53c6
commit bd2aab5b2d
26 changed files with 498 additions and 276 deletions
--- a/.bandit
+++ b/.bandit
@ -0,0 +1,3 @@
+[bandit]
+# Skip B101 (assert_used) and B102 (exec_used); asserts are necessary for unit test code
+skips: B101,B102
--- a/.gitignore
+++ b/.gitignore
@ -4,6 +4,7 @@ __pycache__/

 build/
 dist/
+prof/
 *.egg-info/
 .eggs/
 .coverage
@ -17,3 +18,5 @@ htmlcov/

 # vscode
 .vscode
+
+.DS_Store
--- a/.travis.yml
+++ b/.travis.yml
@ -1,4 +1,3 @@
-sudo: true
 language: python
 cache: pip
 addons:
@ -8,10 +7,6 @@ install:
  - make install
 jobs:
  include:
-    - stage: test
-      script:
-        - make test
-      python: '2.7'
    - stage: test
      script:
        - make test
--- a/camelot/core.py
+++ b/camelot/core.py
@ -38,7 +38,7 @@ class TextEdge(object):
    intersections: int
        Number of intersections with horizontal text rows.
    is_valid: bool
-        A text edge is valid if it intersections with at least
+        A text edge is valid if it intersects with at least
        TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.

    """
@ -65,7 +65,8 @@ class TextEdge(object):
        the is_valid attribute.
        """
        if np.isclose(self.y0, y0, atol=edge_tol):
-            self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
+            self.x = (self.intersections * self.x + x) / \
+                float(self.intersections + 1)
            self.y0 = y0
            self.intersections += 1
            # a textedge is valid only if it extends uninterrupted
@ -141,13 +142,16 @@ class TextEdges(object):
        """
        intersections_sum = {
            "left": sum(
-                te.intersections for te in self._textedges["left"] if te.is_valid
+                te.intersections for te in self._textedges["left"]
+                if te.is_valid
            ),
            "right": sum(
-                te.intersections for te in self._textedges["right"] if te.is_valid
+                te.intersections for te in self._textedges["right"]
+                if te.is_valid
            ),
            "middle": sum(
-                te.intersections for te in self._textedges["middle"] if te.is_valid
+                te.intersections for te in self._textedges["middle"]
+                if te.is_valid
            ),
        }

@ -292,7 +296,10 @@ class Cell(object):

    def __repr__(self):
        return "<Cell x1={} y1={} x2={} y2={}>".format(
-            round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
+            round(self.x1, 2),
+            round(self.y1, 2),
+            round(self.x2, 2),
+            round(self.y2, 2)
        )

    @property
@ -342,7 +349,9 @@ class Table(object):
    def __init__(self, cols, rows):
        self.cols = cols
        self.rows = rows
-        self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
+        self.cells = [
+            [Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows
+        ]
        self.df = None
        self.shape = (0, 0)
        self.accuracy = 0
@ -579,7 +588,8 @@ class Table(object):
            Output filepath.

        """
-        kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1}
+        kw = {"encoding": "utf-8", "index": False, "header": False,
+              "quoting": 1}
        kw.update(kwargs)
        self.df.to_csv(path, **kw)

@ -616,6 +626,7 @@ class Table(object):
            "encoding": "utf-8",
        }
        kw.update(kwargs)
+        # pylint: disable=abstract-class-instantiated
        writer = pd.ExcelWriter(path)
        self.df.to_excel(writer, **kw)
        writer.save()
@ -692,7 +703,8 @@ class TableList(object):
        ext = kwargs.get("ext")
        for table in self._tables:
            filename = os.path.join(
-                "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
+                "{}-page-{}-table-{}{}".format(root, table.page, table.order,
+                                               ext)
            )
            filepath = os.path.join(dirname, filename)
            to_format = self._format_func(table, f)
@ -707,7 +719,10 @@ class TableList(object):
        with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
            for table in self._tables:
                filename = os.path.join(
-                    "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
+                    "{}-page-{}-table-{}{}".format(root,
+                                                   table.page,
+                                                   table.order,
+                                                   ext)
                )
                filepath = os.path.join(dirname, filename)
                z.write(filepath, os.path.basename(filepath))
@ -739,10 +754,12 @@ class TableList(object):
                self._compress_dir(**kwargs)
        elif f == "excel":
            filepath = os.path.join(dirname, basename)
+            # pylint: disable=abstract-class-instantiated
            writer = pd.ExcelWriter(filepath)
            for table in self._tables:
                sheet_name = "page-{}-table-{}".format(table.page, table.order)
-                table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
+                table.df.to_excel(writer, sheet_name=sheet_name,
+                                  encoding="utf-8")
            writer.save()
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + ".zip"
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -113,14 +113,20 @@ class PDFHandler(object):
            outfile.addPage(p)
            with open(fpath, "wb") as f:
                outfile.write(f)
-            layout, dim = get_page_layout(fpath)
+            layout, __ = get_page_layout(fpath)
            # fix rotated PDF
            chars = get_text_objects(layout, ltype="char")
            horizontal_text = get_text_objects(layout, ltype="horizontal_text")
            vertical_text = get_text_objects(layout, ltype="vertical_text")
            rotation = get_rotation(chars, horizontal_text, vertical_text)
            if rotation != "":
-                fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
+                fpath_new = "".join(
+                    [
+                        froot.replace("page", "p"),
+                        "_rotated",
+                        fext
+                    ]
+                )
                os.rename(fpath, fpath_new)
                infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
                if infile.isEncrypted:
@ -136,7 +142,8 @@ class PDFHandler(object):
                    outfile.write(f)

    def parse(
-        self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs
+        self, flavor="lattice", suppress_stdout=False, layout_kwargs=None,
+        **kwargs
    ):
        """Extracts tables by calling parser.get_tables on all single
        page PDFs.
@ -149,7 +156,7 @@ class PDFHandler(object):
        suppress_stdout : str (default: False)
            Suppress logs and warnings.
        layout_kwargs : dict, optional (default: {})
-            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
+            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
        kwargs : dict
            See camelot.read_pdf kwargs.

@ -159,17 +166,21 @@ class PDFHandler(object):
            List of tables found in PDF.

        """
+        layout_kwargs = layout_kwargs or {}
        tables = []
        with TemporaryDirectory() as tempdir:
            for p in self.pages:
                self._save_page(self.filepath, p, tempdir)
            pages = [
-                os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
+                os.path.join(tempdir, "page-{0}.pdf".format(p))
+                for p in self.pages
            ]
-            parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
+            parser = Lattice(**kwargs) \
+                if flavor == "lattice" else Stream(**kwargs)
            for p in pages:
                t = parser.extract_tables(
-                    p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
+                    p, suppress_stdout=suppress_stdout,
+                    layout_kwargs=layout_kwargs
                )
                tables.extend(t)
        return TableList(sorted(tables))
--- a/camelot/io.py
+++ b/camelot/io.py
@ -12,7 +12,7 @@ def read_pdf(
    password=None,
    flavor="lattice",
    suppress_stdout=False,
-    layout_kwargs={},
+    layout_kwargs=None,
    **kwargs
 ):
    """Read PDF and return extracted tables.
@ -80,16 +80,16 @@ def read_pdf(
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.

-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
    threshold_constant* : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.

-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
    iterations* : int, optional (default: 0)
        Number of times for erosion/dilation is applied.

-        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
+        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_. # noqa
    resolution* : int, optional (default: 300)
        Resolution used for PDF to PNG conversion.

@ -98,6 +98,7 @@ def read_pdf(
    tables : camelot.core.TableList

    """
+    layout_kwargs = layout_kwargs or {}
    if flavor not in ["lattice", "stream"]:
        raise NotImplementedError(
            "Unknown flavor specified." " Use either 'lattice' or 'stream'"
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -12,9 +12,18 @@ class BaseParser(object):
    def _generate_layout(self, filename, layout_kwargs):
        self.filename = filename
        self.layout_kwargs = layout_kwargs
-        self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs)
+        self.layout, self.dimensions = get_page_layout(
+            filename,
+            **layout_kwargs
+        )
        self.images = get_text_objects(self.layout, ltype="image")
-        self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text")
-        self.vertical_text = get_text_objects(self.layout, ltype="vertical_text")
+        self.horizontal_text = get_text_objects(
+            self.layout,
+            ltype="horizontal_text"
+        )
+        self.vertical_text = get_text_objects(
+            self.layout,
+            ltype="vertical_text"
+        )
        self.pdf_width, self.pdf_height = self.dimensions
        self.rootname, __ = os.path.splitext(self.filename)
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -2,14 +2,10 @@

 from __future__ import division
 import os
-import sys
 import copy
-import locale
 import logging
 import warnings
-import subprocess

-import numpy as np
 import pandas as pd

 from .base import BaseParser
@ -80,7 +76,7 @@ class Lattice(BaseParser):
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.

-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
    threshold_constant : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.
@ -102,7 +98,7 @@ class Lattice(BaseParser):
        process_background=False,
        line_scale=15,
        copy_text=None,
-        shift_text=["l", "t"],
+        shift_text=None,
        split_text=False,
        flag_size=False,
        strip_text="",
@ -114,6 +110,7 @@ class Lattice(BaseParser):
        resolution=300,
        **kwargs
    ):
+        shift_text = shift_text or ["l", "t"]
        self.table_regions = table_regions
        self.table_areas = table_areas
        self.process_background = process_background
@ -217,8 +214,7 @@ class Lattice(BaseParser):
        )
        gs_call = gs_call.encode().split()
        null = open(os.devnull, "wb")
-        with Ghostscript(*gs_call, stdout=null) as gs:
-            pass
+        Ghostscript(*gs_call, stdout=null)
        null.close()

    def _generate_table_bbox(self):
@ -247,7 +243,8 @@ class Lattice(BaseParser):
        image_height_scaler = image_height / float(self.pdf_height)
        pdf_width_scaler = self.pdf_width / float(image_width)
        pdf_height_scaler = self.pdf_height / float(image_height)
-        image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
+        image_scalers = (image_width_scaler,
+                         image_height_scaler, self.pdf_height)
        pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)

        if self.table_areas is None:
@ -291,7 +288,11 @@ class Lattice(BaseParser):

        self.table_bbox_unscaled = copy.deepcopy(table_bbox)

-        self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image(
+        [
+            self.table_bbox,
+            self.vertical_segments,
+            self.horizontal_segments
+        ] = scale_image(
            table_bbox, vertical_segments, horizontal_segments, pdf_scalers
        )

@ -315,7 +316,10 @@ class Lattice(BaseParser):
        rows.extend([tk[1], tk[3]])
        # sort horizontal and vertical segments
        cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
-        rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
+        rows = merge_close_lines(
+            sorted(rows, reverse=True),
+            line_tol=self.line_tol
+        )
        # make grid using x and y coord of shortlisted rows and cols
        cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
        rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
@ -359,7 +363,10 @@ class Lattice(BaseParser):
        accuracy = compute_accuracy([[100, pos_errors]])

        if self.copy_text is not None:
-            table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
+            table = Lattice._copy_spanning_text(
+                table,
+                copy_text=self.copy_text
+            )

        data = table.data
        table.df = pd.DataFrame(data)
@ -383,20 +390,28 @@ class Lattice(BaseParser):

        return table

-    def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
+    def extract_tables(
+        self,
+        filename,
+        suppress_stdout=False,
+        layout_kwargs=None
+    ):
+        layout_kwargs = layout_kwargs or {}
        self._generate_layout(filename, layout_kwargs)
+        rootname = os.path.basename(self.rootname)
        if not suppress_stdout:
-            logger.info("Processing {}".format(os.path.basename(self.rootname)))
+            logger.info("Processing {rootname}".format(rootname=rootname))

        if not self.horizontal_text:
            if self.images:
                warnings.warn(
-                    "{} is image-based, camelot only works on"
-                    " text-based pages.".format(os.path.basename(self.rootname))
+                    "{rootname} is image-based, "
+                    "camelot only works on text-based pages."
+                    .format(rootname=rootname)
                )
            else:
                warnings.warn(
-                    "No tables found on {}".format(os.path.basename(self.rootname))
+                    "No tables found on {rootname}".format(rootname=rootname)
                )
            return []

@ -408,8 +423,10 @@ class Lattice(BaseParser):
        for table_idx, tk in enumerate(
            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
        ):
-            cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
-            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
+            cols, rows, v_s, h_s = self._generate_columns_and_rows(
+                table_idx, tk)
+            table = self._generate_table(
+                table_idx, cols, rows, v_s=v_s, h_s=h_s)
            table._bbox = tk
            _tables.append(table)

--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -10,7 +10,8 @@ import pandas as pd

 from .base import BaseParser
 from ..core import TextEdges, Table
-from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace
+from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
+                     compute_whitespace)


 logger = logging.getLogger("camelot")
@ -70,6 +71,9 @@ class Stream(BaseParser):
    ):
        self.table_regions = table_regions
        self.table_areas = table_areas
+        self.table_bbox = None
+        self.t_bbox = None
+        self.textedges = []
        self.columns = columns
        self._validate_columns()
        self.split_text = split_text
@ -95,10 +99,10 @@ class Stream(BaseParser):
            Tuple (x0, y0, x1, y1) in pdf coordinate space.

        """
-        xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]])
-        ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]])
-        xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]])
-        ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]])
+        xmin = min(t.x0 for direction in t_bbox for t in t_bbox[direction])
+        ymin = min(t.y0 for direction in t_bbox for t in t_bbox[direction])
+        xmax = max(t.x1 for direction in t_bbox for t in t_bbox[direction])
+        ymax = max(t.y1 for direction in t_bbox for t in t_bbox[direction])
        text_bbox = (xmin, ymin, xmax, ymax)
        return text_bbox

@ -119,21 +123,25 @@ class Stream(BaseParser):
            Two-dimensional list of text objects grouped into rows.

        """
-        row_y = 0
+        row_y = None
        rows = []
        temp = []
-        for t in text:
+        non_empty_text = [t for t in text if t.get_text().strip()]
+        for t in non_empty_text:
            # is checking for upright necessary?
-            # if t.get_text().strip() and all([obj.upright for obj in t._objs if
-            # type(obj) is LTChar]):
-            if t.get_text().strip():
-                if not np.isclose(row_y, t.y0, atol=row_tol):
-                    rows.append(sorted(temp, key=lambda t: t.x0))
-                    temp = []
-                    row_y = t.y0
-                temp.append(t)
+            # if t.get_text().strip() and all([obj.upright \
+            #   for obj in t._objs
+            # if type(obj) is LTChar]):
+            if row_y is None:
+                row_y = t.y0
+            elif not np.isclose(row_y, t.y0, atol=row_tol):
+                rows.append(sorted(temp, key=lambda t: t.x0))
+                temp = []
+                # We update the row's bottom as we go, to be forgiving if there
+                # is a gradual change across multiple columns.
+                row_y = t.y0
+            temp.append(t)
        rows.append(sorted(temp, key=lambda t: t.x0))
-        __ = rows.pop(0)  # TODO: hacky
        return rows

    @staticmethod
@ -170,7 +178,8 @@ class Stream(BaseParser):
                        merged.append(higher)
                elif column_tol < 0:
                    if higher[0] <= lower[1]:
-                        if np.isclose(higher[0], lower[1], atol=abs(column_tol)):
+                        if np.isclose(higher[0], lower[1],
+                                      atol=abs(column_tol)):
                            merged.append(higher)
                        else:
                            upper_bound = max(lower[1], higher[1])
@ -198,10 +207,13 @@ class Stream(BaseParser):

        """
        row_mids = [
-            sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0
+            sum((t.y0 + t.y1) / 2 for t in r) / len(r) if len(r) > 0 else 0
            for r in rows_grouped
        ]
-        rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
+        rows = [
+            (row_mids[i] + row_mids[i - 1]) / 2
+            for i in range(1, len(row_mids))
+        ]
        rows.insert(0, text_y_max)
        rows.append(text_y_min)
        rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
@ -230,7 +242,9 @@ class Stream(BaseParser):
            text = Stream._group_rows(text, row_tol=row_tol)
            elements = [len(r) for r in text]
            new_cols = [
-                (t.x0, t.x1) for r in text if len(r) == max(elements) for t in r
+                (t.x0, t.x1)
+                for r in text if len(r) == max(elements)
+                for t in r
            ]
            cols.extend(Stream._merge_columns(sorted(new_cols)))
        return cols
@ -262,12 +276,13 @@ class Stream(BaseParser):
    def _validate_columns(self):
        if self.table_areas is not None and self.columns is not None:
            if len(self.table_areas) != len(self.columns):
-                raise ValueError("Length of table_areas and columns" " should be equal")
+                raise ValueError("Length of table_areas and columns"
+                                 " should be equal")

    def _nurminen_table_detection(self, textlines):
        """A general implementation of the table detection algorithm
        described by Anssi Nurminen's master's thesis.
-        Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3
+        Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 # noqa

        Assumes that tables are situated relatively far apart
        vertically.
@ -284,7 +299,7 @@ class Stream(BaseParser):
        # guess table areas using textlines and relevant edges
        table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
        # treat whole page as table area if no table areas found
-        if not len(table_bbox):
+        if not table_bbox:
            table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}

        return table_bbox
@ -302,7 +317,8 @@ class Stream(BaseParser):
                    y1 = float(y1)
                    x2 = float(x2)
                    y2 = float(y2)
-                    region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text)
+                    region_text = text_in_bbox(
+                        (x1, y2, x2, y1), self.horizontal_text)
                    hor_text.extend(region_text)
            # find tables based on nurminen's detection algorithm
            table_bbox = self._nurminen_table_detection(hor_text)
@ -328,8 +344,10 @@ class Stream(BaseParser):

        self.t_bbox = t_bbox

-        text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
-        rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
+        text_x_min, text_y_min, text_x_max, text_y_max = \
+            self._text_bbox(self.t_bbox)
+        rows_grouped = self._group_rows(
+            self.t_bbox["horizontal"], row_tol=self.row_tol)
        rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
        elements = [len(r) for r in rows_grouped]

@ -354,14 +372,23 @@ class Stream(BaseParser):
                # see if the list contains elements, if yes, then use
                # the mode after removing 1s
                elements = list(filter(lambda x: x != 1, elements))
-                if len(elements):
+                if elements:
                    ncols = max(set(elements), key=elements.count)
                else:
                    warnings.warn(
-                        "No tables found in table area {}".format(table_idx + 1)
+                        "No tables found in table area {}"
+                        .format(table_idx + 1)
                    )
-            cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
-            cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
+            cols = [
+                (t.x0, t.x1)
+                for r in rows_grouped
+                if len(r) == ncols
+                for t in r
+            ]
+            cols = self._merge_columns(
+                sorted(cols),
+                column_tol=self.column_tol
+            )
            inner_text = []
            for i in range(1, len(cols)):
                left = cols[i - 1][1]
@ -431,23 +458,30 @@ class Stream(BaseParser):

        return table

-    def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
+    def extract_tables(self, filename, suppress_stdout=False,
+                       layout_kwargs=None):
+        layout_kwargs = layout_kwargs or {}
        self._generate_layout(filename, layout_kwargs)
        if not suppress_stdout:
-            logger.info("Processing {}".format(os.path.basename(self.rootname)))
+            logger.info("Processing {}".format(
+                os.path.basename(self.rootname)))

        if not self.horizontal_text:
            if self.images:
                warnings.warn(
                    "{} is image-based, camelot only works on"
-                    " text-based pages.".format(os.path.basename(self.rootname))
+                    " text-based pages.".format(
+                        os.path.basename(self.rootname))
                )
            else:
                warnings.warn(
-                    "No tables found on {}".format(os.path.basename(self.rootname))
+                    "No tables found on {}".format(
+                        os.path.basename(self.rootname))
                )
            return []

+        # Identify plausible areas within the doc where tables lie,
+        # populate table_bbox keys with these areas.
        self._generate_table_bbox()

        _tables = []
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -37,7 +37,7 @@ class PlotMethods(object):
            raise NotImplementedError(
                "Lattice flavor does not support kind='{}'".format(kind)
            )
-        elif table.flavor == "stream" and kind in ["joint", "line"]:
+        elif table.flavor == "stream" and kind in ["line"]:
            raise NotImplementedError(
                "Stream flavor does not support kind='{}'".format(kind)
            )
@ -64,7 +64,13 @@ class PlotMethods(object):
        for t in table._text:
            xs.extend([t[0], t[2]])
            ys.extend([t[1], t[3]])
-            ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
+            ax.add_patch(
+                patches.Rectangle(
+                        (t[0], t[1]),
+                        t[2] - t[0],
+                        t[3] - t[1]
+                    )
+                )
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)
        return fig
@ -132,7 +138,8 @@ class PlotMethods(object):
        for t in table_bbox.keys():
            ax.add_patch(
                patches.Rectangle(
-                    (t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red"
+                    (t[0], t[1]), t[2] - t[0], t[3] - t[1],
+                    fill=False, color="red"
                )
            )
            if not _FOR_LATTICE:
@ -164,7 +171,10 @@ class PlotMethods(object):
            xs.extend([t[0], t[2]])
            ys.extend([t[1], t[3]])
            ax.add_patch(
-                patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue")
+                patches.Rectangle(
+                    (t[0], t[1]), t[2] - t[0], t[3] - t[1],
+                    color="blue"
+                )
            )
        ax.set_xlim(min(xs) - 10, max(xs) + 10)
        ax.set_ylim(min(ys) - 10, max(ys) + 10)
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -30,6 +30,9 @@ from pdfminer.layout import (
 )


+# pylint: disable=import-error
+# PyLint will evaluate both branches, and will necessarily complain about one
+# of them.
 PY3 = sys.version_info[0] >= 3
 if PY3:
    from urllib.request import urlopen
@ -310,7 +313,8 @@ def get_rotation(chars, horizontal_text, vertical_text):
    if hlen < vlen:
        clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
        anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
-        rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise"
+        rotation = "anticlockwise" if clockwise < anticlockwise \
+            else "clockwise"
    return rotation


@ -341,12 +345,16 @@ def segments_in_bbox(bbox, v_segments, h_segments):
    v_s = [
        v
        for v in v_segments
-        if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2
+        if v[1] > lb[1] - 2 and
+        v[3] < rt[1] + 2 and
+        lb[0] - 2 <= v[0] <= rt[0] + 2
    ]
    h_s = [
        h
        for h in h_segments
-        if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2
+        if h[0] > lb[0] - 2 and
+        h[2] < rt[0] + 2 and
+        lb[1] - 2 <= h[1] <= rt[1] + 2
    ]
    return v_s, h_s

@ -464,10 +472,10 @@ def flag_font_size(textline, direction, strip_text=""):
            for t in textline
            if not isinstance(t, LTAnno)
        ]
-    l = [np.round(size, decimals=6) for text, size in d]
-    if len(set(l)) > 1:
+    text_sizes = [np.round(size, decimals=6) for text, size in d]
+    if len(set(text_sizes)) > 1:
        flist = []
-        min_size = min(l)
+        min_size = min(text_sizes)
        for key, chars in groupby(d, itemgetter(1)):
            if key == min_size:
                fchars = [t[0] for t in chars]
@ -511,7 +519,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
        of row/column and text is the an lttextline substring.

    """
-    idx = 0
    cut_text = []
    bbox = textline.bbox
    try:
@ -528,7 +535,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
            ]
            r = r_idx[0]
            x_cuts = [
-                (c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right
+                (c, table.cells[r][c].x2)
+                for c in x_overlap
+                if table.cells[r][c].right
            ]
            if not x_cuts:
                x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
@ -561,7 +570,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
            ]
            c = c_idx[0]
            y_cuts = [
-                (r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom
+                (r, table.cells[r][c].y1)
+                for r in y_overlap
+                if table.cells[r][c].bottom
            ]
            if not y_cuts:
                y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
@ -644,9 +655,8 @@ def get_table_index(
    """
    r_idx, c_idx = [-1] * 2
    for r in range(len(table.rows)):
-        if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[
-            r
-        ][1]:
+        if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and \
+           (t.y0 + t.y1) / 2.0 > table.rows[r][1]:
            lt_col_overlap = []
            for c in table.cols:
                if c[0] <= t.x1 and c[1] >= t.x0:
@ -681,7 +691,9 @@ def get_table_index(
    X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1)
    Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1)
    charea = X * Y
-    error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
+    error = (
+        (X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))
+    ) / charea

    if split_text:
        return (
@ -697,13 +709,16 @@ def get_table_index(
                    (
                        r_idx,
                        c_idx,
-                        flag_font_size(t._objs, direction, strip_text=strip_text),
+                        flag_font_size(t._objs,
+                                       direction,
+                                       strip_text=strip_text),
                    )
                ],
                error,
            )
        else:
-            return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], error
+            return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], \
+                error


 def compute_accuracy(error_weights):
@ -751,7 +766,6 @@ def compute_whitespace(d):

    """
    whitespace = 0
-    r_nempty_cells, c_nempty_cells = [], []
    for i in d:
        for j in i:
            if j.strip() == "":
@ -811,6 +825,7 @@ def get_page_layout(
            width = layout.bbox[2]
            height = layout.bbox[3]
            dim = (width, height)
+            break  # we assume a single page pdf
        return layout, dim


--- a/docs/user/install.rst
+++ b/docs/user/install.rst
@ -13,7 +13,7 @@ The easiest way to install Camelot is to install it with `conda`_, which is a pa

    $ conda install -c conda-forge camelot-py

-.. note:: Camelot is available for Python 2.7, 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
+.. note:: Camelot is available for Python 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install Ghostscript, which you can get from their `downloads page`_.

 .. _conda: https://conda.io/docs/
 .. _Anaconda: http://docs.continuum.io/anaconda/
--- a/requirements.txt
+++ b/requirements.txt
@ -4,5 +4,5 @@ numpy>=1.13.3
 opencv-python>=3.4.2.17
 openpyxl>=2.5.8
 pandas>=0.23.4
-pdfminer.six>=20170720
+pdfminer.six>=20200402
 PyPDF2>=1.26.0
--- a/setup.py
+++ b/setup.py
@ -19,7 +19,7 @@ requires = [
    'numpy>=1.13.3',
    'openpyxl>=2.5.8',
    'pandas>=0.23.4',
-    'pdfminer.six>=20170720',
+    'pdfminer.six>=20200402',
    'PyPDF2>=1.26.0'
 ]

@ -69,9 +69,8 @@ def setup_package():
                    },
                    classifiers=[
                        # Trove classifiers
-                        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
+                        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa
                        'License :: OSI Approved :: MIT License',
-                        'Programming Language :: Python :: 2.7',
                        'Programming Language :: Python :: 3.5',
                        'Programming Language :: Python :: 3.6',
                        'Programming Language :: Python :: 3.7'
--- a/tests/data.py
+++ b/tests/data.py
@ -4,16 +4,6 @@ from __future__ import unicode_literals


 data_stream = [
-    [
-        "",
-        "Table: 5            Public Health Outlay 2012-13 (Budget Estimates)        (Rs. in 000)",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    ],
    ["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
    ["", "", "", "", "", "Revenue &", "", ""],
    ["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
@ -80,7 +70,8 @@ data_stream = [
        "5,000",
        "33,051,480",
    ],
-    ["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560", "4,508,180"],
+    ["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560",
+     "4,508,180"],
    [
        "Gujarat",
        "26,328,400",
@ -171,7 +162,8 @@ data_stream = [
        "313,762",
        "67,044,159",
    ],
-    ["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700", "0", "3,579,700"],
+    ["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700",
+     "0", "3,579,700"],
    [
        "Meghalaya",
        "2,894,093",
@ -236,7 +228,8 @@ data_stream = [

 data_stream_table_rotated = [
    [
-        "Table 21  Current use of contraception by background characteristics\u2014Continued",
+        "Table 21  Current use of contraception by background characteristics"
+        "\u2014Continued",
        "",
        "",
        "",
@ -330,7 +323,8 @@ data_stream_table_rotated = [
        "Total",
        "women",
    ],
-    ["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
+    ["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
+     "", ""],
    [
        "Scheduled caste",
        "74.8",
@ -407,7 +401,8 @@ data_stream_table_rotated = [
        "100.0",
        "3,319",
    ],
-    ["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
+    ["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "",
+     "", "", "", ""],
    [
        "Lowest",
        "64.5",
@ -830,7 +825,8 @@ data_stream_table_rotated = [

 data_stream_two_tables_1 = [
    [
-        "[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)",
+        "Program. Represents arrests reported (not charged) by 12,910 "
+        "agencies with a total population of 247,526,916 as estimated",
        "",
        "",
        "",
@ -842,7 +838,8 @@ data_stream_two_tables_1 = [
        "",
    ],
    [
-        "Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
+        "by the FBI. Some persons may be arrested more than once during a "
+        "year, therefore, the data in this table, in some cases,",
        "",
        "",
        "",
@ -854,19 +851,8 @@ data_stream_two_tables_1 = [
        "",
    ],
    [
-        "by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    ],
-    [
-        "could represent multiple arrests of the same person. See text, this section and source]",
+        "could represent multiple arrests of the same person. See text, "
+        "this section and source]",
        "",
        "",
        "",
@ -903,7 +889,8 @@ data_stream_two_tables_1 = [
        "and over",
    ],
    [
-        "Total   .\n .\n .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  .\n .  .\n .  .\n .  .\n .  .  .",
+        "Total   .\n .\n .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  .\n .  "
+        ".\n .  .\n .  .\n .  .  .",
        "11,062 .6",
        "1,540 .0",
        "9,522 .6",
@ -915,7 +902,8 @@ data_stream_two_tables_1 = [
        "2,330 .9",
    ],
    [
-        "Violent crime  .  .  .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  .\n .  .",
+        "Violent crime   .  .  .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  "
+        ".\n .  .",
        "467 .9",
        "69 .1",
        "398 .8",
@ -976,7 +964,8 @@ data_stream_two_tables_1 = [
        "64.5",
    ],
    [
-        "Property crime  .  .  .  .\n .  .\n .  .  .\n .  .  .\n .\n .  .  .  .",
+        "Property crime  .  .  .  .\n .  .\n .  .  .\n .  .  .\n .\n .  .  "
+        ".  .",
        "1,396 .4",
        "338 .7",
        "1,057 .7",
@ -1060,7 +1049,8 @@ data_stream_two_tables_1 = [
        "25.5",
    ],
    [
-        "Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n.",
+        "Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
+        ".\n.\n.\n.",
        "173.7",
        "5.1",
        "168.5",
@ -1290,19 +1280,8 @@ data_stream_two_tables_1 = [
    ],
    [
        "",
-        "– Represents zero. X Not applicable. 1 Buying, receiving, possessing stolen property. 2 Except forcible rape and prostitution.",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    ],
-    [
-        "",
-        "Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
+        "– Represents zero. X Not applicable. 1 Buying, receiving, "
+        "possessing stolen property. 2 Except forcible rape and prostitution.",
        "",
        "",
        "",
@ -1315,17 +1294,10 @@ data_stream_two_tables_1 = [
 ]

 data_stream_two_tables_2 = [
-    [
-        "",
-        "Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
-        "",
-        "",
-        "",
-        "",
-    ],
    ["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
    [
-        "[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
+        "[Based on Uniform Crime Reporting (UCR) Program. Represents "
+        "arrests reported (not charged) by 12,371 agencies",
        "",
        "",
        "",
@ -1333,7 +1305,8 @@ data_stream_two_tables_2 = [
        "",
    ],
    [
-        "with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]",
+        "with a total population of 239,839,971 as estimated by the FBI. "
+        "See headnote, Table 324]",
        "",
        "",
        "",
@ -1344,7 +1317,8 @@ data_stream_two_tables_2 = [
    ["Offense charged", "", "", "", "Indian/Alaskan", "Asian Pacific"],
    ["", "Total", "White", "Black", "Native", "Islander"],
    [
-        "Total  .\n .\n .\n .\n .  .\n .  .  .\n .  .  .\n .\n .  .  .\n .\n .  .  .\n .  .\n .\n .  .  .\n .\n .\n .\n .  .\n .  .\n .  .",
+        "Total  .\n .\n .\n .\n .  .\n .  .  .\n .  .  .\n .\n .  .  .\n "
+        ".\n .  .  .\n .  .\n .\n .  .  .\n .\n .\n .\n .  .\n .  .\n .  .",
        "10,690,561",
        "7,389,208",
        "3,027,153",
@ -1352,7 +1326,8 @@ data_stream_two_tables_2 = [
        "123,656",
    ],
    [
-        "Violent crime  .  .  .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  .\n .\n .\n .  .\n .  .\n .\n .\n .\n .\n .  .",
+        "Violent crime   .  .  .  .  .  .  .  .\n .  .\n .  .\n .  .\n .  "
+        ".\n .\n .\n .  .\n .  .\n .\n .\n .\n .\n .  .",
        "456,965",
        "268,346",
        "177,766",
@ -1368,7 +1343,8 @@ data_stream_two_tables_2 = [
        "97",
    ],
    [
-        "Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
+        "Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
+        ".\n.\n.\n.\n.\n. .",
        "16,362",
        "10,644",
        "5,319",
@ -1376,7 +1352,8 @@ data_stream_two_tables_2 = [
        "230",
    ],
    [
-        "Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
+        "Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
+        ".\n.\n.\n. .\n.\n.\n. . . .",
        "100,496",
        "43,039",
        "55,742",
@ -1384,7 +1361,8 @@ data_stream_two_tables_2 = [
        "989",
    ],
    [
-        "Aggravated assault  . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. .\n.\n.\n.",
+        "Aggravated assault  . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. "
+        ".\n.\n.\n.",
        "330,368",
        "209,922",
        "111,904",
@ -1392,7 +1370,8 @@ data_stream_two_tables_2 = [
        "3,929",
    ],
    [
-        "Property crime  .  .  .  .  .\n .  .  .  .  .\n .\n .  .  .\n .\n .  .\n .\n .\n .\n .  .\n .\n .  .\n .\n .",
+        "Property crime  .  .  .  .  .\n .  .  .  .  .\n .\n .  .  .\n .\n "
+        ".  .\n .\n .\n .\n .  .\n .\n .  .\n .\n .",
        "1,364,409",
        "922,139",
        "406,382",
@ -1400,7 +1379,8 @@ data_stream_two_tables_2 = [
        "18,289",
    ],
    [
-        "Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n. . . .",
+        "Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. "
+        ".\n.\n.\n. .\n.\n. . . .",
        "234,551",
        "155,994",
        "74,419",
@ -1408,7 +1388,8 @@ data_stream_two_tables_2 = [
        "2,117",
    ],
    [
-        "Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
+        "Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
+        ".\n.\n.\n.\n.\n. .",
        "1,056,473",
        "719,983",
        "306,625",
@ -1416,7 +1397,8 @@ data_stream_two_tables_2 = [
        "15,219",
    ],
    [
-        "Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
+        "Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. "
+        ".\n.\n. .\n.",
        "63,919",
        "39,077",
        "23,184",
@ -1424,7 +1406,8 @@ data_stream_two_tables_2 = [
        "841",
    ],
    [
-        "Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . . . .",
+        "Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
+        ".\n.\n.\n. .\n.\n.\n. . . . . .",
        "9,466",
        "7,085",
        "2,154",
@ -1432,7 +1415,8 @@ data_stream_two_tables_2 = [
        "112",
    ],
    [
-        "Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
+        "Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. "
+        ".\n.\n.\n.\n. .\n.\n. .\n.",
        "1,032,502",
        "672,865",
        "332,435",
@ -1440,7 +1424,8 @@ data_stream_two_tables_2 = [
        "12,075",
    ],
    [
-        "Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. .\n. .\n.",
+        "Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. "
+        ".\n. .\n.",
        "67,054",
        "44,730",
        "21,251",
@ -1448,7 +1433,8 @@ data_stream_two_tables_2 = [
        "728",
    ],
    [
-        "Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. . . . . . .",
+        "Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
+        ".\n.\n.\n. . . . . . .",
        "161,233",
        "108,032",
        "50,367",
@ -1456,7 +1442,8 @@ data_stream_two_tables_2 = [
        "1,519",
    ],
    [
-        "Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n. .\n.\n.\n.\n.",
+        "Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
+        ".\n.\n. .\n.\n.\n.\n.",
        "13,960",
        "9,208",
        "4,429",
@ -1472,7 +1459,8 @@ data_stream_two_tables_2 = [
        "742",
    ],
    [
-        "Vandalism  . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
+        "Vandalism  . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
+        ".\n. .\n.\n.\n.\n. .",
        "212,173",
        "157,723",
        "48,746",
@ -1496,7 +1484,8 @@ data_stream_two_tables_2 = [
        "1,413",
    ],
    [
-        "Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
+        "Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
+        ".\n.\n.\n.\n.\n. .",
        "60,175",
        "44,240",
        "14,347",
@ -1504,7 +1493,8 @@ data_stream_two_tables_2 = [
        "873",
    ],
    [
-        "Drug abuse violations  . . . . . . . .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
+        "Drug abuse violations  . . . . . . . .\n. . .\n.\n.\n.\n. .\n. "
+        ".\n.\n.\n.\n.",
        "1,301,629",
        "845,974",
        "437,623",
@ -1512,7 +1502,8 @@ data_stream_two_tables_2 = [
        "9,444",
    ],
    [
-        "Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . .\n.\n.\n.\n.\n. .\n. .",
+        "Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . "
+        ".\n.\n.\n.\n.\n. .\n. .",
        "8,046",
        "2,290",
        "5,518",
@ -1528,7 +1519,8 @@ data_stream_two_tables_2 = [
        "624",
    ],
    [
-        "Driving under the influence . . . . . . .\n. .\n.\n. .\n.\n.\n.\n.\n. .",
+        "Driving under the influence . . . . . . .\n. .\n.\n. "
+        ".\n.\n.\n.\n.\n. .",
        "1,105,401",
        "954,444",
        "121,594",
@ -1536,7 +1528,8 @@ data_stream_two_tables_2 = [
        "14,460",
    ],
    [
-        "Liquor laws  . . . . . . . .\n. .\n. .\n. .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
+        "Liquor laws  . . . . . . . .\n. .\n. .\n. .\n. .\n. . "
+        ".\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
        "444,087",
        "373,189",
        "50,431",
@ -1544,7 +1537,8 @@ data_stream_two_tables_2 = [
        "5,591",
    ],
    [
-        "Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . .\n.\n.\n.\n.\n.\n.",
+        "Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . "
+        ".\n.\n.\n.\n.\n.\n.",
        "469,958",
        "387,542",
        "71,020",
@ -1552,7 +1546,8 @@ data_stream_two_tables_2 = [
        "2,844",
    ],
    [
-        "Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
+        "Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. "
+        ".\n.\n.\n.\n.",
        "515,689",
        "326,563",
        "176,169",
@ -1560,7 +1555,8 @@ data_stream_two_tables_2 = [
        "4,174",
    ],
    [
-        "Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
+        "Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
+        ".\n.\n.\n. .\n.\n.\n. . . .",
        "26,347",
        "14,581",
        "11,031",
@ -1568,7 +1564,8 @@ data_stream_two_tables_2 = [
        "192",
    ],
    [
-        "All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. .\n.",
+        "All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. "
+        ".\n.",
        "2,929,217",
        "1,937,221",
        "911,670",
@ -1576,7 +1573,8 @@ data_stream_two_tables_2 = [
        "36,446",
    ],
    [
-        "Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n.\n.\n. .\n. . . .",
+        "Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
+        ".\n.\n.\n.\n.\n. .\n. . . .",
        "1,513",
        "677",
        "828",
@ -1592,7 +1590,8 @@ data_stream_two_tables_2 = [
        "1,060",
    ],
    [
-        "Runaways  . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
+        "Runaways  . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
+        ".\n. .\n.\n.\n.\n. .",
        "73,616",
        "48,343",
        "19,670",
@ -1600,14 +1599,6 @@ data_stream_two_tables_2 = [
        "3,950",
    ],
    ["1 Except forcible rape and prostitution.", "", "", "", "", ""],
-    [
-        "",
-        "Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,",
-        "",
-        "",
-        "",
-        "",
-    ],
 ]

 data_stream_table_areas = [
@ -1634,10 +1625,12 @@ data_stream_columns = [
        "Nombre Localidad",
    ],
    ["Entidad", "", "Municipio", "", "Localidad", ""],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0094", "Granja Adelita"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0094",
+     "Granja Adelita"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0096", "Agua Azul"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0100", "Rancho Alegre"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0102", "Los Arbolitos [Rancho]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0102",
+     "Los Arbolitos [Rancho]"],
    [
        "01",
        "Aguascalientes",
@ -1655,7 +1648,8 @@ data_stream_columns = [
        "0112",
        "Baj\xedo los V\xe1zquez",
    ],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0113", "Baj\xedo de Montoro"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0113",
+     "Baj\xedo de Montoro"],
    [
        "01",
        "Aguascalientes",
@ -1697,8 +1691,10 @@ data_stream_columns = [
        "Ca\xf1ada Honda [Estaci\xf3n]",
    ],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0127", "Los Ca\xf1os"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0128", "El Cari\xf1\xe1n"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0129", "El Carmen [Granja]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0128",
+     "El Cari\xf1\xe1n"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0129",
+     "El Carmen [Granja]"],
    [
        "01",
        "Aguascalientes",
@ -1733,9 +1729,11 @@ data_stream_columns = [
        "El Colorado (El Soyatal)",
    ],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0146", "El Conejal"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0157", "Cotorina de Abajo"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0157",
+     "Cotorina de Abajo"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0162", "Coyotes"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0166", "La Huerta (La Cruz)"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0166",
+     "La Huerta (La Cruz)"],
    [
        "01",
        "Aguascalientes",
@ -1752,17 +1750,20 @@ data_stream_columns = [
        "0171",
        "Los Cuervos (Los Ojos de Agua)",
    ],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0172", "San Jos\xe9 [Granja]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0172",
+     "San Jos\xe9 [Granja]"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0176", "La Chiripa"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0182", "Dolores"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0183", "Los Dolores"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0190", "El Duraznillo"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0191", "Los Dur\xf3n"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0197", "La Escondida"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0201", "Brande Vin [Bodegas]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0201",
+     "Brande Vin [Bodegas]"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0207", "Valle Redondo"],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0209", "La Fortuna"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0212", "Lomas del Gachup\xedn"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0212",
+     "Lomas del Gachup\xedn"],
    [
        "01",
        "Aguascalientes",
@ -1772,22 +1773,12 @@ data_stream_columns = [
        "El Carmen (Gallinas G\xfceras) [Rancho]",
    ],
    ["01", "Aguascalientes", "001", "Aguascalientes", "0216", "La Gloria"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0226", "Hacienda Nueva"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0226",
+     "Hacienda Nueva"],
 ]

 data_stream_split_text = [
-    [
-        "FEB",
-        "RUAR",
-        "Y 2014 M27 (BUS)",
-        "",
-        "ALPHABETIC LISTING BY T",
-        "YPE",
-        "",
-        "",
-        "",
-        "ABLPDM27",
-    ],
+    ["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""],
    ["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
    ["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
    ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
@ -1977,7 +1968,18 @@ data_stream_split_text = [
        "(872) 825-8309",
        "2014/04/11",
    ],
-    ["", "", "A SENSU JAPANESE", "", "7123 SOUTH 92ND EAST", "", "", "", "", ""],
+    [
+        "",
+        "",
+        "A SENSU JAPANESE",
+        "",
+        "7123 SOUTH 92ND EAST",
+        "",
+        "",
+        "",
+        "",
+        "",
+    ],
    [
        "625422",
        "BAW",
@ -2029,7 +2031,18 @@ data_stream_split_text = [
        "(580) 928-2700",
        "2014/09/08",
    ],
-    ["", "", "ANDOLINI'S PIZZERIA &", "", "12140 EAST 96TH STREET", "", "", "", "", ""],
+    [
+        "",
+        "",
+        "ANDOLINI'S PIZZERIA &",
+        "",
+        "12140 EAST 96TH STREET",
+        "",
+        "",
+        "",
+        "",
+        "",
+    ],
    [
        "428377",
        "BAW",
@ -2148,7 +2161,8 @@ data_stream_flag_size = [
        "from SBI",
        "from",
    ],
-    ["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other", "NCDC"],
+    ["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other",
+     "NCDC"],
    ["", "", "", "", "", "& FIs", "", "", "", "Banks", ""],
    ["1", "2=", "3", "4", "5", "6=", "7", "8", "9", "10", "11"],
    ["", "(3 to 6)+14", "", "", "", "(7 to13)", "", "", "", "", ""],
@ -2165,7 +2179,8 @@ data_stream_flag_size = [
        "-",
        "0.25",
    ],
-    ["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-", "-", "-"],
+    ["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-",
+     "-", "-"],
    [
        "Assam",
        "12.69",
@ -2194,8 +2209,10 @@ data_stream_flag_size = [
    ],
    ["Chhattisgarh", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
    ["Goa", "1.4", "1.02", "-", "-", "0.38", "0.31", "-", "0.07", "-", "-"],
-    ["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11", "-", "0.44"],
-    ["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64", "-", "0.49"],
+    ["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11",
+     "-", "0.44"],
+    ["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64",
+     "-", "0.49"],
    [
        "Himachal Pradesh",
        "8.02",
@ -2223,7 +2240,8 @@ data_stream_flag_size = [
        "-",
    ],
    ["Jharkhand", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
-    ["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89", "-", "0.69"],
+    ["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89",
+     "-", "0.69"],
    [
        "Kerala",
        "29.03",
@ -2263,11 +2281,16 @@ data_stream_flag_size = [
        "0.02",
        "2.89",
    ],
-    ["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-", "0.09"],
-    ["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05", "-", "0.03"],
-    ["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-", "-", "0.03"],
-    ["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-", "0.04"],
-    ["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66", "-", "0.2"],
+    ["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-",
+     "0.09"],
+    ["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05",
+     "-", "0.03"],
+    ["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-",
+     "-", "0.03"],
+    ["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-",
+     "0.04"],
+    ["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66",
+     "-", "0.2"],
    [
        "Punjab",
        "19.18",
@ -2295,8 +2318,10 @@ data_stream_flag_size = [
        "0.81",
    ],
    ["Sikkim", "0.16", "-", "-", "-", "0.16", "0.03", "-", "-", "-", "0.01"],
-    ["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-", "0.68"],
-    ["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-", "0.02"],
+    ["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-",
+     "0.68"],
+    ["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-",
+     "0.02"],
    ["Uttaranchal", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
    [
        "Uttar Pradesh",
@ -2393,11 +2418,13 @@ data_stream_edge_tol = [
    ["Costs", "(0.21)"],
    ["T\notal investment result per unit", "3.78"],
    [
-        "1 The results cover the period from inception of the Fund at 8 April 2016 through 31 December 2016.",
+        "1 The results cover the period from inception of the Fund at "
+        "8 April 2016 through 31 December 2016.",
        "",
    ],
    [
-        "2 The result per unit is calculated using the total number of outstanding unit as per the end of the",
+        "2 The result per unit is calculated using the total number of "
+        "outstanding unit as per the end of the",
        "",
    ],
    ["period.", ""],
@ -2454,7 +2481,8 @@ data_lattice_table_rotated = [
        "Men",
        "Women",
    ],
-    ["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645", "2391"],
+    ["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645",
+     "2391"],
    [
        "Tamil Nadu",
        "7387",
@ -2503,11 +2531,16 @@ data_lattice_table_rotated = [
        "1417",
        "1599",
    ],
-    ["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122", "2503"],
-    ["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579", "1709"],
-    ["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093", "1628"],
-    ["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413", "2027"],
-    ["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185", "1366"],
+    ["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122",
+     "2503"],
+    ["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579",
+     "1709"],
+    ["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093",
+     "1628"],
+    ["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413",
+     "2027"],
+    ["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185",
+        "1366"],
    [
        "Pooled",
        "38742",
@ -2573,7 +2606,8 @@ data_lattice_two_tables_2 = [
 ]

 data_lattice_table_regions = [
-    ["Età dell’Assicurato \nall’epoca del decesso", "Misura % di \nmaggiorazione"],
+    ["Età dell’Assicurato \nall’epoca del decesso",
+     "Misura % di \nmaggiorazione"],
    ["18-75", "1,00%"],
    ["76-80", "0,50%"],
    ["81 in poi", "0,10%"],
@ -2596,10 +2630,12 @@ data_lattice_table_areas = [
    ["Kerala", "2400", "7.2", "0.5", "25.3", "20.1", "41.5", "5.5", ""],
    ["Tamil Nadu", "2400", "21.4", "2.3", "8.8", "35.5", "25.8", "6.2", ""],
    ["Karnataka", "2399", "37.4", "2.8", "12.5", "18.3", "23.1", "5.8", ""],
-    ["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9", ""],
+    ["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9",
+     ""],
    ["Maharashtra", "2400", "22.0", "0.9", "17.3", "20.3", "32.6", "7.0", ""],
    ["Gujarat", "2390", "28.6", "0.1", "14.4", "23.1", "26.9", "6.8", ""],
-    ["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6", ""],
+    ["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6",
+     ""],
    ["Orissa", "2405", "33.2", "1.0", "10.4", "25.7", "21.2", "8.5", ""],
    ["West Bengal", "2293", "41.7", "4.4", "13.2", "17.1", "21.2", "2.4", ""],
    ["Uttar Pradesh", "2400", "35.3", "2.1", "4.5", "23.3", "27.1", "7.6", ""],
@ -2650,7 +2686,8 @@ data_lattice_process_background = [
        "3,658",
        "3,183",
    ],
-    ["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173", "855"],
+    ["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173",
+     "855"],
    ["Total", "", "47", "92", "11.81", "22,455", "19,584", "10,644"],
 ]

@ -2689,7 +2726,8 @@ data_lattice_copy_text = [
    ["COHS", "San Mateo", "Health Plan of San Mateo", "113,202"],
    ["COHS", "Ventura", "Gold Coast Health Plan", "202,217"],
    ["COHS", "Total COHS Enrollment", "", "2,176,064"],
-    ["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "", "10,132,022"],
+    ["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "",
+     "10,132,022"],
    ["PCCM", "Los Angeles", "AIDS Healthcare Foundation", "828"],
    ["PCCM", "San Francisco", "Family Mosaic", "25"],
    ["PCCM", "Total PHP Enrollment", "", "853"],
@ -2721,7 +2759,8 @@ data_lattice_shift_text_left_top = [
    ],
    ["Blood Pressure #", "2400", "Men (≥ 18yrs)", "10%", "95%", "20%", "1728"],
    ["", "", "Women (≥ 18 yrs)", "", "", "", "1728"],
-    ["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
    ["", "", "Women (≥ 18 yrs)", "", "", "", "1825"],
    [
        "Knowledge &\nPractices on HTN &\nDM",
@ -2746,7 +2785,8 @@ data_lattice_shift_text_disable = [
        "Sample size\nper State",
    ],
    ["Anthropometry", "", "", "", "", "", ""],
-    ["Clinical Examination", "2400", "", "All the available individuals", "", "", ""],
+    ["Clinical Examination", "2400", "", "All the available individuals",
+     "", "", ""],
    ["History of morbidity", "", "", "", "", "", ""],
    [
        "Diet survey",
@ -2758,9 +2798,11 @@ data_lattice_shift_text_disable = [
        "",
    ],
    ["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
-    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
+    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
+     "1728"],
    ["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
-    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
    [
        "Knowledge &\nPractices on HTN &",
        "2400",
@ -2785,7 +2827,8 @@ data_lattice_shift_text_right_bottom = [
    ],
    ["Anthropometry", "", "", "", "", "", ""],
    ["Clinical Examination", "", "", "", "", "", ""],
-    ["History of morbidity", "2400", "", "", "", "", "All the available individuals"],
+    ["History of morbidity", "2400", "", "", "", "",
+     "All the available individuals"],
    [
        "Diet survey",
        "1200",
@ -2796,9 +2839,11 @@ data_lattice_shift_text_right_bottom = [
        "All the individuals partaking meals in the HH",
    ],
    ["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
-    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
+    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
+     "1728"],
    ["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
-    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
    ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
    [
        "Knowledge &\nPractices on HTN &\nDM",
@ -2820,7 +2865,7 @@ data_arabic = [
 ]

 data_stream_layout_kwargs = [
-    ["V i n s   a u   Ve r r e", ""],
+    ["V i n s   a u   V e r r e", ""],
    ["Les Blancs", "12.5CL"],
    ["A.O.P Côtes du Rhône", ""],
    ["Domaine de la Guicharde «  Autour de la chapelle » 2016", "8 €"],
--- a/tests/files/baseline_plots/test_grid_plot.png
+++ b/tests/files/baseline_plots/test_grid_plot.png
--- a/tests/files/baseline_plots/test_joint_plot.png
+++ b/tests/files/baseline_plots/test_joint_plot.png
--- a/tests/files/baseline_plots/test_lattice_contour_plot.png
+++ b/tests/files/baseline_plots/test_lattice_contour_plot.png
--- a/tests/files/baseline_plots/test_line_plot.png
+++ b/tests/files/baseline_plots/test_line_plot.png
--- a/tests/files/baseline_plots/test_stream_contour_plot.png
+++ b/tests/files/baseline_plots/test_stream_contour_plot.png
--- a/tests/files/baseline_plots/test_stream_grid_plot.png
+++ b/tests/files/baseline_plots/test_stream_grid_plot.png
--- a/tests/files/baseline_plots/test_text_plot.png
+++ b/tests/files/baseline_plots/test_text_plot.png
--- a/tests/files/baseline_plots/test_textedge_plot.png
+++ b/tests/files/baseline_plots/test_textedge_plot.png
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -19,10 +19,16 @@ def test_help_output():
    output = result.output

    assert prog_name == "camelot"
-    assert result.output.startswith("Usage: %(prog_name)s [OPTIONS] COMMAND" % locals())
+    assert result.output.startswith(
+        "Usage: %(prog_name)s [OPTIONS] COMMAND" %
+        locals()
+    )
    assert all(
        v in result.output
-        for v in ["Options:", "--version", "--help", "Commands:", "lattice", "stream"]
+        for v in [
+            "Options:", "--version", "--help", "Commands:", "lattice",
+            "stream"
+        ]
    )


@ -120,21 +126,24 @@ def test_cli_output_format():
        # json
        result = runner.invoke(
            cli,
-            ["--format", "json", "--output", outfile.format("json"), "stream", infile],
+            ["--format", "json", "--output", outfile.format("json"), "stream",
+             infile],
        )
        assert result.exit_code == 0

        # excel
        result = runner.invoke(
            cli,
-            ["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile],
+            ["--format", "excel", "--output", outfile.format("xlsx"), "stream",
+             infile],
        )
        assert result.exit_code == 0

        # html
        result = runner.invoke(
            cli,
-            ["--format", "html", "--output", outfile.format("html"), "stream", infile],
+            ["--format", "html", "--output", outfile.format("html"), "stream",
+             infile],
        )
        assert result.exit_code == 0

@ -166,6 +175,10 @@ def test_cli_quiet():
        assert "No tables found on page-1" in result.output

        result = runner.invoke(
-            cli, ["--quiet", "--format", "csv", "--output", outfile, "stream", infile]
+            cli,
+            [
+                "--quiet", "--format", "csv", "--output", outfile, "stream",
+                infile
+            ]
        )
        assert "No tables found on page-1" not in result.output
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -11,12 +11,15 @@ from camelot.__version__ import generate_version

 from .data import *

+
 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")


 def test_parsing_report():
-    parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
+    parsing_report = {
+        "accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1
+    }

    filename = os.path.join(testdir, "foo.pdf")
    tables = camelot.read_pdf(filename)
@ -28,9 +31,11 @ def test_password():

    filename = os.path.join(testdir, "health_protected.pdf")
    tables = camelot.read_pdf(filename, password="ownerpass", flavor="stream")
+    assert len(tables) == 1
    assert_frame_equal(df, tables[0].df)

    tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
+    assert len(tables) == 1
    assert_frame_equal(df, tables[0].df)


@ -229,9 +234,9 @ def test_repr():
    tables = camelot.read_pdf(filename)
    assert repr(tables) == "<TableList n=1>"
    assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"


 def test_pages():
@ -239,22 +244,23 @ def test_pages():
    tables = camelot.read_pdf(url)
    assert repr(tables) == "<TableList n=1>"
    assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"

    tables = camelot.read_pdf(url, pages="1-end")
    assert repr(tables) == "<TableList n=1>"
    assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"

    tables = camelot.read_pdf(url, pages="all")
    assert repr(tables) == "<TableList n=1>"
    assert repr(tables[0]) == "<Table shape=(7, 7)>"
    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0]) ==
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
    )


@ -264,7 +270,8 @ def test_url():
    assert repr(tables) == "<TableList n=1>"
    assert repr(tables[0]) == "<Table shape=(7, 7)>"
    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0]) ==
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
    )


@ -284,7 +291,12 @ def test_table_order():
        return t

    table_list = TableList(
-        [_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
+        [
+            _make_table(2, 1),
+            _make_table(1, 1),
+            _make_table(3, 4),
+            _make_table(1, 2)
+        ]
    )

    assert [(t.page, t.order) for t in sorted(table_list)] == [
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@ -4,13 +4,30 @@ import os

 import pytest

+import matplotlib
+
 import camelot

+# The version of Matplotlib has an impact on some of the tests.  Unfortunately,
+# we can't enforce usage of a recent version of Matplotlib without dropping
+# support for Python 3.6.
+# To check the installed version:  pip freeze | grep matplotlib
+# To force an upgrade:  pip install --upgrade --force-reinstall matplotlib
+# To force a Python 3.6 compatible version:  pip install "matplotlib==2.2.5"
+# This condition can be removed in favor of a version requirement bump for
+# matplotlib once support for Python 3.5 is dropped.
+# Compare numerically: as strings, "3.10.0" would sort before "3.2.1".
+LEGACY_MATPLOTLIB = tuple(
+    int(part) for part in matplotlib.__version__.split(".")[:3]
+    if part.isdigit()
+) < (3, 2, 1)

 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
 def test_text_plot():
@ -26,6 +43,16 @ def test_grid_plot():
    tables = camelot.read_pdf(filename)
    return camelot.plot(tables[0], kind='grid')

+
+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of MatPlotLib")
+@pytest.mark.mpl_image_compare(
+    baseline_dir="files/baseline_plots", remove_text=True)
+def test_stream_grid_plot():
+    filename = os.path.join(testdir, "foo.pdf")
+    tables = camelot.read_pdf(filename, flavor="stream")
+    return camelot.plot(tables[0], kind='grid')
+

@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
@ -35,6 +62,8 @ def test_lattice_contour_plot():
    return camelot.plot(tables[0], kind='contour')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
 def test_stream_contour_plot():
@ -51,6 +80,8 @ def test_line_plot():
    return camelot.plot(tables[0], kind='line')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
 def test_joint_plot():
@ -59,6 +90,8 @ def test_joint_plot():
    return camelot.plot(tables[0], kind='joint')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare(
    baseline_dir="files/baseline_plots", remove_text=True)
 def test_textedge_plot():