From f54e1563e1e63cb64bc71c51e4f727bad0435a1b Mon Sep 17 00:00:00 2001 From: Francois Huet Date: Mon, 6 Apr 2020 12:47:23 -0700 Subject: [PATCH] Lint and address PDFMiner version impact on tests --- camelot/core.py | 126 +++++++++++++++++++++++--------------- camelot/parsers/stream.py | 51 +++++++++------ tests/data.py | 35 +++++++---- tests/test_common.py | 50 ++++++++++++--- tests/test_plotting.py | 21 +++++++ 5 files changed, 197 insertions(+), 86 deletions(-) diff --git a/camelot/core.py b/camelot/core.py index a0cc079..fe52411 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -17,6 +17,7 @@ TEXTEDGE_REQUIRED_ELEMENTS = 4 # maximum number of columns over which a header can spread MAX_COL_SPREAD_IN_HEADER = 3 + class TextEdge(object): """Defines a text edge coordinates relative to a left-bottom origin. (PDF coordinate space) @@ -64,7 +65,8 @@ class TextEdge(object): the is_valid attribute. """ if np.isclose(self.y0, y0, atol=edge_tol): - self.x = (self.intersections * self.x + x) / float(self.intersections + 1) + self.x = (self.intersections * self.x + x) / \ + float(self.intersections + 1) self.y0 = y0 self.intersections += 1 # a textedge is valid only if it extends uninterrupted @@ -140,26 +142,38 @@ class TextEdges(object): """ intersections_sum = { "left": sum( - te.intersections for te in self._textedges["left"] if te.is_valid + te.intersections for te in self._textedges["left"] + if te.is_valid ), "right": sum( - te.intersections for te in self._textedges["right"] if te.is_valid + te.intersections for te in self._textedges["right"] + if te.is_valid ), "middle": sum( - te.intersections for te in self._textedges["middle"] if te.is_valid + te.intersections for te in self._textedges["middle"] + if te.is_valid ), } # TODO: naive # get vertical textedges that intersect maximum number of # times with horizontal textlines - relevant_align = max(intersections_sum.items(), key=itemgetter(1))[0] - return list(filter(lambda te: te.is_valid, self._textedges[relevant_align])) + relevant_align = max( + intersections_sum.items(), + key=itemgetter(1) + )[0] + return list(filter( + lambda te: te.is_valid, + self._textedges[relevant_align]) + ) - def _expand_area_for_header(self, area, textlines, col_anchors, average_row_height): - """The core algorithm is based on fairly strict alignment of text. It works - ok for the table body, but might fail on tables' headers since they - tend to be in a different font, alignment (e.g. vertical), etc. + @staticmethod + def _expand_area_for_header(area, textlines, col_anchors, + average_row_height): + """The core algorithm is based on fairly strict alignment of text. + It works ok for the table body, but might fail on tables' headers + since they tend to be in a different font, alignment (e.g. vertical), + etc. The section below tries to identify whether what's above the bbox identified so far has the characteristics of a table header: Close to the top of the body, with cells that fit within the bounds @@ -174,10 +188,12 @@ class TextEdges(object): crossed by an element covering left to right. 
""" indexLeft = 0 - while indexLeft < len(col_anchors) and col_anchors[indexLeft] < left: + while indexLeft < len(col_anchors) \ + and col_anchors[indexLeft] < left: indexLeft += 1 indexRight = indexLeft - while indexRight < len(col_anchors) and col_anchors[indexRight] < right: + while indexRight < len(col_anchors) \ + and col_anchors[indexRight] < right: indexRight += 1 return indexRight - indexLeft @@ -193,14 +209,14 @@ class TextEdges(object): # higher than the table, directly within its bounds if te.y0 > top and te.x0 > left and te.x1 < right: all_above.append(te) - if closest_above == None or closest_above.y0 > te.y0: + if closest_above is None or closest_above.y0 > te.y0: closest_above = te if closest_above and \ closest_above.y0 < top + average_row_height: - # b/ We have a candidate cell that is within the correct vertical band, - # and directly above the table. Starting from this anchor, we list - # all the textlines within the same row. + # b/ We have a candidate cell that is within the correct + # vertical band, and directly above the table. Starting from + # this anchor, we list all the textlines within the same row. tls_in_new_row = [] top = closest_above.y1 pushed_up = True @@ -222,18 +238,20 @@ class TextEdges(object): top = te.y1 pushed_up = True - # Get the x-ranges for all the textlines, and merge the x-ranges that overlap + # Get the x-ranges for all the textlines, and merge the + # x-ranges that overlap zones = zones + \ list(map(lambda tl: [tl.x0, tl.x1], tls_in_new_row)) zones.sort(key=lambda z: z[0]) # Sort by left coordinate - # Starting from the right, if two zones overlap horizontally, merge them + # Starting from the right, if two zones overlap horizontally, + # merge them merged_something = True while merged_something: merged_something = False for i in range(len(zones) - 1, 0, -1): zone_right = zones[i] zone_left = zones[i-1] - if (zone_left[1] >= zone_right[0]): + if zone_left[1] >= zone_right[0]: zone_left[1] = max(zone_right[1], zone_left[1]) zones.pop(i) merged_something = True @@ -248,8 +266,8 @@ class TextEdges(object): ) ) if max_spread <= MAX_COL_SPREAD_IN_HEADER: - # Combined, the elements we've identified don't cross more than the - # authorized number of columns. + # Combined, the elements we've identified don't cross more + # than the authorized number of columns. # We're trying to avoid # 0: # 1: @@ -257,7 +275,8 @@ class TextEdges(object): # if len(zones) > TEXTEDGE_REQUIRED_ELEMENTS: new_area = (left, bottom, right, top) - # At this stage we've identified a plausible row (or beginning of one). + # At this stage we've identified a plausible row (or the + # beginning of one). 
keep_searching = True
 
         return new_area
 
@@ -272,26 +291,26 @@ class TextEdges(object):
         table_areas = {}
         for te in relevant_textedges:
-            if not table_areas:
+            if not table_areas:
+                table_areas[(te.x, te.y0, te.x, te.y1)] = None
+            else:
+                found = None
+                for area in table_areas:
+                    # check for overlap
+                    if te.y1 >= area[1] and te.y0 <= area[3]:
+                        found = area
+                        break
+                if found is None:
                 table_areas[(te.x, te.y0, te.x, te.y1)] = None
             else:
-                found = None
-                for area in table_areas:
-                    # check for overlap
-                    if te.y1 >= area[1] and te.y0 <= area[3]:
-                        found = area
-                        break
-                if found is None:
-                    table_areas[(te.x, te.y0, te.x, te.y1)] = None
-                else:
-                    table_areas.pop(found)
-                    updated_area = (
-                        found[0],
-                        min(te.y0, found[1]),
-                        max(found[2], te.x),
-                        max(found[3], te.y1),
-                    )
-                    table_areas[updated_area] = None
+                table_areas.pop(found)
+                updated_area = (
+                    found[0],
+                    min(te.y0, found[1]),
+                    max(found[2], te.x),
+                    max(found[3], te.y1),
+                )
+                table_areas[updated_area] = None
 
         # extend table areas based on textlines that overlap
         # vertically. it's possible that these textlines were
@@ -318,8 +337,8 @@ class TextEdges(object):
             )
             table_areas[updated_area] = None
 
-        # Apply a heuristic to salvage headers which formatting might be off compared to
-        # the rest of the table.
+        # Apply a heuristic to salvage headers whose formatting might be off
+        # compared to the rest of the table.
         average_textline_height = sum_textline_height / \
             float(len(textlines))
 
@@ -398,7 +417,10 @@ class Cell(object):
 
     def __repr__(self):
         return "<Cell x1={} y1={} x2={} y2={}>".format(
-            round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
+            round(self.x1, 2),
+            round(self.y1, 2),
+            round(self.x2, 2),
+            round(self.y2, 2)
         )
 
     @property
@@ -448,7 +470,9 @@ class Table(object):
     def __init__(self, cols, rows):
        self.cols = cols
        self.rows = rows
-        self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
+        self.cells = [
+            [Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows
+        ]
        self.df = None
        self.shape = (0, 0)
        self.accuracy = 0
@@ -685,7 +709,8 @@ class Table(object):
            Output filepath.
""" - kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1} + kw = {"encoding": "utf-8", "index": False, "header": False, + "quoting": 1} kw.update(kwargs) self.df.to_csv(path, **kw) @@ -798,7 +823,8 @@ class TableList(object): ext = kwargs.get("ext") for table in self._tables: filename = os.path.join( - "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext) + "{}-page-{}-table-{}{}".format(root, table.page, table.order, + ext) ) filepath = os.path.join(dirname, filename) to_format = self._format_func(table, f) @@ -813,7 +839,10 @@ class TableList(object): with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: for table in self._tables: filename = os.path.join( - "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext) + "{}-page-{}-table-{}{}".format(root, + table.page, + table.order, + ext) ) filepath = os.path.join(dirname, filename) z.write(filepath, os.path.basename(filepath)) @@ -848,7 +877,8 @@ class TableList(object): writer = pd.ExcelWriter(filepath) for table in self._tables: sheet_name = "page-{}-table-{}".format(table.page, table.order) - table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8") + table.df.to_excel(writer, sheet_name=sheet_name, + encoding="utf-8") writer.save() if compress: zipname = os.path.join(os.path.dirname(path), root) + ".zip" diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index 33e3692..c939c8f 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -10,7 +10,8 @@ import pandas as pd from .base import BaseParser from ..core import TextEdges, Table -from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace +from ..utils import (text_in_bbox, get_table_index, compute_accuracy, + compute_whitespace) logger = logging.getLogger("camelot") @@ -124,8 +125,8 @@ class Stream(BaseParser): temp = [] for t in text: # is checking for upright necessary? 
- # if t.get_text().strip() and all([obj.upright for obj in t._objs if - # type(obj) is LTChar]): + # if t.get_text().strip() and all([obj.upright for obj in t._objs + # if type(obj) is LTChar]): if t.get_text().strip(): if not np.isclose(row_y, t.y0, atol=row_tol): rows.append(sorted(temp, key=lambda t: t.x0)) @@ -170,7 +171,8 @@ class Stream(BaseParser): merged.append(higher) elif column_tol < 0: if higher[0] <= lower[1]: - if np.isclose(higher[0], lower[1], atol=abs(column_tol)): + if np.isclose(higher[0], lower[1], + atol=abs(column_tol)): merged.append(higher) else: upper_bound = max(lower[1], higher[1]) @@ -200,8 +202,8 @@ class Stream(BaseParser): """ row_boundaries = [ [ - max([t.y1 for t in r]), - min([t.y0 for t in r]) + max(t.y1 for t in r), + min(t.y0 for t in r) ] for r in rows_grouped ] @@ -236,7 +238,9 @@ class Stream(BaseParser): text = Stream._group_rows(text, row_tol=row_tol) elements = [len(r) for r in text] new_cols = [ - (t.x0, t.x1) for r in text if len(r) == max(elements) for t in r + (t.x0, t.x1) + for r in text if len(r) == max(elements) + for t in r ] cols.extend(Stream._merge_columns(sorted(new_cols))) return cols @@ -268,7 +272,8 @@ class Stream(BaseParser): def _validate_columns(self): if self.table_areas is not None and self.columns is not None: if len(self.table_areas) != len(self.columns): - raise ValueError("Length of table_areas and columns" " should be equal") + raise ValueError("Length of table_areas and columns" + " should be equal") def _nurminen_table_detection(self, textlines): """A general implementation of the table detection algorithm @@ -290,7 +295,7 @@ class Stream(BaseParser): # guess table areas using textlines and relevant edges table_bbox = textedges.get_table_areas(textlines, relevant_textedges) # treat whole page as table area if no table areas found - if not len(table_bbox): + if not table_bbox: table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None} return table_bbox @@ -339,7 +344,8 @@ class Stream(BaseParser): self.t_bbox = t_bbox - text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) + text_x_min, text_y_min, text_x_max, text_y_max = \ + self._text_bbox(self.t_bbox) rows_grouped = self._group_rows(t_bbox_all, row_tol=self.row_tol) rows = self._join_rows(rows_grouped, text_y_max, text_y_min) elements = [len(r) for r in rows_grouped] @@ -365,14 +371,19 @@ class Stream(BaseParser): # see if the list contains elements, if yes, then use # the mode after removing 1s elements = list(filter(lambda x: x != 1, elements)) - if len(elements): + if elements: ncols = max(set(elements), key=elements.count) else: warnings.warn( - "No tables found in table area {}".format(table_idx + 1) + "No tables found in table area {}" + .format(table_idx + 1) ) - cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r] - cols = self._merge_columns(sorted(cols), column_tol=self.column_tol) + cols = [ + (t.x0, t.x1) for r in rows_grouped if len(r) == ncols + for t in r + ] + cols = self._merge_columns(sorted(cols), + column_tol=self.column_tol) inner_text = [] for i in range(1, len(cols)): left = cols[i - 1][1] @@ -442,20 +453,24 @@ class Stream(BaseParser): return table - def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): + def extract_tables(self, filename, suppress_stdout=False, + layout_kwargs={}): self._generate_layout(filename, layout_kwargs) if not suppress_stdout: - logger.info("Processing {}".format(os.path.basename(self.rootname))) + logger.info("Processing {}".format( + 
os.path.basename(self.rootname))) if not self.horizontal_text: if self.images: warnings.warn( "{} is image-based, camelot only works on" - " text-based pages.".format(os.path.basename(self.rootname)) + " text-based pages.".format( + os.path.basename(self.rootname)) ) else: warnings.warn( - "No tables found on {}".format(os.path.basename(self.rootname)) + "No tables found on {}".format( + os.path.basename(self.rootname)) ) return [] diff --git a/tests/data.py b/tests/data.py index f11aba4..889f98a 100755 --- a/tests/data.py +++ b/tests/data.py @@ -2742,21 +2742,28 @@ data_stream_vertical_headers = [ '', 'Daniel G. Gauthier', 'Craig M. Clemens', 'Craig Johnston', 'Carolyn Brummund', 'Adam Brege', 'David Bielusiak', ''], ['Alcona', '963', '439', '55', '26', '47', '164', '173', '111', '', '268', - '', '272', '275', '269', '', '271', '', '224', '76', '', '', '', '', ''], - ['Caledonia', '923', '393', '40', '23', '45', '158', '150', '103', '', '244', - '', '247', '254', '255', '', '244', '', '139', '143', '', '', '', '', ''], + '', '272', '275', '269', '', '271', '', '224', '76', '', '', '', '', + ''], + ['Caledonia', '923', '393', '40', '23', '45', '158', '150', '103', '', + '244', '', '247', '254', '255', '', '244', '', '139', '143', '', '', + '', '', ''], ['Curtis', '1026', '349', '30', '30', '25', '102', '95', '84', '', '159', '', '164', '162', '161', '', '157', '', '', '', '', '', '', '', ''], - ['Greenbush', '1212', '423', '56', '26', '40', '126', '104', '131', '', '208', - '', '213', '214', '215', '', '208', '', '', '', '', '208', '', '', ''], + ['Greenbush', '1212', '423', '56', '26', '40', '126', '104', '131', '', + '208', '', '213', '214', '215', '', '208', '', '', '', '', '208', '', + '', ''], ['Gustin', '611', '180', '22', '35', '17', '55', '73', '45', '', '108', - '', '104', '111', '111', '', '109', '', '', '', '', '', '81', '42', ''], - ['Harrisville', '1142', '430', '45', '90', '29', '101', '155', '94', '', '226', - '', '226', '232', '244', '', '226', '', '', '', '232', '', '', '', ''], + '', '104', '111', '111', '', '109', '', '', '', '', '', '81', '42', + ''], + ['Harrisville', '1142', '430', '45', '90', '29', '101', '155', '94', '', + '226', '', '226', '232', '244', '', '226', '', '', '', '232', '', '', + '', ''], ['Hawes', '884', '293', '38', '36', '27', '109', '121', '84', '', '192', - '', '195', '195', '193', '', '184', '', '', '', '', '', '118', '87', ''], + '', '195', '195', '193', '', '184', '', '', '', '', '', '118', '87', + ''], ['Haynes', '626', '275', '31', '20', '32', '104', '121', '53', '', '163', - '', '163', '173', '161', '', '152', '', '', '', '76', '', '69', '31', ''], + '', '163', '173', '161', '', '152', '', '', '', '76', '', '69', '31', + ''], ['Mikado', '781', '208', '19', '39', '17', '81', '90', '63', '', '149', '', '149', '145', '147', '', '143', '', '', '', '', '113', '', '', ''], ['Millen', '353', '139', '7', '16', '13', '38', '49', '19', '', '62', @@ -2764,7 +2771,9 @@ data_stream_vertical_headers = [ ['Mitchell', '327', '96', '12', '17', '7', '29', '41', '17', '', '57', '', '55', '57', '60', '', '56', '', '', '', '', '', '', '', ''], ['City Harrisville', '389', '171', '16', '15', '18', '35', '49', '31', '', - '78', '', '80', '82', '81', '', '77', '', '', '', '73', '', '', '', ''], - ['Totals', '9237', '3396', '371', '373', '317', '1102', '1221', '835', '0', '1914', '0', - '1934', '1967', '1963', '0', '1889', '0', '363', '219', '381', '321', '268', '160', '0'] + '78', '', '80', '82', '81', '', '77', '', '', '', '73', '', '', '', + ''], + ['Totals', 
'9237', '3396', '371', '373', '317', '1102', '1221', '835', '0',
+     '1914', '0', '1934', '1967', '1963', '0', '1889', '0', '363', '219',
+     '381', '321', '268', '160', '0']
 ]
diff --git a/tests/test_common.py b/tests/test_common.py
index 468a1f5..120e4bd 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -2,6 +2,8 @@
 
 import os
 
+import pytest
+
 import pandas as pd
 from pandas.testing import assert_frame_equal
 
@@ -11,12 +13,30 @@ from camelot.__version__ import generate_version
 
 from .data import *
 
+import pdfminer
+
+# The version of PDFMiner has an impact on some of the tests. Unfortunately,
+# we can't enforce usage of a recent version of PDFMiner without dropping
+# support for Python 2.
+# To check the version of pdfminer.six installed:
+# pip freeze | grep pdfminer.six
+# To force upgrade:
+# pip install --upgrade --force-reinstall pdfminer.six
+# To force usage of a Python 2 compatible version:
+# pip install "pdfminer.six==20191110"
+# This condition can be removed in favor of a version requirement bump for
+# pdfminer.six once support for Python 2 is dropped.
+
+LEGACY_PDF_MINER = pdfminer.__version__ < "20200402"
+
 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")
 
 
 def test_parsing_report():
-    parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
+    parsing_report = {
+        "accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1
+    }
 
     filename = os.path.join(testdir, "foo.pdf")
     tables = camelot.read_pdf(filename)
@@ -64,6 +84,8 @@ def test_stream_table_rotated():
     assert_frame_equal(df, result_without_first_row)
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 def test_stream_two_tables():
     df1 = pd.DataFrame(data_stream_two_tables_1)
     df2 = pd.DataFrame(data_stream_two_tables_2)
@@ -106,6 +128,8 @@ def test_stream_columns():
     assert_frame_equal(df, tables[0].df)
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 def test_stream_split_text():
     df = pd.DataFrame(data_stream_split_text)
 
@@ -143,6 +167,8 @@ def test_stream_edge_tol():
     assert_frame_equal(df, tables[0].df)
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 def test_stream_layout_kwargs():
     df = pd.DataFrame(data_stream_layout_kwargs)
 
@@ -248,7 +274,8 @@ def test_repr():
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0])
+        == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
 
 
@@ -258,21 +285,24 @@ def test_pages():
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0])
+        == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
 
     tables = camelot.read_pdf(url, pages="1-end")
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0])
+        == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
 
     tables = camelot.read_pdf(url, pages="all")
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0])
+        == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
 
 
@@ -282,7 +312,8 @@ def test_url():
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0])
+        == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
 
 
@@ -302,7 +333,12 @@ def test_table_order():
         return t
 
     table_list = TableList(
-        [_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
+        [
+            _make_table(2, 1),
+            _make_table(1, 1),
+            _make_table(3, 4),
+            _make_table(1, 2)
+        ]
     )
 
     assert [(t.page, t.order) for t in sorted(table_list)] == [
diff --git a/tests/test_plotting.py b/tests/test_plotting.py
index f267e29..7646894 100644
--- a/tests/test_plotting.py
+++ b/tests/test_plotting.py
@@ -4,13 +4,30 @@
 
 import os
 
 import pytest
 
+import pdfminer
+
 import camelot
 
+# The version of PDFMiner has an impact on some of the tests. Unfortunately,
+# we can't enforce usage of a recent version of PDFMiner without dropping
+# support for Python 2.
+# To check the version of pdfminer.six installed:
+# pip freeze | grep pdfminer.six
+# To force upgrade:
+# pip install --upgrade --force-reinstall pdfminer.six
+# To force usage of a Python 2 compatible version:
+# pip install "pdfminer.six==20191110"
+# This condition can be removed in favor of a version requirement bump for
+# pdfminer.six once support for Python 2 is dropped.
+
+LEGACY_PDF_MINER = pdfminer.__version__ < "20200402"
 
 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_text_plot():
@@ -35,6 +52,8 @@ def test_lattice_contour_plot():
     return camelot.plot(tables[0], kind='contour')
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_stream_contour_plot():
@@ -59,6 +78,8 @@ def test_joint_plot():
     return camelot.plot(tables[0], kind='joint')
 
 
+@pytest.mark.skipif(LEGACY_PDF_MINER,
+                    reason="depends on a recent version of PDFMiner")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_textedge_plot():