From e1572a10c908dfa411bc204fde3d69867456cf18 Mon Sep 17 00:00:00 2001
From: Frh <francois.huet+github@gmail.com>
Date: Sat, 25 Apr 2020 22:47:23 -0700
Subject: [PATCH] Lint cleanup: drop object bases, reindent signatures, hoist helpers

---
 camelot/cli.py             |  2 +-
 camelot/core.py            | 10 ++---
 camelot/parsers/base.py    | 75 +++++++++++++++++++++--------------
 camelot/parsers/hybrid.py  | 81 ++++++++++++++++++--------------------
 camelot/parsers/lattice.py | 35 ++++++++--------
 camelot/parsers/stream.py  | 23 ++++++-----
 camelot/plotting.py        |  2 +-
 camelot/utils.py           | 37 +++++++++--------
 8 files changed, 141 insertions(+), 124 deletions(-)

diff --git a/camelot/cli.py b/camelot/cli.py
index e276f01..1e85b30 100644
--- a/camelot/cli.py
+++ b/camelot/cli.py
@@ -18,7 +18,7 @@ logger = logging.getLogger("camelot")
 logger.setLevel(logging.INFO)
 
 
-class Config(object):
+class Config():
     def __init__(self):
         self.config = {}
 
diff --git a/camelot/core.py b/camelot/core.py
index 9263628..0cd7fa6 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -31,7 +31,7 @@ VERTICAL_ALIGNMENTS = ["top", "bottom", "center"]
 ALL_ALIGNMENTS = HORIZONTAL_ALIGNMENTS + VERTICAL_ALIGNMENTS
 
 
-class TextAlignment(object):
+class TextAlignment():
     """Represents a list of textlines sharing an alignment on a coordinate.
 
     The alignment can be left/right/middle or top/bottom/center.
@@ -137,7 +137,7 @@ class TextEdge(TextAlignment):
                 self.is_valid = True
 
 
-class TextAlignments(object):
+class TextAlignments():
     """Defines a dict of text edges across reference alignments.
     """
 
@@ -327,7 +327,7 @@ class TextEdges(TextAlignments):
         return table_areas_padded
 
 
-class Cell(object):
+class Cell():
     """Defines a cell in a table with coordinates relative to a
     left-bottom origin. (PDF coordinate space)
 
@@ -409,7 +409,7 @@ class Cell(object):
         return self.top + self.bottom + self.left + self.right
 
 
-class Table(object):
+class Table():
     """Defines a table with coordinates relative to a left-bottom
     origin. (PDF coordinate space)
 
@@ -815,7 +815,7 @@ class Table(object):
         return self
 
 
-class TableList(object):
+class TableList():
     """Defines a list of camelot.core.Table objects. Each table can
     be accessed using its index.
 
diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py
index 6816b62..4c18d77 100644
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@@ -19,24 +19,24 @@ from ..utils import (
 from ..core import Table
 
 
-class BaseParser(object):
+class BaseParser():
     """Defines a base parser.
     """
     def __init__(
-        self,
-        parser_id,
-        table_regions=None,
-        table_areas=None,
-        copy_text=None,
-        split_text=False,
-        strip_text="",
-        shift_text=None,
-        flag_size=False,
-        debug=False
-    ):
+            self,
+            parser_id,
+            table_regions=None,
+            table_areas=None,
+            copy_text=None,
+            split_text=False,
+            strip_text="",
+            shift_text=None,
+            flag_size=False,
+            debug=False):
         self.id = parser_id
         self.table_regions = table_regions
         self.table_areas = table_areas
+        self.table_bbox = {}
 
         self.copy_text = copy_text
         self.split_text = split_text
@@ -49,7 +49,9 @@ class BaseParser(object):
         self.t_bbox = None
 
         # For plotting details of parsing algorithms
-        self.parse_details = {} if debug else None
+        self.parse_details = {}
+        if not debug:
+            self.parse_details = None
 
     def prepare_page_parse(self, filename, layout, dimensions,
                            page_idx, layout_kwargs):
@@ -177,6 +179,18 @@ class BaseParser(object):
                         table.cells[r_idx][c_idx].text = text
         return pos_errors
 
+    def _generate_columns_and_rows(self, bbox, table_idx):
+        """Abstract: subclasses return (cols, rows, v_s, h_s) for one bbox."""
+        raise NotImplementedError()
+
+    def _generate_table(self, table_idx, cols, rows, **kwargs):
+        """Abstract: must be overridden by every derived parser."""
+        raise NotImplementedError()
+
+    def _generate_table_bbox(self):
+        """Abstract: must be overridden by every derived parser."""
+        raise NotImplementedError()
+
     def extract_tables(self):
         if self._document_has_no_text():
             return []
@@ -188,8 +202,12 @@ class BaseParser(object):
         _tables = []
         # sort tables based on y-coord
         for table_idx, bbox in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
-        ):
+                sorted(
+                        self.table_bbox.keys(),
+                        key=lambda x: x[1],
+                        reverse=True
+                    )
+                ):
             cols, rows, v_s, h_s = self._generate_columns_and_rows(
                 bbox,
                 table_idx
@@ -232,20 +250,19 @@ class TextBaseParser(BaseParser):
     """
 
     def __init__(
-        self,
-        parser_id,
-        table_regions=None,
-        table_areas=None,
-        columns=None,
-        flag_size=False,
-        split_text=False,
-        strip_text="",
-        edge_tol=50,
-        row_tol=2,
-        column_tol=0,
-        debug=False,
-        **kwargs
-    ):
+            self,
+            parser_id,
+            table_regions=None,
+            table_areas=None,
+            columns=None,
+            flag_size=False,
+            split_text=False,
+            strip_text="",
+            edge_tol=50,
+            row_tol=2,
+            column_tol=0,
+            debug=False,
+            **kwargs):
         super().__init__(
             parser_id,
             table_regions=table_regions,
diff --git a/camelot/parsers/hybrid.py b/camelot/parsers/hybrid.py
index 6c399ef..bff0d58 100644
--- a/camelot/parsers/hybrid.py
+++ b/camelot/parsers/hybrid.py
@@ -3,9 +3,9 @@
 
 from __future__ import division
 
-import numpy as np
 import copy
 import math
+import numpy as np
 
 from .base import TextBaseParser
 from ..core import (
@@ -16,6 +16,7 @@ from ..core import (
 )
 from ..utils import (
     bbox_from_str,
+    expand_bbox_with_textline,
     text_in_bbox,
     bbox_from_textlines,
     distance_tl_to_bbox,
@@ -25,6 +26,23 @@ from ..utils import (
 # maximum number of columns over which a header can spread
 MAX_COL_SPREAD_IN_HEADER = 3
 
+# Minimum number of textlines in a table
+MINIMUM_TEXTLINES_IN_TABLE = 6
+
+
+def column_spread(left, right, col_anchors):
+    """Return how many column anchors the segment [left, right] crosses."""
+    anchor_count = len(col_anchors)
+    first = 0
+    # Skip every leading anchor located strictly before the left bound.
+    while first < anchor_count and col_anchors[first] < left:
+        first += 1
+    last = first
+    # Resume from there, up to the first anchor at or beyond the right bound.
+    while last < anchor_count and col_anchors[last] < right:
+        last += 1
+    return last - first
+
 
 def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
     """Expand a bbox vertically up by looking for plausible headers.
@@ -40,19 +58,6 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
     (left, bottom, right, top) = body_bbox
     zones = []
 
-    def column_spread(left, right, col_anchors):
-        """Get the number of columns crossed by a segment [left, right]."""
-        indexLeft = 0
-        while indexLeft < len(col_anchors) \
-                and col_anchors[indexLeft] < left:
-            indexLeft += 1
-        indexRight = indexLeft
-        while indexRight < len(col_anchors) \
-                and col_anchors[indexRight] < right:
-            indexRight += 1
-
-        return indexRight - indexLeft
-
     keep_searching = True
     while keep_searching:
         keep_searching = False
@@ -127,9 +132,8 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
             # columns.
             # This is to avoid picking unrelated paragraphs.
             if max_spread <= min(
-                MAX_COL_SPREAD_IN_HEADER,
-                math.ceil(len(col_anchors) / 2)
-            ):
+                    MAX_COL_SPREAD_IN_HEADER,
+                    math.ceil(len(col_anchors) / 2)):
                 # Combined, the elements we've identified don't cross more
                 # than the authorized number of columns.
                 # We're trying to avoid
@@ -145,7 +149,7 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
     return new_bbox
 
 
-class AlignmentCounter(object):
+class AlignmentCounter():
     """
     For a given textline, represent all other textlines aligned with it.
 
@@ -260,7 +264,7 @@ class TextNetworks(TextAlignments):
         removed_singletons = True
         while removed_singletons:
             removed_singletons = False
-            for alignment_id, textalignments in self._text_alignments.items():
+            for textalignments in self._text_alignments.values():
                 # For each alignment edge, remove items if they are singletons
                 # either horizontally or vertically
                 for ta in textalignments:
@@ -283,7 +287,7 @@ class TextNetworks(TextAlignments):
         return max(
             self._textline_to_alignments.keys(),
             key=lambda textline:
-                self._textline_to_alignments[textline].alignment_score(),
+            self._textline_to_alignments[textline].alignment_score(),
             default=None
         )
 
@@ -308,8 +312,8 @@ class TextNetworks(TextAlignments):
         # Retrieve the list of textlines it's aligned with, across both
         # axis
         best_alignment = self._textline_to_alignments[most_aligned_tl]
-        ref_h_alignment_id, ref_h_textlines = best_alignment.max_h()
-        ref_v_alignment_id, ref_v_textlines = best_alignment.max_v()
+        __, ref_h_textlines = best_alignment.max_h()
+        __, ref_v_textlines = best_alignment.max_v()
         if len(ref_v_textlines) <= 1 or len(ref_h_textlines) <= 1:
             return None
 
@@ -375,7 +379,6 @@ class TextNetworks(TextAlignments):
         else:
             parse_details_search = None
 
-        MINIMUM_TEXTLINES_IN_TABLE = 6
         bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
                 most_aligned_tl.x1, most_aligned_tl.y1)
 
@@ -402,12 +405,7 @@ class TextNetworks(TextAlignments):
                 # if the textline is close.
                 if h_distance < max_h_gap and v_distance < max_v_gap:
                     tls_in_bbox.append(tl)
-                    bbox = (
-                        min(bbox[0], tl.x0),
-                        min(bbox[1], tl.y0),
-                        max(bbox[2], tl.x1),
-                        max(bbox[3], tl.y1)
-                    )
+                    bbox = expand_bbox_with_textline(bbox, tl)
                     del tls_search_space[i]
         if len(tls_in_bbox) > MINIMUM_TEXTLINES_IN_TABLE:
             return bbox
@@ -461,19 +459,18 @@ class Hybrid(TextBaseParser):
     """
 
     def __init__(
-        self,
-        table_regions=None,
-        table_areas=None,
-        columns=None,
-        flag_size=False,
-        split_text=False,
-        strip_text="",
-        edge_tol=None,
-        row_tol=2,
-        column_tol=0,
-        debug=False,
-        **kwargs
-    ):
+            self,
+            table_regions=None,
+            table_areas=None,
+            columns=None,
+            flag_size=False,
+            split_text=False,
+            strip_text="",
+            edge_tol=None,
+            row_tol=2,
+            column_tol=0,
+            debug=False,
+            **kwargs):
         super().__init__(
             "hybrid",
             table_regions=table_regions,
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index b8b82ed..84ce5a2 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -84,24 +84,23 @@ class Lattice(BaseParser):
     """
 
     def __init__(
-        self,
-        table_regions=None,
-        table_areas=None,
-        process_background=False,
-        line_scale=15,
-        copy_text=None,
-        shift_text=None,
-        split_text=False,
-        flag_size=False,
-        strip_text="",
-        line_tol=2,
-        joint_tol=2,
-        threshold_blocksize=15,
-        threshold_constant=-2,
-        iterations=0,
-        resolution=300,
-        **kwargs
-    ):
+            self,
+            table_regions=None,
+            table_areas=None,
+            process_background=False,
+            line_scale=15,
+            copy_text=None,
+            shift_text=None,
+            split_text=False,
+            flag_size=False,
+            strip_text="",
+            line_tol=2,
+            joint_tol=2,
+            threshold_blocksize=15,
+            threshold_constant=-2,
+            iterations=0,
+            resolution=300,
+            **kwargs):
         super().__init__(
             "lattice",
             table_regions=table_regions,
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 8b72e09..988490f 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -50,18 +50,17 @@ class Stream(TextBaseParser):
     """
 
     def __init__(
-        self,
-        table_regions=None,
-        table_areas=None,
-        columns=None,
-        flag_size=False,
-        split_text=False,
-        strip_text="",
-        edge_tol=50,
-        row_tol=2,
-        column_tol=0,
-        **kwargs
-    ):
+            self,
+            table_regions=None,
+            table_areas=None,
+            columns=None,
+            flag_size=False,
+            split_text=False,
+            strip_text="",
+            edge_tol=50,
+            row_tol=2,
+            column_tol=0,
+            **kwargs):
         super().__init__(
             "stream",
             table_regions=table_regions,
diff --git a/camelot/plotting.py b/camelot/plotting.py
index 12ba457..d3d7064 100644
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@@ -136,7 +136,7 @@ def prepare_plot(table, ax=None, to_pdf_scale=True):
     return ax
 
 
-class PlotMethods(object):
+class PlotMethods():
     def __call__(self, table, kind="text", filename=None, ax=None):
         """Plot elements found on PDF page based on kind
         specified, useful for debugging and playing with different
diff --git a/camelot/utils.py b/camelot/utils.py
index a675580..cf85eb1 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -156,7 +156,7 @@ def remove_extra(kwargs, flavor="lattice"):
 
 # https://stackoverflow.com/a/22726782
 # and https://stackoverflow.com/questions/10965479
-class TemporaryDirectory(object):
+class TemporaryDirectory():
     def __enter__(self):
         self.name = tempfile.mkdtemp()
         # Only delete the temporary directory upon
@@ -488,6 +488,17 @@ def text_in_bbox_per_axis(bbox, horizontal_text, vertical_text):
     return t_bbox
 
 
+def expand_bbox_with_textline(bbox, textline):
+    """Return the smallest bbox covering both bbox and the textline."""
+    # Only indices 0..3 are read: (x0, y0, x1, y1), same order as returned.
+    x0 = min(bbox[0], textline.x0)
+    y0 = min(bbox[1], textline.y0)
+    x1 = max(bbox[2], textline.x1)
+    y1 = max(bbox[3], textline.y1)
+    # A new tuple is built; the bbox passed in is left untouched.
+    return (x0, y0, x1, y1)
+
+
 def bbox_from_textlines(textlines):
     """Returns the smallest bbox containing all the text objects passed as
     a parameters.
@@ -514,12 +525,7 @@ def bbox_from_textlines(textlines):
     )
 
     for tl in textlines[1:]:
-        bbox = (
-            min(bbox[0], tl.x0),
-            min(bbox[1], tl.y0),
-            max(bbox[2], tl.x1),
-            max(bbox[3], tl.y1)
-        )
+        bbox = expand_bbox_with_textline(bbox, tl)
     return bbox
 
 
@@ -1039,13 +1045,12 @@ def compute_whitespace(d):
 
 
 def get_page_layout(
-    filename,
-    char_margin=1.0,
-    line_margin=0.5,
-    word_margin=0.1,
-    detect_vertical=True,
-    all_texts=True,
-):
+        filename,
+        char_margin=1.0,
+        line_margin=0.5,
+        word_margin=0.1,
+        detect_vertical=True,
+        all_texts=True):
     """Returns a PDFMiner LTPage object and page dimension of a single
     page pdf. See https://euske.github.io/pdfminer/ to get definitions
     of kwargs.
@@ -1163,14 +1168,14 @@ def compare_tables(left, right):
     diff_cols = right.shape[1]-left.shape[1]
     diff_rows = right.shape[0]-left.shape[0]
     differences = []
-    if (diff_rows):
+    if diff_rows:
         differences.append(
             "{diff_rows} {more_fewer} rows".format(
                 diff_rows=abs(diff_rows),
                 more_fewer='more' if diff_rows > 0 else 'fewer'
             )
         )
-    if (diff_cols):
+    if diff_cols:
         differences.append(
             "{diff_cols} {more_fewer} columns".format(
                 diff_cols=abs(diff_cols),