Linting

2020-04-25 22:47:23 -07:00 · 2020-04-25 22:47:23 -07:00 · e1572a10c9
parent f7aafcd05c
commit e1572a10c9
8 changed files with 141 additions and 124 deletions
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -18,7 +18,7 @@ logger = logging.getLogger("camelot")
 logger.setLevel(logging.INFO)
-class Config(object):
+class Config():
    def __init__(self):
        self.config = {}
--- a/camelot/core.py
+++ b/camelot/core.py
@ -31,7 +31,7 @@ VERTICAL_ALIGNMENTS = ["top", "bottom", "center"]
 ALL_ALIGNMENTS = HORIZONTAL_ALIGNMENTS + VERTICAL_ALIGNMENTS
-class TextAlignment(object):
+class TextAlignment():
    """Represents a list of textlines sharing an alignment on a coordinate.
    The alignment can be left/right/middle or top/bottom/center.
@ -137,7 +137,7 @@ class TextEdge(TextAlignment):
                self.is_valid = True
-class TextAlignments(object):
+class TextAlignments():
    """Defines a dict of text edges across reference alignments.
    """
@ -327,7 +327,7 @@ class TextEdges(TextAlignments):
        return table_areas_padded
-class Cell(object):
+class Cell():
    """Defines a cell in a table with coordinates relative to a
    left-bottom origin. (PDF coordinate space)
@ -409,7 +409,7 @@ class Cell(object):
        return self.top + self.bottom + self.left + self.right
-class Table(object):
+class Table():
    """Defines a table with coordinates relative to a left-bottom
    origin. (PDF coordinate space)
@ -815,7 +815,7 @@ class Table(object):
        return self
-class TableList(object):
+class TableList():
    """Defines a list of camelot.core.Table objects. Each table can
    be accessed using its index.
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -19,7 +19,7 @@ from ..utils import (
 from ..core import Table
-class BaseParser(object):
+class BaseParser():
    """Defines a base parser.
    """
    def __init__(
@ -32,11 +32,11 @@ class BaseParser(object):
            strip_text="",
            shift_text=None,
            flag_size=False,
-        debug=False
+            debug=False):
    ):
        self.id = parser_id
        self.table_regions = table_regions
        self.table_areas = table_areas
        self.table_bbox = {}
        self.copy_text = copy_text
        self.split_text = split_text
@ -49,7 +49,9 @@ class BaseParser(object):
        self.t_bbox = None
        # For plotting details of parsing algorithms
-        self.parse_details = {} if debug else None
+        self.parse_details = {}
        if not debug:
            self.parse_details = None
    def prepare_page_parse(self, filename, layout, dimensions,
                           page_idx, layout_kwargs):
@ -177,6 +179,18 @@ class BaseParser(object):
                        table.cells[r_idx][c_idx].text = text
        return pos_errors
    def _generate_columns_and_rows(self, bbox, table_idx):
        # Pure virtual, must be defined by the derived parser
        raise NotImplementedError()
    def _generate_table(self, table_idx, cols, rows, **kwargs):
        # Pure virtual, must be defined by the derived parser
        raise NotImplementedError()
    def _generate_table_bbox(self):
        # Pure virtual, must be defined by the derived parser
        raise NotImplementedError()
    def extract_tables(self):
        if self._document_has_no_text():
            return []
@ -188,7 +202,11 @@ class BaseParser(object):
        _tables = []
        # sort tables based on y-coord
        for table_idx, bbox in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
+                sorted(
                        self.table_bbox.keys(),
                        key=lambda x: x[1],
                        reverse=True
                    )
                ):
            cols, rows, v_s, h_s = self._generate_columns_and_rows(
                bbox,
@ -244,8 +262,7 @@ class TextBaseParser(BaseParser):
            row_tol=2,
            column_tol=0,
            debug=False,
-        **kwargs
+            **kwargs):
    ):
        super().__init__(
            parser_id,
            table_regions=table_regions,
--- a/camelot/parsers/hybrid.py
+++ b/camelot/parsers/hybrid.py
@ -3,9 +3,9 @@
 from __future__ import division
 import numpy as np
 import copy
 import math
 import numpy as np
 from .base import TextBaseParser
 from ..core import (
@ -16,6 +16,7 @@ from ..core import (
 )
 from ..utils import (
    bbox_from_str,
    expand_bbox_with_textline,
    text_in_bbox,
    bbox_from_textlines,
    distance_tl_to_bbox,
@ -25,6 +26,23 @@ from ..utils import (
 # maximum number of columns over which a header can spread
 MAX_COL_SPREAD_IN_HEADER = 3
 # Minimum number of textlines in a table
 MINIMUM_TEXTLINES_IN_TABLE = 6
 def column_spread(left, right, col_anchors):
     """Count the column anchors crossed by the segment [left, right].

     The scan skips every leading anchor that is strictly smaller than
     ``left``. From that position it counts the consecutive anchors that are
     strictly smaller than ``right``. Both scans stop at the first anchor
     that fails the comparison, so the anchors are walked in the order given.

     Parameters
     ----------
     left : number
         Start coordinate of the segment.
     right : number
         End coordinate of the segment.
     col_anchors : sequence of numbers
         Column anchor coordinates, indexable and with a length.

     Returns
     -------
     int
         Number of anchors between the two stopping positions.
     """
     total = len(col_anchors)
     # Position of the first anchor that is not strictly before `left`.
     first = next(
         (pos for pos, anchor in enumerate(col_anchors) if not anchor < left),
         total,
     )
     # Continue from there to the first anchor not strictly before `right`.
     last = next(
         (pos for pos in range(first, total) if not col_anchors[pos] < right),
         total,
     )
     return last - first
 def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
    """Expand a bbox vertically up by looking for plausible headers.
@ -40,19 +58,6 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
    (left, bottom, right, top) = body_bbox
    zones = []
    def column_spread(left, right, col_anchors):
        """Get the number of columns crossed by a segment [left, right]."""
        indexLeft = 0
        while indexLeft < len(col_anchors) \
                and col_anchors[indexLeft] < left:
            indexLeft += 1
        indexRight = indexLeft
        while indexRight < len(col_anchors) \
                and col_anchors[indexRight] < right:
            indexRight += 1
        return indexRight - indexLeft
    keep_searching = True
    while keep_searching:
        keep_searching = False
@ -128,8 +133,7 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
            # This is to avoid picking unrelated paragraphs.
            if max_spread <= min(
                    MAX_COL_SPREAD_IN_HEADER,
-                math.ceil(len(col_anchors) / 2)
+                    math.ceil(len(col_anchors) / 2)):
            ):
                # Combined, the elements we've identified don't cross more
                # than the authorized number of columns.
                # We're trying to avoid
@ -145,7 +149,7 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):
    return new_bbox
-class AlignmentCounter(object):
+class AlignmentCounter():
    """
    For a given textline, represent all other textlines aligned with it.
@ -260,7 +264,7 @@ class TextNetworks(TextAlignments):
        removed_singletons = True
        while removed_singletons:
            removed_singletons = False
-            for alignment_id, textalignments in self._text_alignments.items():
+            for textalignments in self._text_alignments.values():
                # For each alignment edge, remove items if they are singletons
                # either horizontally or vertically
                for ta in textalignments:
@ -308,8 +312,8 @@ class TextNetworks(TextAlignments):
        # Retrieve the list of textlines it's aligned with, across both
        # axis
        best_alignment = self._textline_to_alignments[most_aligned_tl]
-        ref_h_alignment_id, ref_h_textlines = best_alignment.max_h()
+        __, ref_h_textlines = best_alignment.max_h()
-        ref_v_alignment_id, ref_v_textlines = best_alignment.max_v()
+        __, ref_v_textlines = best_alignment.max_v()
        if len(ref_v_textlines) <= 1 or len(ref_h_textlines) <= 1:
            return None
@ -375,7 +379,6 @@ class TextNetworks(TextAlignments):
        else:
            parse_details_search = None
        MINIMUM_TEXTLINES_IN_TABLE = 6
        bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
                most_aligned_tl.x1, most_aligned_tl.y1)
@ -402,12 +405,7 @@ class TextNetworks(TextAlignments):
                # if the textline is close.
                if h_distance < max_h_gap and v_distance < max_v_gap:
                    tls_in_bbox.append(tl)
-                    bbox = (
+                    bbox = expand_bbox_with_textline(bbox, tl)
                        min(bbox[0], tl.x0),
                        min(bbox[1], tl.y0),
                        max(bbox[2], tl.x1),
                        max(bbox[3], tl.y1)
                    )
                    del tls_search_space[i]
        if len(tls_in_bbox) > MINIMUM_TEXTLINES_IN_TABLE:
            return bbox
@ -472,8 +470,7 @@ class Hybrid(TextBaseParser):
            row_tol=2,
            column_tol=0,
            debug=False,
-        **kwargs
+            **kwargs):
    ):
        super().__init__(
            "hybrid",
            table_regions=table_regions,
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -100,8 +100,7 @@ class Lattice(BaseParser):
            threshold_constant=-2,
            iterations=0,
            resolution=300,
-        **kwargs
+            **kwargs):
    ):
        super().__init__(
            "lattice",
            table_regions=table_regions,
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -60,8 +60,7 @@ class Stream(TextBaseParser):
            edge_tol=50,
            row_tol=2,
            column_tol=0,
-        **kwargs
+            **kwargs):
    ):
        super().__init__(
            "stream",
            table_regions=table_regions,
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -136,7 +136,7 @@ def prepare_plot(table, ax=None, to_pdf_scale=True):
    return ax
-class PlotMethods(object):
+class PlotMethods():
    def __call__(self, table, kind="text", filename=None, ax=None):
        """Plot elements found on PDF page based on kind
        specified, useful for debugging and playing with different
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -156,7 +156,7 @@ def remove_extra(kwargs, flavor="lattice"):
 # https://stackoverflow.com/a/22726782
 # and https://stackoverflow.com/questions/10965479
-class TemporaryDirectory(object):
+class TemporaryDirectory():
    def __enter__(self):
        self.name = tempfile.mkdtemp()
        # Only delete the temporary directory upon
@ -488,6 +488,17 @@ def text_in_bbox_per_axis(bbox, horizontal_text, vertical_text):
    return t_bbox
 def expand_bbox_with_textline(bbox, textline):
     """Return ``bbox`` grown just enough to also cover ``textline``.

     Parameters
     ----------
     bbox : sequence
         Read by index as (x0, y0, x1, y1).
     textline : object
         Anything exposing ``x0``, ``y0``, ``x1`` and ``y1`` attributes.

     Returns
     -------
     tuple
         A new 4-tuple holding the smaller of the two lower bounds and the
         larger of the two upper bounds on each axis. ``bbox`` itself is not
         modified.
     """
     low_x = min(bbox[0], textline.x0)
     low_y = min(bbox[1], textline.y0)
     high_x = max(bbox[2], textline.x1)
     high_y = max(bbox[3], textline.y1)
     return (low_x, low_y, high_x, high_y)
 def bbox_from_textlines(textlines):
    """Returns the smallest bbox containing all the text objects passed as
    a parameters.
@ -514,12 +525,7 @@ def bbox_from_textlines(textlines):
    )
    for tl in textlines[1:]:
-        bbox = (
+        bbox = expand_bbox_with_textline(bbox, tl)
            min(bbox[0], tl.x0),
            min(bbox[1], tl.y0),
            max(bbox[2], tl.x1),
            max(bbox[3], tl.y1)
        )
    return bbox
@ -1044,8 +1050,7 @@ def get_page_layout(
        line_margin=0.5,
        word_margin=0.1,
        detect_vertical=True,
-    all_texts=True,
+        all_texts=True):
 ):
    """Returns a PDFMiner LTPage object and page dimension of a single
    page pdf. See https://euske.github.io/pdfminer/ to get definitions
    of kwargs.
@ -1163,14 +1168,14 @@ def compare_tables(left, right):
    diff_cols = right.shape[1]-left.shape[1]
    diff_rows = right.shape[0]-left.shape[0]
    differences = []
-    if (diff_rows):
+    if diff_rows:
        differences.append(
            "{diff_rows} {more_fewer} rows".format(
                diff_rows=abs(diff_rows),
                more_fewer='more' if diff_rows > 0 else 'fewer'
            )
        )
-    if (diff_cols):
+    if diff_cols:
        differences.append(
            "{diff_cols} {more_fewer} columns".format(
                diff_cols=abs(diff_cols),