From 20f18b478f59524d154b220b0a77c06cf2da8a2a Mon Sep 17 00:00:00 2001
From: Frh <francois.huet+github@gmail.com>
Date: Sun, 19 Apr 2020 14:30:32 -0700
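Subject: [PATCH] Lint, refactor

- core.py: fix the over-indented body of the table-area loop in
  TextEdges; iterate Table cells with enumerate() when copying text
  into spanning cells; drop the itertools.chain import.
- handlers.py: emit the "Processing <rootname>" log message from
  PDFHandler instead of from each parser.
- parsers: remove the suppress_stdout parameter from
  Lattice.extract_tables and Stream.extract_tables, initialise
  BaseParser.rootname to None, and remove imports (logging, pandas,
  and others) from lattice.py and stream.py.
- utils.py: PEP 8 spacing around comparison operators.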

---
 camelot/core.py            | 59 +++++++++++++++++++-------------------
 camelot/handlers.py        | 12 ++++++--
 camelot/parsers/base.py    |  6 ++--
 camelot/parsers/lattice.py | 18 +-----------
 camelot/parsers/stream.py  | 14 ++-------
 camelot/utils.py           |  4 +--
 6 files changed, 47 insertions(+), 66 deletions(-)

diff --git a/camelot/core.py b/camelot/core.py
index 94d49e9..5712e65 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -4,7 +4,6 @@ import os
 import sqlite3
 import zipfile
 import tempfile
-from itertools import chain
 from operator import itemgetter
 
 import numpy as np
@@ -191,26 +190,26 @@ class TextEdges(object):
 
         table_areas = {}
         for te in relevant_textedges:
-                if not table_areas:
+            if not table_areas:
+                table_areas[(te.x, te.y0, te.x, te.y1)] = None
+            else:
+                found = None
+                for area in table_areas:
+                    # check for overlap
+                    if te.y1 >= area[1] and te.y0 <= area[3]:
+                        found = area
+                        break
+                if found is None:
                     table_areas[(te.x, te.y0, te.x, te.y1)] = None
                 else:
-                    found = None
-                    for area in table_areas:
-                        # check for overlap
-                        if te.y1 >= area[1] and te.y0 <= area[3]:
-                            found = area
-                            break
-                    if found is None:
-                        table_areas[(te.x, te.y0, te.x, te.y1)] = None
-                    else:
-                        table_areas.pop(found)
-                        updated_area = (
-                            found[0],
-                            min(te.y0, found[1]),
-                            max(found[2], te.x),
-                            max(found[3], te.y1),
-                        )
-                        table_areas[updated_area] = None
+                    table_areas.pop(found)
+                    updated_area = (
+                        found[0],
+                        min(te.y0, found[1]),
+                        max(found[2], te.x),
+                        max(found[3], te.y1),
+                    )
+                    table_areas[updated_area] = None
 
         # extend table areas based on textlines that overlap
         # vertically. it's possible that these textlines were
@@ -736,17 +735,19 @@ class Table(object):
         """
         for f in copy_text:
             if f == "h":
-                for i in range(len(self.cells)):
-                    for j in range(len(self.cells[i])):
-                        if self.cells[i][j].text.strip() == "":
-                            if self.cells[i][j].hspan and not self.cells[i][j].left:
-                                self.cells[i][j].text = self.cells[i][j - 1].text
+                for i, row in enumerate(self.cells):
+                    for j, cell in enumerate(row):
+                        if cell.text.strip() == "" and \
+                           cell.hspan and \
+                           not cell.left:
+                            cell.text = self.cells[i][j - 1].text
             elif f == "v":
-                for i in range(len(self.cells)):
-                    for j in range(len(self.cells[i])):
-                        if self.cells[i][j].text.strip() == "":
-                            if self.cells[i][j].vspan and not self.cells[i][j].top:
-                                self.cells[i][j].text = self.cells[i - 1][j].text
+                for i, row in enumerate(self.cells):
+                    for j, cell in enumerate(row):
+                        if cell.text.strip() == "" and \
+                           cell.vspan and \
+                           not cell.top:
+                            cell.text = self.cells[i - 1][j].text
         return self
 
 
diff --git a/camelot/handlers.py b/camelot/handlers.py
index 7a9f2ff..64b6197 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -2,6 +2,7 @@
 
 import os
 import sys
+import logging
 
 from PyPDF2 import PdfFileReader, PdfFileWriter
 
@@ -16,6 +17,8 @@ from .utils import (
     download_url,
 )
 
+logger = logging.getLogger("camelot")
+
 PARSERS = {
     "lattice": Lattice,
     "stream": Stream
@@ -199,10 +202,13 @@ class PDFHandler(object):
                 layout_kwargs=layout_kwargs
             )
             parser._generate_layout(source_file, layout, dimensions,
-                                page_idx, layout_kwargs)
+                                    page_idx, layout_kwargs)
+            rootname = os.path.basename(parser.rootname)
+            if not suppress_stdout:
+                logger.info(
+                    "Processing {rootname}".format(rootname=rootname))
             t = parser.extract_tables(
-                source_file,
-                suppress_stdout=suppress_stdout
+                source_file
             )
             tables.extend(t)
         return TableList(sorted(tables))
diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py
index 19deceb..b364f04 100644
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@@ -12,7 +12,8 @@ from ..core import Table
 class BaseParser(object):
     """Defines a base parser.
     """
-    def __init__(self,
+    def __init__(
+        self,
         parser_id,
         table_regions=None,
         table_areas=None,
@@ -33,6 +34,7 @@ class BaseParser(object):
 
         self.flag_size = flag_size
 
+        self.rootname = None
         self.t_bbox = None
 
         # For plotting details of parsing algorithms
@@ -79,7 +81,6 @@ class BaseParser(object):
         table.order = table_idx + 1
         return table
 
-
     @staticmethod
     def _reduce_index(t, idx, shift_text):
         """Reduces index of a text object if it lies within a spanning
@@ -112,4 +113,3 @@ class BaseParser(object):
                     for r_idx, c_idx, text in indices:
                         table.cells[r_idx][c_idx].text = text
         return pos_errors
-
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index c0f3e9b..cefc27f 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -2,15 +2,9 @@
 
 from __future__ import division
 import os
-import sys
 import copy
-import locale
-import logging
 import warnings
-import subprocess
 
-import numpy as np
-import pandas as pd
 
 from .base import BaseParser
 from ..utils import (
@@ -21,8 +15,6 @@ from ..utils import (
     segments_in_bbox,
     text_in_bbox,
     merge_close_lines,
-    get_table_index,
-    compute_accuracy,
 )
 from ..image_processing import (
     adaptive_threshold,
@@ -32,9 +24,6 @@ from ..image_processing import (
 )
 
 
-logger = logging.getLogger("camelot")
-
-
 class Lattice(BaseParser):
     """Lattice method of parsing looks for lines between text
     to parse the table.
@@ -322,13 +311,8 @@ class Lattice(BaseParser):
 
         return table
 
-    def extract_tables(self, filename, suppress_stdout=False):
-        # FRHTODO: move extract table core to the base class
+    def extract_tables(self, filename):
         rootname = os.path.basename(self.rootname)
-        if not suppress_stdout:
-            logger.info(
-                "Processing {rootname}".format(rootname=rootname))
-
         if not self.horizontal_text:
             if self.images:
                 warnings.warn(
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 2df3093..351d7d3 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -2,19 +2,13 @@
 
 from __future__ import division
 import os
-import logging
 import warnings
 
 import numpy as np
-import pandas as pd
 
 from .base import BaseParser
 from ..core import TextEdges
-from ..utils import (text_in_bbox, compute_accuracy,
-                     compute_whitespace)
-
-
-logger = logging.getLogger("camelot")
+from ..utils import (text_in_bbox)
 
 
 class Stream(BaseParser):
@@ -432,11 +426,7 @@ class Stream(BaseParser):
 
         return table
 
-    def extract_tables(self, filename, suppress_stdout=False):
-        if not suppress_stdout:
-            logger.info("Processing {}".format(
-                os.path.basename(self.rootname)))
-
+    def extract_tables(self, filename):
         if not self.horizontal_text:
             if self.images:
                 warnings.warn(
diff --git a/camelot/utils.py b/camelot/utils.py
index cc4a58c..2c66e7c 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -1044,14 +1044,14 @@ def compare_tables(left, right):
         differences.append(
             "{diff_rows} {more_fewer} rows".format(
                 diff_rows=abs(diff_rows),
-                more_fewer='more' if diff_rows>0 else 'fewer'
+                more_fewer='more' if diff_rows > 0 else 'fewer'
             )
         )
     if (diff_cols):
         differences.append(
             "{diff_cols} {more_fewer} columns".format(
                 diff_cols=abs(diff_cols),
-                more_fewer='more' if diff_cols>0 else 'fewer'
+                more_fewer='more' if diff_cols > 0 else 'fewer'
             )
         )
     if differences: