More linting, refactor

2020-04-19 14:42:18 -07:00 · 2020-04-19 14:42:18 -07:00 · c27a8026d6
parent 50f11867af
commit c27a8026d6
5 changed files with 30 additions and 33 deletions
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -25,7 +25,7 @@ PARSERS = {
 }


-class PDFHandler(object):
+class PDFHandler():
    """Handles all operations like temp directory creation, splitting
    file into single page PDFs, parsing each PDF and then removing the
    temp directory.
@ -201,8 +201,8 @@ class PDFHandler(object):
                page_idx,
                layout_kwargs=layout_kwargs
            )
-            parser._generate_layout(source_file, layout, dimensions,
-                                    page_idx, layout_kwargs)
+            parser.prepare_page_parse(source_file, layout, dimensions,
+                                      page_idx, layout_kwargs)
            rootname = os.path.basename(parser.rootname)
            if not suppress_stdout:
                logger.info(
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-

 import os
+import warnings

 from ..utils import (
    get_text_objects,
@ -40,8 +41,8 @@ class BaseParser(object):
        # For plotting details of parsing algorithms
        self.debug_info = {}

-    def _generate_layout(self, filename, layout, dimensions,
-                         page_idx, layout_kwargs):
+    def prepare_page_parse(self, filename, layout, dimensions,
+                           page_idx, layout_kwargs):
        self.filename = filename
        self.layout_kwargs = layout_kwargs
        self.layout = layout
@ -59,6 +60,22 @@ class BaseParser(object):
        self.pdf_width, self.pdf_height = self.dimensions
        self.rootname, __ = os.path.splitext(self.filename)

+    def _document_has_no_text(self):  # Warn and return True when self.horizontal_text is falsy.
+        if not self.horizontal_text:
+            rootname = os.path.basename(self.rootname)  # file name without directory, used in the warning text
+            if self.images:  # images present but no horizontal text: report as image-based
+                warnings.warn(
+                    "{rootname} is image-based, "
+                    "camelot only works on text-based pages."
+                    .format(rootname=rootname)
+                )
+            else:  # neither horizontal text nor images
+                warnings.warn(
+                    "No tables found on {rootname}".format(rootname=rootname)
+                )
+            return True  # a warning has been emitted
+        return False  # horizontal text exists; no warning emitted
+
    """Initialize new table object, ready to be populated

    Parameters
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -3,7 +3,6 @@
 from __future__ import division
 import os
 import copy
-import warnings


 from .base import BaseParser
@ -312,18 +311,7 @@ class Lattice(BaseParser):
        return table

    def extract_tables(self, filename):
-        rootname = os.path.basename(self.rootname)
-        if not self.horizontal_text:
-            if self.images:
-                warnings.warn(
-                    "{rootname} is image-based, "
-                    "camelot only works on text-based pages."
-                    .format(rootname=rootname)
-                )
-            else:
-                warnings.warn(
-                    "No tables found on {rootname}".format(rootname=rootname)
-                )
+        if self._document_has_no_text():
            return []

        self._generate_table_bbox()
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-

 from __future__ import division
-import os
 import warnings

 import numpy as np
@ -427,18 +426,7 @@ class Stream(BaseParser):
        return table

    def extract_tables(self, filename):
-        if not self.horizontal_text:
-            if self.images:
-                warnings.warn(
-                    "{} is image-based, camelot only works on"
-                    " text-based pages.".format(
-                        os.path.basename(self.rootname))
-                )
-            else:
-                warnings.warn(
-                    "No tables found on {}".format(
-                        os.path.basename(self.rootname))
-                )
+        if self._document_has_no_text():
            return []

        # Identify plausible areas within the doc where tables lie,
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -1107,11 +1107,15 @@ def compare_tables(left, right):
                    diff_df = diff_df.append(lrow, ignore_index=True)
                    diff_df = diff_df.append(srow, ignore_index=True)
                    diff_df.insert(0, 'Table', [name_table1, name_table2])
-                    print(f"Row {index} differs:")
+                    print("Row {index} differs:".format(index=index))
                    print(diff_df.values)
                    break
            else:
-                print(f"Row {index} unique to {name_table1}: {lrow}")
+                print("Row {index} unique to {name_table1}: {lrow}".format(
+                    index=index,
+                    name_table1=name_table1,
+                    lrow=lrow
+                ))
                break
    else:
        print("Tables have different shapes")