More linting, refactor

2020-04-19 14:42:18 -07:00 · 2020-04-19 14:42:18 -07:00 · c27a8026d6
parent 50f11867af
commit c27a8026d6
5 changed files with 30 additions and 33 deletions
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -25,7 +25,7 @@ PARSERS = {
 }
-class PDFHandler(object):
+class PDFHandler():
    """Handles all operations like temp directory creation, splitting
    file into single page PDFs, parsing each PDF and then removing the
    temp directory.
@ -201,8 +201,8 @@ class PDFHandler(object):
                page_idx,
                layout_kwargs=layout_kwargs
            )
-            parser._generate_layout(source_file, layout, dimensions,
+            parser.prepare_page_parse(source_file, layout, dimensions,
-                                    page_idx, layout_kwargs)
+                                      page_idx, layout_kwargs)
            rootname = os.path.basename(parser.rootname)
            if not suppress_stdout:
                logger.info(
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import os
 import warnings
 from ..utils import (
    get_text_objects,
@ -40,8 +41,8 @@ class BaseParser(object):
        # For plotting details of parsing algorithms
        self.debug_info = {}
-    def _generate_layout(self, filename, layout, dimensions,
+    def prepare_page_parse(self, filename, layout, dimensions,
-                         page_idx, layout_kwargs):
+                           page_idx, layout_kwargs):
        self.filename = filename
        self.layout_kwargs = layout_kwargs
        self.layout = layout
@ -59,6 +60,22 @@ class BaseParser(object):
        self.pdf_width, self.pdf_height = self.dimensions
        self.rootname, __ = os.path.splitext(self.filename)
    def _document_has_no_text(self):
        """Report whether the current page lacks horizontal text.

        When ``self.horizontal_text`` is empty (falsy), a warning is
        issued through ``warnings.warn``: an "image-based" message if
        ``self.images`` is non-empty, otherwise a "No tables found"
        message. No warning is issued when horizontal text is present.

        Returns
        -------
        bool
            True if ``self.horizontal_text`` is empty (a warning has
            been issued), False otherwise.

        """
        if not self.horizontal_text:
            # Only the base name of the file is shown in the warning text.
            rootname = os.path.basename(self.rootname)
            if self.images:
                # No text but images present: report the page as image-based.
                warnings.warn(
                    "{rootname} is image-based, "
                    "camelot only works on text-based pages."
                    .format(rootname=rootname)
                )
            else:
                # Neither text nor images were found.
                warnings.warn(
                    "No tables found on {rootname}".format(rootname=rootname)
                )
            return True
        return False
    """Initialize new table object, ready to be populated
    Parameters
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -3,7 +3,6 @@
 from __future__ import division
 import os
 import copy
 import warnings
 from .base import BaseParser
@ -312,18 +311,7 @@ class Lattice(BaseParser):
        return table
    def extract_tables(self, filename):
-        rootname = os.path.basename(self.rootname)
+        if self._document_has_no_text():
        if not self.horizontal_text:
            if self.images:
                warnings.warn(
                    "{rootname} is image-based, "
                    "camelot only works on text-based pages."
                    .format(rootname=rootname)
                )
            else:
                warnings.warn(
                    "No tables found on {rootname}".format(rootname=rootname)
                )
            return []
        self._generate_table_bbox()
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import division
 import os
 import warnings
 import numpy as np
@ -427,18 +426,7 @@ class Stream(BaseParser):
        return table
    def extract_tables(self, filename):
-        if not self.horizontal_text:
+        if self._document_has_no_text():
            if self.images:
                warnings.warn(
                    "{} is image-based, camelot only works on"
                    " text-based pages.".format(
                        os.path.basename(self.rootname))
                )
            else:
                warnings.warn(
                    "No tables found on {}".format(
                        os.path.basename(self.rootname))
                )
            return []
        # Identify plausible areas within the doc where tables lie,
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -1107,11 +1107,15 @@ def compare_tables(left, right):
                    diff_df = diff_df.append(lrow, ignore_index=True)
                    diff_df = diff_df.append(srow, ignore_index=True)
                    diff_df.insert(0, 'Table', [name_table1, name_table2])
-                    print(f"Row {index} differs:")
+                    print("Row {index} differs:".format(index=index))
                    print(diff_df.values)
                    break
            else:
-                print(f"Row {index} unique to {name_table1}: {lrow}")
+                print("Row {index} unique to {name_table1}: {lrow}".format(
                    index=index,
                    name_table1=name_table1,
                    lrow=lrow
                ))
                break
    else:
        print("Tables have different shapes")