[MRG + 1] Make pep8 (#125)

* Make setup.py pep8: add a newline at the end of the file, fix a bare except, and remove an unused import.
* Make tests/*.py pep8: add some newlines at the end of files and a visual indent.
* Make docs/*.py pep8: fix block comments and add newlines at the end of files.
* Make camelot/*.py pep8: fix an unused import, a few oddly ordered imports, a docstring typo, and many missing newlines at the end of files.
* Fix imports: fix import order and remove a couple more unused imports.
* Fix indents: fix indentation (no opening delimiter alignment).
* Add newlines.
2018-10-05 12:25:43 +01:00
parent 6e8079df84
commit 90aaba6eec
20 changed files with 107 additions and 111 deletions
@@ -2,6 +2,9 @@

 import logging

+from .__version__ import __version__
+from .io import read_pdf
+

 # set up logging
 logger = logging.getLogger('camelot')
@@ -12,8 +15,3 @@ handler = logging.StreamHandler()
 handler.setFormatter(formatter)

 logger.addHandler(handler)
-
-
-from .__version__ import __version__
-
-from .io import read_pdf
@@ -8,4 +8,4 @@ __url__ = 'http://camelot-py.readthedocs.io/'
 __version__ = '.'.join(map(str, VERSION))
 __author__ = 'Vinayak Mehta'
 __author_email__ = 'vmehta94@gmail.com'
-__license__ = 'MIT License'
+__license__ = 'MIT License'
@@ -2,17 +2,18 @@

 import logging

-logger = logging.getLogger('camelot')
-logger.setLevel(logging.INFO)
-
 import click

 from . import __version__
 from .io import read_pdf


+logger = logging.getLogger('camelot')
+logger.setLevel(logging.INFO)
+
+
 class Config(object):
-    def  __init__(self):
+    def __init__(self):
        self.config = {}

    def set_config(self, key, value):
@@ -152,4 +153,4 @@ def stream(c, *args, **kwargs):
            raise click.UsageError('Please specify output file path using --output')
        if f is None:
            raise click.UsageError('Please specify output file format using --format')
-        tables.export(output, f=f, compress=compress)
+        tables.export(output, f=f, compress=compress)
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-

 import os
-import json
 import zipfile
 import tempfile

@@ -519,4 +518,4 @@ class TableList(object):
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + '.zip'
                with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
-                    z.write(filepath, os.path.basename(filepath))
+                    z.write(filepath, os.path.basename(filepath))
@@ -145,4 +145,4 @@ class PDFHandler(object):
            for p in pages:
                t = parser.extract_tables(p)
                tables.extend(t)
-        return TableList(tables)
+        return TableList(tables)
@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-

 from __future__ import division
-from itertools import groupby
-from operator import itemgetter

 import cv2
 import numpy as np
@@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    if process_background:
-        threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        threshold = cv2.adaptiveThreshold(
+            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, blocksize, c)
    else:
-        threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
+        threshold = cv2.adaptiveThreshold(
+            np.invert(gray), 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
    return img, threshold

@@ -197,4 +197,4 @@ def find_table_joints(contours, vertical, horizontal):
            joint_coords.append((c1, c2))
        tables[(x, y + h, x + w, y)] = joint_coords

-    return tables
+    return tables
@@ -89,4 +89,4 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
    p = PDFHandler(filepath, pages)
    kwargs = remove_extra(kwargs, flavor=flavor)
    tables = p.parse(flavor=flavor, **kwargs)
-    return tables
+    return tables
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-

 from .stream import Stream
-from .lattice import Lattice
+from .lattice import Lattice
@@ -18,4 +18,4 @@ class BaseParser(object):
        self.horizontal_text = get_text_objects(self.layout, ltype="lh")
        self.vertical_text = get_text_objects(self.layout, ltype="lv")
        self.pdf_width, self.pdf_height = self.dimensions
-        self.rootname, __ = os.path.splitext(self.filename)
+        self.rootname, __ = os.path.splitext(self.filename)
@@ -201,8 +201,9 @@ class Lattice(BaseParser):
            if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
                gs_call.insert(0, 'gs')
            else:
-                gs_call.insert(0, 'gsc')
-        subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
+                gs_call.insert(0, "gsc")
+        subprocess.call(
+            gs_call, stdout=open(os.devnull, 'w'),
            stderr=subprocess.STDOUT)

    def _generate_table_bbox(self):
@@ -339,10 +340,10 @@ class Lattice(BaseParser):

        _tables = []
        # sort tables based on y-coord
-        for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
-                key=lambda x: x[1], reverse=True)):
+        for table_idx, tk in enumerate(sorted(
+                self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
            cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
            _tables.append(table)

-        return _tables
+        return _tables
@@ -116,7 +116,7 @@ class Stream(BaseParser):
                    row_y = t.y0
                temp.append(t)
        rows.append(sorted(temp, key=lambda t: t.x0))
-        __ = rows.pop(0) # hacky
+        __ = rows.pop(0)  # hacky
        return rows

    @staticmethod
@@ -211,7 +211,7 @@ class Stream(BaseParser):
            text = Stream._group_rows(text, row_close_tol=row_close_tol)
            elements = [len(r) for r in text]
            new_cols = [(t.x0, t.x1)
-                for r in text if len(r) == max(elements) for t in r]
+                        for r in text if len(r) == max(elements) for t in r]
            cols.extend(Stream._merge_columns(sorted(new_cols)))
        return cols

@@ -357,10 +357,10 @@ class Stream(BaseParser):

        _tables = []
        # sort tables based on y-coord
-        for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
-                key=lambda x: x[1], reverse=True)):
+        for table_idx, tk in enumerate(sorted(
+                self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
            cols, rows = self._generate_columns_and_rows(table_idx, tk)
            table = self._generate_table(table_idx, cols, rows)
            _tables.append(table)

-        return _tables
+        return _tables
@@ -41,16 +41,16 @@ def plot_table(table):
        for cell in row:
            if cell.left:
                plt.plot([cell.lb[0], cell.lt[0]],
-                            [cell.lb[1], cell.lt[1]])
+                         [cell.lb[1], cell.lt[1]])
            if cell.right:
                plt.plot([cell.rb[0], cell.rt[0]],
-                            [cell.rb[1], cell.rt[1]])
+                         [cell.rb[1], cell.rt[1]])
            if cell.top:
                plt.plot([cell.lt[0], cell.rt[0]],
-                            [cell.lt[1], cell.rt[1]])
+                         [cell.lt[1], cell.rt[1]])
            if cell.bottom:
                plt.plot([cell.lb[0], cell.rb[0]],
-                            [cell.lb[1], cell.rb[1]])
+                         [cell.lb[1], cell.rb[1]])
    plt.show()


@@ -105,4 +105,4 @@ def plot_line(segments):
        plt.plot([v[0], v[2]], [v[1], v[3]])
    for h in horizontal:
        plt.plot([h[0], h[2]], [h[1], h[3]])
-    plt.show()
+    plt.show()
@@ -1,5 +1,4 @@
 from __future__ import division
-import os
 import shutil
 import tempfile
 import warnings
@@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfpage import PDFTextExtractionNotAllowed
 from pdfminer.pdfinterp import PDFResourceManager
 from pdfminer.pdfinterp import PDFPageInterpreter
-from pdfminer.pdfdevice import PDFDevice
 from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
                             LTTextLineVertical)
@@ -278,8 +276,8 @@ def text_in_bbox(bbox, text):
    lb = (bbox[0], bbox[1])
    rt = (bbox[2], bbox[3])
    t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0
-                <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
-                <= rt[1] + 2]
+              <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
+              <= rt[1] + 2]
    return t_bbox


@@ -640,4 +638,4 @@ def get_text_objects(layout, ltype="char", t=None):
                t += get_text_objects(obj, ltype=ltype)
    except AttributeError:
        pass
-    return t
+    return t
@@ -10,21 +10,21 @@ class FlaskyStyle(Style):

    styles = {
        # No corresponding class for the following:
-        #Text:                     "", # class:  ''
-        Whitespace:                "underline #f8f8f8",      # class: 'w'
-        Error:                     "#a40000 border:#ef2929", # class: 'err'
-        Other:                     "#000000",                # class 'x'
+        # Text:                    "", # class:  ''
+        Whitespace:                "underline #f8f8f8",       # class: 'w'
+        Error:                     "#a40000 border:#ef2929",  # class: 'err'
+        Other:                     "#000000",                 # class 'x'

-        Comment:                   "italic #8f5902", # class: 'c'
-        Comment.Preproc:           "noitalic",       # class: 'cp'
+        Comment:                   "italic #8f5902",  # class: 'c'
+        Comment.Preproc:           "noitalic",        # class: 'cp'

-        Keyword:                   "bold #004461",   # class: 'k'
-        Keyword.Constant:          "bold #004461",   # class: 'kc'
-        Keyword.Declaration:       "bold #004461",   # class: 'kd'
-        Keyword.Namespace:         "bold #004461",   # class: 'kn'
-        Keyword.Pseudo:            "bold #004461",   # class: 'kp'
-        Keyword.Reserved:          "bold #004461",   # class: 'kr'
-        Keyword.Type:              "bold #004461",   # class: 'kt'
+        Keyword:                   "bold #004461",    # class: 'k'
+        Keyword.Constant:          "bold #004461",    # class: 'kc'
+        Keyword.Declaration:       "bold #004461",    # class: 'kd'
+        Keyword.Namespace:         "bold #004461",    # class: 'kn'
+        Keyword.Pseudo:            "bold #004461",    # class: 'kp'
+        Keyword.Reserved:          "bold #004461",    # class: 'kr'
+        Keyword.Type:              "bold #004461",    # class: 'kt'

        Operator:                  "#582800",   # class: 'o'
        Operator.Word:             "bold #004461",   # class: 'ow' - like keywords
@@ -34,53 +34,53 @@ class FlaskyStyle(Style):
        # because special names such as Name.Class, Name.Function, etc.
        # are not recognized as such later in the parsing, we choose them
        # to look the same as ordinary variables.
-        Name:                      "#000000",        # class: 'n'
-        Name.Attribute:            "#c4a000",        # class: 'na' - to be revised
-        Name.Builtin:              "#004461",        # class: 'nb'
-        Name.Builtin.Pseudo:       "#3465a4",        # class: 'bp'
-        Name.Class:                "#000000",        # class: 'nc' - to be revised
-        Name.Constant:             "#000000",        # class: 'no' - to be revised
-        Name.Decorator:            "#888",           # class: 'nd' - to be revised
-        Name.Entity:               "#ce5c00",        # class: 'ni'
-        Name.Exception:            "bold #cc0000",   # class: 'ne'
-        Name.Function:             "#000000",        # class: 'nf'
-        Name.Property:             "#000000",        # class: 'py'
-        Name.Label:                "#f57900",        # class: 'nl'
-        Name.Namespace:            "#000000",        # class: 'nn' - to be revised
-        Name.Other:                "#000000",        # class: 'nx'
-        Name.Tag:                  "bold #004461",   # class: 'nt' - like a keyword
-        Name.Variable:             "#000000",        # class: 'nv' - to be revised
-        Name.Variable.Class:       "#000000",        # class: 'vc' - to be revised
-        Name.Variable.Global:      "#000000",        # class: 'vg' - to be revised
-        Name.Variable.Instance:    "#000000",        # class: 'vi' - to be revised
+        Name:                      "#000000",         # class: 'n'
+        Name.Attribute:            "#c4a000",         # class: 'na' - to be revised
+        Name.Builtin:              "#004461",         # class: 'nb'
+        Name.Builtin.Pseudo:       "#3465a4",         # class: 'bp'
+        Name.Class:                "#000000",         # class: 'nc' - to be revised
+        Name.Constant:             "#000000",         # class: 'no' - to be revised
+        Name.Decorator:            "#888",            # class: 'nd' - to be revised
+        Name.Entity:               "#ce5c00",         # class: 'ni'
+        Name.Exception:            "bold #cc0000",    # class: 'ne'
+        Name.Function:             "#000000",         # class: 'nf'
+        Name.Property:             "#000000",         # class: 'py'
+        Name.Label:                "#f57900",         # class: 'nl'
+        Name.Namespace:            "#000000",         # class: 'nn' - to be revised
+        Name.Other:                "#000000",         # class: 'nx'
+        Name.Tag:                  "bold #004461",    # class: 'nt' - like a keyword
+        Name.Variable:             "#000000",         # class: 'nv' - to be revised
+        Name.Variable.Class:       "#000000",         # class: 'vc' - to be revised
+        Name.Variable.Global:      "#000000",         # class: 'vg' - to be revised
+        Name.Variable.Instance:    "#000000",         # class: 'vi' - to be revised

-        Number:                    "#990000",        # class: 'm'
+        Number:                    "#990000",         # class: 'm'

-        Literal:                   "#000000",        # class: 'l'
-        Literal.Date:              "#000000",        # class: 'ld'
+        Literal:                   "#000000",         # class: 'l'
+        Literal.Date:              "#000000",         # class: 'ld'

-        String:                    "#4e9a06",        # class: 's'
-        String.Backtick:           "#4e9a06",        # class: 'sb'
-        String.Char:               "#4e9a06",        # class: 'sc'
-        String.Doc:                "italic #8f5902", # class: 'sd' - like a comment
-        String.Double:             "#4e9a06",        # class: 's2'
-        String.Escape:             "#4e9a06",        # class: 'se'
-        String.Heredoc:            "#4e9a06",        # class: 'sh'
-        String.Interpol:           "#4e9a06",        # class: 'si'
-        String.Other:              "#4e9a06",        # class: 'sx'
-        String.Regex:              "#4e9a06",        # class: 'sr'
-        String.Single:             "#4e9a06",        # class: 's1'
-        String.Symbol:             "#4e9a06",        # class: 'ss'
+        String:                    "#4e9a06",         # class: 's'
+        String.Backtick:           "#4e9a06",         # class: 'sb'
+        String.Char:               "#4e9a06",         # class: 'sc'
+        String.Doc:                "italic #8f5902",  # class: 'sd' - like a comment
+        String.Double:             "#4e9a06",         # class: 's2'
+        String.Escape:             "#4e9a06",         # class: 'se'
+        String.Heredoc:            "#4e9a06",         # class: 'sh'
+        String.Interpol:           "#4e9a06",         # class: 'si'
+        String.Other:              "#4e9a06",         # class: 'sx'
+        String.Regex:              "#4e9a06",         # class: 'sr'
+        String.Single:             "#4e9a06",         # class: 's1'
+        String.Symbol:             "#4e9a06",         # class: 'ss'

-        Generic:                   "#000000",        # class: 'g'
-        Generic.Deleted:           "#a40000",        # class: 'gd'
-        Generic.Emph:              "italic #000000", # class: 'ge'
-        Generic.Error:             "#ef2929",        # class: 'gr'
-        Generic.Heading:           "bold #000080",   # class: 'gh'
-        Generic.Inserted:          "#00A000",        # class: 'gi'
-        Generic.Output:            "#888",           # class: 'go'
-        Generic.Prompt:            "#745334",        # class: 'gp'
-        Generic.Strong:            "bold #000000",   # class: 'gs'
-        Generic.Subheading:        "bold #800080",   # class: 'gu'
-        Generic.Traceback:         "bold #a40000",   # class: 'gt'
-    }
+        Generic:                   "#000000",         # class: 'g'
+        Generic.Deleted:           "#a40000",         # class: 'gd'
+        Generic.Emph:              "italic #000000",  # class: 'ge'
+        Generic.Error:             "#ef2929",         # class: 'gr'
+        Generic.Heading:           "bold #000080",    # class: 'gh'
+        Generic.Inserted:          "#00A000",         # class: 'gi'
+        Generic.Output:            "#888",            # class: 'go'
+        Generic.Prompt:            "#745334",         # class: 'gp'
+        Generic.Strong:            "bold #000000",    # class: 'gs'
+        Generic.Subheading:        "bold #800080",    # class: 'gu'
+        Generic.Traceback:         "bold #a40000",    # class: 'gt'
+    }
@@ -358,4 +358,4 @@ texinfo_documents = [
 intersphinx_mapping = {
    'https://docs.python.org/2': None,
    'http://pandas.pydata.org/pandas-docs/stable': None
-}
+}
@@ -2,7 +2,6 @@

 import os
 from setuptools import find_packages
-from pkg_resources import parse_version


 here = os.path.abspath(os.path.dirname(__file__))
@@ -56,11 +55,11 @@ def setup_package():

    try:
        from setuptools import setup
-    except:
+    except ImportError:
        from distutils.core import setup

    setup(**metadata)


 if __name__ == '__main__':
-    setup_package()
+    setup_package()
@@ -373,4 +373,4 @@ data_lattice_shift_text_right_bottom = [
    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
    ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
    ["Knowledge &Practices on HTN &DM", "2400", "Women (≥ 18 yrs)", "-", "-", "-", "1728"]
-]
+]
@@ -76,4 +76,4 @@ def test_cli_output_format():
        # zip
        result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
                                     'stream', infile])
-        assert result.exit_code == 0
+        assert result.exit_code == 0
@@ -82,8 +82,8 @@ def test_stream_flag_size():
 def test_lattice():
    df = pd.DataFrame(data_lattice)

-    filename = os.path.join(testdir,
-        "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
+    filename = os.path.join(
+        testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
    tables = camelot.read_pdf(filename, pages="2")
    assert df.equals(tables[0].df)

@@ -137,4 +137,4 @@ def test_lattice_shift_text():
    assert df_disable.equals(tables[0].df)

    tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b'])
-    assert df_rb.equals(tables[0].df)
+    assert df_rb.equals(tables[0].df)
@@ -50,4 +50,4 @@ def test_no_tables_found():
            tables = camelot.read_pdf(filename)
        except Exception as e:
            assert type(e).__name__ == 'UserWarning'
-            assert str(e) == 'No tables found on page-1'
+            assert str(e) == 'No tables found on page-1'