[MRG + 1] Make pep8 (#125)

* Make setup.py pep8 Add new line at end of file, fix bare except, remove unused import. * Make tests/*.py pep8 Add some newlines at the end of files and a visual indent. * Make docs/*.py pep8 Fix block comments and add new lines at end of files. * Make camelot/*.py pep8 Fixed unused import, a few weirdly ordered imports, a docstring typo and many newlines at the end of files. * Fix imports Fix import order and remove a couple more unused imports. * Fix indents Fix indentation (no opening delimiter alignment). * Add newlines
2018-10-05 12:25:43 +01:00 · 2018-10-05 12:25:43 +01:00 · 90aaba6eec
parent 6e8079df84
commit 90aaba6eec
20 changed files with 107 additions and 111 deletions
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@ -2,6 +2,9 @@
 import logging
 from .__version__ import __version__
 from .io import read_pdf
 # set up logging
 logger = logging.getLogger('camelot')
@ -12,8 +15,3 @@ handler = logging.StreamHandler()
 handler.setFormatter(formatter)
 logger.addHandler(handler)
 from .__version__ import __version__
 from .io import read_pdf
--- a/camelot/__version__.py
+++ b/camelot/__version__.py
@ -8,4 +8,4 @@ __url__ = 'http://camelot-py.readthedocs.io/'
 __version__ = '.'.join(map(str, VERSION))
 __author__ = 'Vinayak Mehta'
 __author_email__ = 'vmehta94@gmail.com'
-__license__ = 'MIT License'
+__license__ = 'MIT License'
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -2,17 +2,18 @@
 import logging
 logger = logging.getLogger('camelot')
 logger.setLevel(logging.INFO)
 import click
 from . import __version__
 from .io import read_pdf
 logger = logging.getLogger('camelot')
 logger.setLevel(logging.INFO)
 class Config(object):
-    def  __init__(self):
+    def __init__(self):
        self.config = {}
    def set_config(self, key, value):
@ -152,4 +153,4 @@ def stream(c, *args, **kwargs):
            raise click.UsageError('Please specify output file path using --output')
        if f is None:
            raise click.UsageError('Please specify output file format using --format')
-        tables.export(output, f=f, compress=compress)
+        tables.export(output, f=f, compress=compress)
--- a/camelot/core.py
+++ b/camelot/core.py
@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 import os
 import json
 import zipfile
 import tempfile
@ -519,4 +518,4 @@ class TableList(object):
            if compress:
                zipname = os.path.join(os.path.dirname(path), root) + '.zip'
                with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
-                    z.write(filepath, os.path.basename(filepath))
+                    z.write(filepath, os.path.basename(filepath))
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -145,4 +145,4 @@ class PDFHandler(object):
            for p in pages:
                t = parser.extract_tables(p)
                tables.extend(t)
-        return TableList(tables)
+        return TableList(tables)
--- a/camelot/image_processing.py
+++ b/camelot/image_processing.py
@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import division
 from itertools import groupby
 from operator import itemgetter
 import cv2
 import numpy as np
@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if process_background:
-        threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        threshold = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, blocksize, c)
    else:
-        threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
+        threshold = cv2.adaptiveThreshold(
            np.invert(gray), 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
    return img, threshold
@ -197,4 +197,4 @@ def find_table_joints(contours, vertical, horizontal):
            joint_coords.append((c1, c2))
        tables[(x, y + h, x + w, y)] = joint_coords
-    return tables
+    return tables
--- a/camelot/io.py
+++ b/camelot/io.py
@ -89,4 +89,4 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
    p = PDFHandler(filepath, pages)
    kwargs = remove_extra(kwargs, flavor=flavor)
    tables = p.parse(flavor=flavor, **kwargs)
-    return tables
+    return tables
--- a/camelot/parsers/__init__.py
+++ b/camelot/parsers/__init__.py
@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 from .stream import Stream
-from .lattice import Lattice
+from .lattice import Lattice
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -18,4 +18,4 @@ class BaseParser(object):
        self.horizontal_text = get_text_objects(self.layout, ltype="lh")
        self.vertical_text = get_text_objects(self.layout, ltype="lv")
        self.pdf_width, self.pdf_height = self.dimensions
-        self.rootname, __ = os.path.splitext(self.filename)
+        self.rootname, __ = os.path.splitext(self.filename)
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -201,8 +201,9 @@ class Lattice(BaseParser):
            if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
                gs_call.insert(0, 'gs')
            else:
-                gs_call.insert(0, 'gsc')
+                gs_call.insert(0, "gsc")
-        subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
+        subprocess.call(
            gs_call, stdout=open(os.devnull, 'w'),
            stderr=subprocess.STDOUT)
    def _generate_table_bbox(self):
@ -339,10 +340,10 @@ class Lattice(BaseParser):
        _tables = []
        # sort tables based on y-coord
-        for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
+        for table_idx, tk in enumerate(sorted(
-                key=lambda x: x[1], reverse=True)):
+                self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
            cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
            _tables.append(table)
-        return _tables
+        return _tables
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -116,7 +116,7 @@ class Stream(BaseParser):
                    row_y = t.y0
                temp.append(t)
        rows.append(sorted(temp, key=lambda t: t.x0))
-        __ = rows.pop(0) # hacky
+        __ = rows.pop(0)  # hacky
        return rows
    @staticmethod
@ -211,7 +211,7 @@ class Stream(BaseParser):
            text = Stream._group_rows(text, row_close_tol=row_close_tol)
            elements = [len(r) for r in text]
            new_cols = [(t.x0, t.x1)
-                for r in text if len(r) == max(elements) for t in r]
+                        for r in text if len(r) == max(elements) for t in r]
            cols.extend(Stream._merge_columns(sorted(new_cols)))
        return cols
@ -357,10 +357,10 @@ class Stream(BaseParser):
        _tables = []
        # sort tables based on y-coord
-        for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
+        for table_idx, tk in enumerate(sorted(
-                key=lambda x: x[1], reverse=True)):
+                self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
            cols, rows = self._generate_columns_and_rows(table_idx, tk)
            table = self._generate_table(table_idx, cols, rows)
            _tables.append(table)
-        return _tables
+        return _tables
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -41,16 +41,16 @@ def plot_table(table):
        for cell in row:
            if cell.left:
                plt.plot([cell.lb[0], cell.lt[0]],
-                            [cell.lb[1], cell.lt[1]])
+                         [cell.lb[1], cell.lt[1]])
            if cell.right:
                plt.plot([cell.rb[0], cell.rt[0]],
-                            [cell.rb[1], cell.rt[1]])
+                         [cell.rb[1], cell.rt[1]])
            if cell.top:
                plt.plot([cell.lt[0], cell.rt[0]],
-                            [cell.lt[1], cell.rt[1]])
+                         [cell.lt[1], cell.rt[1]])
            if cell.bottom:
                plt.plot([cell.lb[0], cell.rb[0]],
-                            [cell.lb[1], cell.rb[1]])
+                         [cell.lb[1], cell.rb[1]])
    plt.show()
@ -105,4 +105,4 @@ def plot_line(segments):
        plt.plot([v[0], v[2]], [v[1], v[3]])
    for h in horizontal:
        plt.plot([h[0], h[2]], [h[1], h[3]])
-    plt.show()
+    plt.show()
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -1,5 +1,4 @@
 from __future__ import division
 import os
 import shutil
 import tempfile
 import warnings
@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfpage import PDFTextExtractionNotAllowed
 from pdfminer.pdfinterp import PDFResourceManager
 from pdfminer.pdfinterp import PDFPageInterpreter
 from pdfminer.pdfdevice import PDFDevice
 from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
                             LTTextLineVertical)
@ -278,8 +276,8 @@ def text_in_bbox(bbox, text):
    lb = (bbox[0], bbox[1])
    rt = (bbox[2], bbox[3])
    t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0
-                <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
+              <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
-                <= rt[1] + 2]
+              <= rt[1] + 2]
    return t_bbox
@ -640,4 +638,4 @@ def get_text_objects(layout, ltype="char", t=None):
                t += get_text_objects(obj, ltype=ltype)
    except AttributeError:
        pass
-    return t
+    return t
--- a/docs/_themes/flask_theme_support.py
+++ b/docs/_themes/flask_theme_support.py
@ -10,21 +10,21 @@ class FlaskyStyle(Style):
    styles = {
        # No corresponding class for the following:
-        #Text:                     "", # class:  ''
+        # Text:                    "", # class:  ''
-        Whitespace:                "underline #f8f8f8",      # class: 'w'
+        Whitespace:                "underline #f8f8f8",       # class: 'w'
-        Error:                     "#a40000 border:#ef2929", # class: 'err'
+        Error:                     "#a40000 border:#ef2929",  # class: 'err'
-        Other:                     "#000000",                # class 'x'
+        Other:                     "#000000",                 # class 'x'
-        Comment:                   "italic #8f5902", # class: 'c'
+        Comment:                   "italic #8f5902",  # class: 'c'
-        Comment.Preproc:           "noitalic",       # class: 'cp'
+        Comment.Preproc:           "noitalic",        # class: 'cp'
-        Keyword:                   "bold #004461",   # class: 'k'
+        Keyword:                   "bold #004461",    # class: 'k'
-        Keyword.Constant:          "bold #004461",   # class: 'kc'
+        Keyword.Constant:          "bold #004461",    # class: 'kc'
-        Keyword.Declaration:       "bold #004461",   # class: 'kd'
+        Keyword.Declaration:       "bold #004461",    # class: 'kd'
-        Keyword.Namespace:         "bold #004461",   # class: 'kn'
+        Keyword.Namespace:         "bold #004461",    # class: 'kn'
-        Keyword.Pseudo:            "bold #004461",   # class: 'kp'
+        Keyword.Pseudo:            "bold #004461",    # class: 'kp'
-        Keyword.Reserved:          "bold #004461",   # class: 'kr'
+        Keyword.Reserved:          "bold #004461",    # class: 'kr'
-        Keyword.Type:              "bold #004461",   # class: 'kt'
+        Keyword.Type:              "bold #004461",    # class: 'kt'
        Operator:                  "#582800",   # class: 'o'
        Operator.Word:             "bold #004461",   # class: 'ow' - like keywords
@ -34,53 +34,53 @@ class FlaskyStyle(Style):
        # because special names such as Name.Class, Name.Function, etc.
        # are not recognized as such later in the parsing, we choose them
        # to look the same as ordinary variables.
-        Name:                      "#000000",        # class: 'n'
+        Name:                      "#000000",         # class: 'n'
-        Name.Attribute:            "#c4a000",        # class: 'na' - to be revised
+        Name.Attribute:            "#c4a000",         # class: 'na' - to be revised
-        Name.Builtin:              "#004461",        # class: 'nb'
+        Name.Builtin:              "#004461",         # class: 'nb'
-        Name.Builtin.Pseudo:       "#3465a4",        # class: 'bp'
+        Name.Builtin.Pseudo:       "#3465a4",         # class: 'bp'
-        Name.Class:                "#000000",        # class: 'nc' - to be revised
+        Name.Class:                "#000000",         # class: 'nc' - to be revised
-        Name.Constant:             "#000000",        # class: 'no' - to be revised
+        Name.Constant:             "#000000",         # class: 'no' - to be revised
-        Name.Decorator:            "#888",           # class: 'nd' - to be revised
+        Name.Decorator:            "#888",            # class: 'nd' - to be revised
-        Name.Entity:               "#ce5c00",        # class: 'ni'
+        Name.Entity:               "#ce5c00",         # class: 'ni'
-        Name.Exception:            "bold #cc0000",   # class: 'ne'
+        Name.Exception:            "bold #cc0000",    # class: 'ne'
-        Name.Function:             "#000000",        # class: 'nf'
+        Name.Function:             "#000000",         # class: 'nf'
-        Name.Property:             "#000000",        # class: 'py'
+        Name.Property:             "#000000",         # class: 'py'
-        Name.Label:                "#f57900",        # class: 'nl'
+        Name.Label:                "#f57900",         # class: 'nl'
-        Name.Namespace:            "#000000",        # class: 'nn' - to be revised
+        Name.Namespace:            "#000000",         # class: 'nn' - to be revised
-        Name.Other:                "#000000",        # class: 'nx'
+        Name.Other:                "#000000",         # class: 'nx'
-        Name.Tag:                  "bold #004461",   # class: 'nt' - like a keyword
+        Name.Tag:                  "bold #004461",    # class: 'nt' - like a keyword
-        Name.Variable:             "#000000",        # class: 'nv' - to be revised
+        Name.Variable:             "#000000",         # class: 'nv' - to be revised
-        Name.Variable.Class:       "#000000",        # class: 'vc' - to be revised
+        Name.Variable.Class:       "#000000",         # class: 'vc' - to be revised
-        Name.Variable.Global:      "#000000",        # class: 'vg' - to be revised
+        Name.Variable.Global:      "#000000",         # class: 'vg' - to be revised
-        Name.Variable.Instance:    "#000000",        # class: 'vi' - to be revised
+        Name.Variable.Instance:    "#000000",         # class: 'vi' - to be revised
-        Number:                    "#990000",        # class: 'm'
+        Number:                    "#990000",         # class: 'm'
-        Literal:                   "#000000",        # class: 'l'
+        Literal:                   "#000000",         # class: 'l'
-        Literal.Date:              "#000000",        # class: 'ld'
+        Literal.Date:              "#000000",         # class: 'ld'
-        String:                    "#4e9a06",        # class: 's'
+        String:                    "#4e9a06",         # class: 's'
-        String.Backtick:           "#4e9a06",        # class: 'sb'
+        String.Backtick:           "#4e9a06",         # class: 'sb'
-        String.Char:               "#4e9a06",        # class: 'sc'
+        String.Char:               "#4e9a06",         # class: 'sc'
-        String.Doc:                "italic #8f5902", # class: 'sd' - like a comment
+        String.Doc:                "italic #8f5902",  # class: 'sd' - like a comment
-        String.Double:             "#4e9a06",        # class: 's2'
+        String.Double:             "#4e9a06",         # class: 's2'
-        String.Escape:             "#4e9a06",        # class: 'se'
+        String.Escape:             "#4e9a06",         # class: 'se'
-        String.Heredoc:            "#4e9a06",        # class: 'sh'
+        String.Heredoc:            "#4e9a06",         # class: 'sh'
-        String.Interpol:           "#4e9a06",        # class: 'si'
+        String.Interpol:           "#4e9a06",         # class: 'si'
-        String.Other:              "#4e9a06",        # class: 'sx'
+        String.Other:              "#4e9a06",         # class: 'sx'
-        String.Regex:              "#4e9a06",        # class: 'sr'
+        String.Regex:              "#4e9a06",         # class: 'sr'
-        String.Single:             "#4e9a06",        # class: 's1'
+        String.Single:             "#4e9a06",         # class: 's1'
-        String.Symbol:             "#4e9a06",        # class: 'ss'
+        String.Symbol:             "#4e9a06",         # class: 'ss'
-        Generic:                   "#000000",        # class: 'g'
+        Generic:                   "#000000",         # class: 'g'
-        Generic.Deleted:           "#a40000",        # class: 'gd'
+        Generic.Deleted:           "#a40000",         # class: 'gd'
-        Generic.Emph:              "italic #000000", # class: 'ge'
+        Generic.Emph:              "italic #000000",  # class: 'ge'
-        Generic.Error:             "#ef2929",        # class: 'gr'
+        Generic.Error:             "#ef2929",         # class: 'gr'
-        Generic.Heading:           "bold #000080",   # class: 'gh'
+        Generic.Heading:           "bold #000080",    # class: 'gh'
-        Generic.Inserted:          "#00A000",        # class: 'gi'
+        Generic.Inserted:          "#00A000",         # class: 'gi'
-        Generic.Output:            "#888",           # class: 'go'
+        Generic.Output:            "#888",            # class: 'go'
-        Generic.Prompt:            "#745334",        # class: 'gp'
+        Generic.Prompt:            "#745334",         # class: 'gp'
-        Generic.Strong:            "bold #000000",   # class: 'gs'
+        Generic.Strong:            "bold #000000",    # class: 'gs'
-        Generic.Subheading:        "bold #800080",   # class: 'gu'
+        Generic.Subheading:        "bold #800080",    # class: 'gu'
-        Generic.Traceback:         "bold #a40000",   # class: 'gt'
+        Generic.Traceback:         "bold #a40000",    # class: 'gt'
-    }
+    }
--- a/docs/conf.py
+++ b/docs/conf.py
@ -358,4 +358,4 @@ texinfo_documents = [
 intersphinx_mapping = {
    'https://docs.python.org/2': None,
    'http://pandas.pydata.org/pandas-docs/stable': None
-}
+}
--- a/setup.py
+++ b/setup.py
@ -2,7 +2,6 @@
 import os
 from setuptools import find_packages
 from pkg_resources import parse_version
 here = os.path.abspath(os.path.dirname(__file__))
@ -56,11 +55,11 @@ def setup_package():
    try:
        from setuptools import setup
-    except:
+    except ImportError:
        from distutils.core import setup
    setup(**metadata)
 if __name__ == '__main__':
-    setup_package()
+    setup_package()
--- a/tests/data.py
+++ b/tests/data.py
@ -373,4 +373,4 @@ data_lattice_shift_text_right_bottom = [
    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
    ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
    ["Knowledge &Practices on HTN &DM", "2400", "Women (≥ 18 yrs)", "-", "-", "-", "1728"]
-]
+]
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@ -76,4 +76,4 @@ def test_cli_output_format():
        # zip
        result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
                                     'stream', infile])
-        assert result.exit_code == 0
+        assert result.exit_code == 0
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -82,8 +82,8 @@ def test_stream_flag_size():
 def test_lattice():
    df = pd.DataFrame(data_lattice)
-    filename = os.path.join(testdir,
+    filename = os.path.join(
-        "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
+        testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
    tables = camelot.read_pdf(filename, pages="2")
    assert df.equals(tables[0].df)
@ -137,4 +137,4 @@ def test_lattice_shift_text():
    assert df_disable.equals(tables[0].df)
    tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b'])
-    assert df_rb.equals(tables[0].df)
+    assert df_rb.equals(tables[0].df)
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@ -50,4 +50,4 @@ def test_no_tables_found():
            tables = camelot.read_pdf(filename)
        except Exception as e:
            assert type(e).__name__ == 'UserWarning'
-            assert str(e) == 'No tables found on page-1'
+            assert str(e) == 'No tables found on page-1'