From 90aaba6eec4e12fe97ff2c876566c9832bf430c6 Mon Sep 17 00:00:00 2001 From: Oshawk Date: Fri, 5 Oct 2018 12:25:43 +0100 Subject: [PATCH] [MRG + 1] Make pep8 (#125) * Make setup.py pep8 Add new line at end of file, fix bare except, remove unused import. * Make tests/*.py pep8 Add some newlines at end of files and a visual indent. * Make docs/*.py pep8 Fix block comments and add new lines at end of files. * Make camelot/*.py pep8 Fixed unused import, a few weirdly ordered imports, a docstring typo and many missing newlines at the end of files. * Fix imports Fix import order and remove a couple more unused imports. * Fix indents Fix indentation (no opening delimiter alignment). * Add newlines --- camelot/__init__.py | 8 +- camelot/__version__.py | 2 +- camelot/cli.py | 11 +-- camelot/core.py | 3 +- camelot/handlers.py | 2 +- camelot/image_processing.py | 10 +-- camelot/io.py | 2 +- camelot/parsers/__init__.py | 2 +- camelot/parsers/base.py | 2 +- camelot/parsers/lattice.py | 11 +-- camelot/parsers/stream.py | 10 +-- camelot/plotting.py | 10 +-- camelot/utils.py | 8 +- docs/_themes/flask_theme_support.py | 118 ++++++++++++++-------------- docs/conf.py | 2 +- setup.py | 5 +- tests/data.py | 2 +- tests/test_cli.py | 2 +- tests/test_common.py | 6 +- tests/test_errors.py | 2 +- 20 files changed, 107 insertions(+), 111 deletions(-) diff --git a/camelot/__init__.py b/camelot/__init__.py index f2c7e32..d8ff6a5 100644 --- a/camelot/__init__.py +++ b/camelot/__init__.py @@ -2,6 +2,9 @@ import logging +from .__version__ import __version__ +from .io import read_pdf + # set up logging logger = logging.getLogger('camelot') @@ -12,8 +15,3 @@ handler = logging.StreamHandler() handler.setFormatter(formatter) logger.addHandler(handler) - - -from .__version__ import __version__ - -from .io import read_pdf \ No newline at end of file diff --git a/camelot/__version__.py b/camelot/__version__.py index 0405e32..e424199 100644 --- a/camelot/__version__.py +++ b/camelot/__version__.py @@ -8,4 +8,4 
@@ __url__ = 'http://camelot-py.readthedocs.io/' __version__ = '.'.join(map(str, VERSION)) __author__ = 'Vinayak Mehta' __author_email__ = 'vmehta94@gmail.com' -__license__ = 'MIT License' \ No newline at end of file +__license__ = 'MIT License' diff --git a/camelot/cli.py b/camelot/cli.py index 2c187ba..6a7a08b 100644 --- a/camelot/cli.py +++ b/camelot/cli.py @@ -2,17 +2,18 @@ import logging -logger = logging.getLogger('camelot') -logger.setLevel(logging.INFO) - import click from . import __version__ from .io import read_pdf +logger = logging.getLogger('camelot') +logger.setLevel(logging.INFO) + + class Config(object): - def __init__(self): + def __init__(self): self.config = {} def set_config(self, key, value): @@ -152,4 +153,4 @@ def stream(c, *args, **kwargs): raise click.UsageError('Please specify output file path using --output') if f is None: raise click.UsageError('Please specify output file format using --format') - tables.export(output, f=f, compress=compress) \ No newline at end of file + tables.export(output, f=f, compress=compress) diff --git a/camelot/core.py b/camelot/core.py index 2dfe445..0a1898c 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import os -import json import zipfile import tempfile @@ -519,4 +518,4 @@ class TableList(object): if compress: zipname = os.path.join(os.path.dirname(path), root) + '.zip' with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z: - z.write(filepath, os.path.basename(filepath)) \ No newline at end of file + z.write(filepath, os.path.basename(filepath)) diff --git a/camelot/handlers.py b/camelot/handlers.py index c557584..d50f313 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -145,4 +145,4 @@ class PDFHandler(object): for p in pages: t = parser.extract_tables(p) tables.extend(t) - return TableList(tables) \ No newline at end of file + return TableList(tables) diff --git a/camelot/image_processing.py b/camelot/image_processing.py index 5f7b247..eb23101 
100644 --- a/camelot/image_processing.py +++ b/camelot/image_processing.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import division -from itertools import groupby -from operator import itemgetter import cv2 import numpy as np @@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2): gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if process_background: - threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + threshold = cv2.adaptiveThreshold( + gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c) else: - threshold = cv2.adaptiveThreshold(np.invert(gray), 255, + threshold = cv2.adaptiveThreshold( + np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c) return img, threshold @@ -197,4 +197,4 @@ def find_table_joints(contours, vertical, horizontal): joint_coords.append((c1, c2)) tables[(x, y + h, x + w, y)] = joint_coords - return tables \ No newline at end of file + return tables diff --git a/camelot/io.py b/camelot/io.py index bdbcc69..90adc96 100644 --- a/camelot/io.py +++ b/camelot/io.py @@ -89,4 +89,4 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs): p = PDFHandler(filepath, pages) kwargs = remove_extra(kwargs, flavor=flavor) tables = p.parse(flavor=flavor, **kwargs) - return tables \ No newline at end of file + return tables diff --git a/camelot/parsers/__init__.py b/camelot/parsers/__init__.py index 9366b78..5cc6605 100644 --- a/camelot/parsers/__init__.py +++ b/camelot/parsers/__init__.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from .stream import Stream -from .lattice import Lattice \ No newline at end of file +from .lattice import Lattice diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py index bd3de99..ebc4564 100644 --- a/camelot/parsers/base.py +++ b/camelot/parsers/base.py @@ -18,4 +18,4 @@ class BaseParser(object): self.horizontal_text = get_text_objects(self.layout, ltype="lh") 
self.vertical_text = get_text_objects(self.layout, ltype="lv") self.pdf_width, self.pdf_height = self.dimensions - self.rootname, __ = os.path.splitext(self.filename) \ No newline at end of file + self.rootname, __ = os.path.splitext(self.filename) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index b75da6a..e3c0a6d 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -201,8 +201,9 @@ class Lattice(BaseParser): if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower(): gs_call.insert(0, 'gs') else: - gs_call.insert(0, 'gsc') - subprocess.call(gs_call, stdout=open(os.devnull, 'w'), + gs_call.insert(0, "gsc") + subprocess.call( + gs_call, stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT) def _generate_table_bbox(self): @@ -339,10 +340,10 @@ class Lattice(BaseParser): _tables = [] # sort tables based on y-coord - for table_idx, tk in enumerate(sorted(self.table_bbox.keys(), - key=lambda x: x[1], reverse=True)): + for table_idx, tk in enumerate(sorted( + self.table_bbox.keys(), key=lambda x: x[1], reverse=True)): cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk) table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s) _tables.append(table) - return _tables \ No newline at end of file + return _tables diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index d78743a..2792d82 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -116,7 +116,7 @@ class Stream(BaseParser): row_y = t.y0 temp.append(t) rows.append(sorted(temp, key=lambda t: t.x0)) - __ = rows.pop(0) # hacky + __ = rows.pop(0) # hacky return rows @staticmethod @@ -211,7 +211,7 @@ class Stream(BaseParser): text = Stream._group_rows(text, row_close_tol=row_close_tol) elements = [len(r) for r in text] new_cols = [(t.x0, t.x1) - for r in text if len(r) == max(elements) for t in r] + for r in text if len(r) == max(elements) for t in r] 
cols.extend(Stream._merge_columns(sorted(new_cols))) return cols @@ -357,10 +357,10 @@ class Stream(BaseParser): _tables = [] # sort tables based on y-coord - for table_idx, tk in enumerate(sorted(self.table_bbox.keys(), - key=lambda x: x[1], reverse=True)): + for table_idx, tk in enumerate(sorted( + self.table_bbox.keys(), key=lambda x: x[1], reverse=True)): cols, rows = self._generate_columns_and_rows(table_idx, tk) table = self._generate_table(table_idx, cols, rows) _tables.append(table) - return _tables \ No newline at end of file + return _tables diff --git a/camelot/plotting.py b/camelot/plotting.py index b3cf1b5..bef06f2 100644 --- a/camelot/plotting.py +++ b/camelot/plotting.py @@ -41,16 +41,16 @@ def plot_table(table): for cell in row: if cell.left: plt.plot([cell.lb[0], cell.lt[0]], - [cell.lb[1], cell.lt[1]]) + [cell.lb[1], cell.lt[1]]) if cell.right: plt.plot([cell.rb[0], cell.rt[0]], - [cell.rb[1], cell.rt[1]]) + [cell.rb[1], cell.rt[1]]) if cell.top: plt.plot([cell.lt[0], cell.rt[0]], - [cell.lt[1], cell.rt[1]]) + [cell.lt[1], cell.rt[1]]) if cell.bottom: plt.plot([cell.lb[0], cell.rb[0]], - [cell.lb[1], cell.rb[1]]) + [cell.lb[1], cell.rb[1]]) plt.show() @@ -105,4 +105,4 @@ def plot_line(segments): plt.plot([v[0], v[2]], [v[1], v[3]]) for h in horizontal: plt.plot([h[0], h[2]], [h[1], h[3]]) - plt.show() \ No newline at end of file + plt.show() diff --git a/camelot/utils.py b/camelot/utils.py index b42b4f8..2d735c8 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -1,5 +1,4 @@ from __future__ import division -import os import shutil import tempfile import warnings @@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage from pdfminer.pdfpage import PDFTextExtractionNotAllowed from pdfminer.pdfinterp import PDFResourceManager from pdfminer.pdfinterp import PDFPageInterpreter -from pdfminer.pdfdevice import PDFDevice from pdfminer.converter import PDFPageAggregator from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal, 
LTTextLineVertical) @@ -278,8 +276,8 @@ def text_in_bbox(bbox, text): lb = (bbox[0], bbox[1]) rt = (bbox[2], bbox[3]) t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0 - <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 - <= rt[1] + 2] + <= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 + <= rt[1] + 2] return t_bbox @@ -640,4 +638,4 @@ def get_text_objects(layout, ltype="char", t=None): t += get_text_objects(obj, ltype=ltype) except AttributeError: pass - return t \ No newline at end of file + return t diff --git a/docs/_themes/flask_theme_support.py b/docs/_themes/flask_theme_support.py index 8a3278e..fde6274 100644 --- a/docs/_themes/flask_theme_support.py +++ b/docs/_themes/flask_theme_support.py @@ -10,21 +10,21 @@ class FlaskyStyle(Style): styles = { # No corresponding class for the following: - #Text: "", # class: '' - Whitespace: "underline #f8f8f8", # class: 'w' - Error: "#a40000 border:#ef2929", # class: 'err' - Other: "#000000", # class 'x' + # Text: "", # class: '' + Whitespace: "underline #f8f8f8", # class: 'w' + Error: "#a40000 border:#ef2929", # class: 'err' + Other: "#000000", # class 'x' - Comment: "italic #8f5902", # class: 'c' - Comment.Preproc: "noitalic", # class: 'cp' + Comment: "italic #8f5902", # class: 'c' + Comment.Preproc: "noitalic", # class: 'cp' - Keyword: "bold #004461", # class: 'k' - Keyword.Constant: "bold #004461", # class: 'kc' - Keyword.Declaration: "bold #004461", # class: 'kd' - Keyword.Namespace: "bold #004461", # class: 'kn' - Keyword.Pseudo: "bold #004461", # class: 'kp' - Keyword.Reserved: "bold #004461", # class: 'kr' - Keyword.Type: "bold #004461", # class: 'kt' + Keyword: "bold #004461", # class: 'k' + Keyword.Constant: "bold #004461", # class: 'kc' + Keyword.Declaration: "bold #004461", # class: 'kd' + Keyword.Namespace: "bold #004461", # class: 'kn' + Keyword.Pseudo: "bold #004461", # class: 'kp' + Keyword.Reserved: "bold #004461", # class: 'kr' + Keyword.Type: "bold #004461", # class: 'kt' Operator: 
"#582800", # class: 'o' Operator.Word: "bold #004461", # class: 'ow' - like keywords @@ -34,53 +34,53 @@ class FlaskyStyle(Style): # because special names such as Name.Class, Name.Function, etc. # are not recognized as such later in the parsing, we choose them # to look the same as ordinary variables. - Name: "#000000", # class: 'n' - Name.Attribute: "#c4a000", # class: 'na' - to be revised - Name.Builtin: "#004461", # class: 'nb' - Name.Builtin.Pseudo: "#3465a4", # class: 'bp' - Name.Class: "#000000", # class: 'nc' - to be revised - Name.Constant: "#000000", # class: 'no' - to be revised - Name.Decorator: "#888", # class: 'nd' - to be revised - Name.Entity: "#ce5c00", # class: 'ni' - Name.Exception: "bold #cc0000", # class: 'ne' - Name.Function: "#000000", # class: 'nf' - Name.Property: "#000000", # class: 'py' - Name.Label: "#f57900", # class: 'nl' - Name.Namespace: "#000000", # class: 'nn' - to be revised - Name.Other: "#000000", # class: 'nx' - Name.Tag: "bold #004461", # class: 'nt' - like a keyword - Name.Variable: "#000000", # class: 'nv' - to be revised - Name.Variable.Class: "#000000", # class: 'vc' - to be revised - Name.Variable.Global: "#000000", # class: 'vg' - to be revised - Name.Variable.Instance: "#000000", # class: 'vi' - to be revised + Name: "#000000", # class: 'n' + Name.Attribute: "#c4a000", # class: 'na' - to be revised + Name.Builtin: "#004461", # class: 'nb' + Name.Builtin.Pseudo: "#3465a4", # class: 'bp' + Name.Class: "#000000", # class: 'nc' - to be revised + Name.Constant: "#000000", # class: 'no' - to be revised + Name.Decorator: "#888", # class: 'nd' - to be revised + Name.Entity: "#ce5c00", # class: 'ni' + Name.Exception: "bold #cc0000", # class: 'ne' + Name.Function: "#000000", # class: 'nf' + Name.Property: "#000000", # class: 'py' + Name.Label: "#f57900", # class: 'nl' + Name.Namespace: "#000000", # class: 'nn' - to be revised + Name.Other: "#000000", # class: 'nx' + Name.Tag: "bold #004461", # class: 'nt' - like a keyword + 
Name.Variable: "#000000", # class: 'nv' - to be revised + Name.Variable.Class: "#000000", # class: 'vc' - to be revised + Name.Variable.Global: "#000000", # class: 'vg' - to be revised + Name.Variable.Instance: "#000000", # class: 'vi' - to be revised - Number: "#990000", # class: 'm' + Number: "#990000", # class: 'm' - Literal: "#000000", # class: 'l' - Literal.Date: "#000000", # class: 'ld' + Literal: "#000000", # class: 'l' + Literal.Date: "#000000", # class: 'ld' - String: "#4e9a06", # class: 's' - String.Backtick: "#4e9a06", # class: 'sb' - String.Char: "#4e9a06", # class: 'sc' - String.Doc: "italic #8f5902", # class: 'sd' - like a comment - String.Double: "#4e9a06", # class: 's2' - String.Escape: "#4e9a06", # class: 'se' - String.Heredoc: "#4e9a06", # class: 'sh' - String.Interpol: "#4e9a06", # class: 'si' - String.Other: "#4e9a06", # class: 'sx' - String.Regex: "#4e9a06", # class: 'sr' - String.Single: "#4e9a06", # class: 's1' - String.Symbol: "#4e9a06", # class: 'ss' + String: "#4e9a06", # class: 's' + String.Backtick: "#4e9a06", # class: 'sb' + String.Char: "#4e9a06", # class: 'sc' + String.Doc: "italic #8f5902", # class: 'sd' - like a comment + String.Double: "#4e9a06", # class: 's2' + String.Escape: "#4e9a06", # class: 'se' + String.Heredoc: "#4e9a06", # class: 'sh' + String.Interpol: "#4e9a06", # class: 'si' + String.Other: "#4e9a06", # class: 'sx' + String.Regex: "#4e9a06", # class: 'sr' + String.Single: "#4e9a06", # class: 's1' + String.Symbol: "#4e9a06", # class: 'ss' - Generic: "#000000", # class: 'g' - Generic.Deleted: "#a40000", # class: 'gd' - Generic.Emph: "italic #000000", # class: 'ge' - Generic.Error: "#ef2929", # class: 'gr' - Generic.Heading: "bold #000080", # class: 'gh' - Generic.Inserted: "#00A000", # class: 'gi' - Generic.Output: "#888", # class: 'go' - Generic.Prompt: "#745334", # class: 'gp' - Generic.Strong: "bold #000000", # class: 'gs' - Generic.Subheading: "bold #800080", # class: 'gu' - Generic.Traceback: "bold #a40000", # class: 
'gt' - } \ No newline at end of file + Generic: "#000000", # class: 'g' + Generic.Deleted: "#a40000", # class: 'gd' + Generic.Emph: "italic #000000", # class: 'ge' + Generic.Error: "#ef2929", # class: 'gr' + Generic.Heading: "bold #000080", # class: 'gh' + Generic.Inserted: "#00A000", # class: 'gi' + Generic.Output: "#888", # class: 'go' + Generic.Prompt: "#745334", # class: 'gp' + Generic.Strong: "bold #000000", # class: 'gs' + Generic.Subheading: "bold #800080", # class: 'gu' + Generic.Traceback: "bold #a40000", # class: 'gt' + } diff --git a/docs/conf.py b/docs/conf.py index ff195c8..c649055 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -358,4 +358,4 @@ texinfo_documents = [ intersphinx_mapping = { 'https://docs.python.org/2': None, 'http://pandas.pydata.org/pandas-docs/stable': None -} \ No newline at end of file +} diff --git a/setup.py b/setup.py index 8d5fe82..02a8199 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ import os from setuptools import find_packages -from pkg_resources import parse_version here = os.path.abspath(os.path.dirname(__file__)) @@ -56,11 +55,11 @@ def setup_package(): try: from setuptools import setup - except: + except ImportError: from distutils.core import setup setup(**metadata) if __name__ == '__main__': - setup_package() \ No newline at end of file + setup_package() diff --git a/tests/data.py b/tests/data.py index 8642e8a..3a04d24 100755 --- a/tests/data.py +++ b/tests/data.py @@ -373,4 +373,4 @@ data_lattice_shift_text_right_bottom = [ ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"], ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"], ["Knowledge &Practices on HTN &DM", "2400", "Women (≥ 18 yrs)", "-", "-", "-", "1728"] -] \ No newline at end of file +] diff --git a/tests/test_cli.py b/tests/test_cli.py index da572dd..b89eab3 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -76,4 +76,4 @@ def test_cli_output_format(): # zip result = runner.invoke(cli, ['--zip', '--format', 
'csv', '--output', outfile.format('csv'), 'stream', infile]) - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 diff --git a/tests/test_common.py b/tests/test_common.py index d18090d..7039657 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -82,8 +82,8 @@ def test_stream_flag_size(): def test_lattice(): df = pd.DataFrame(data_lattice) - filename = os.path.join(testdir, - "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf") + filename = os.path.join( + testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf") tables = camelot.read_pdf(filename, pages="2") assert df.equals(tables[0].df) @@ -137,4 +137,4 @@ def test_lattice_shift_text(): assert df_disable.equals(tables[0].df) tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b']) - assert df_rb.equals(tables[0].df) \ No newline at end of file + assert df_rb.equals(tables[0].df) diff --git a/tests/test_errors.py b/tests/test_errors.py index c8a1f98..a6ac35a 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -50,4 +50,4 @@ def test_no_tables_found(): tables = camelot.read_pdf(filename) except Exception as e: assert type(e).__name__ == 'UserWarning' - assert str(e) == 'No tables found on page-1' \ No newline at end of file + assert str(e) == 'No tables found on page-1'