[MRG + 1] Make pep8 (#125)

* Make setup.py pep8

Add new line at end of file, fix bare except, remove unused import.

* Make tests/*.py pep8

Add some newlines at and of files and a visual indent.

* Make docs/*.py pep8

Fix block comments and add new lines at end of files.

* Make camelot/*.py pep8

Fixed unused import, a few weirdly ordered imports, a docstring typo and  many new lines at the end of lines.

* Fix imports

Fix import order and remove a couple more unused imports.

* Fix indents

Fix indentation (no opening delimiter alignment).

* Add newlines
pull/2/head
Oshawk 2018-10-05 12:25:43 +01:00 committed by Vinayak Mehta
parent 6e8079df84
commit 90aaba6eec
20 changed files with 107 additions and 111 deletions

View File

@ -2,6 +2,9 @@
import logging
from .__version__ import __version__
from .io import read_pdf
# set up logging
logger = logging.getLogger('camelot')
@ -12,8 +15,3 @@ handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
from .__version__ import __version__
from .io import read_pdf

View File

@ -8,4 +8,4 @@ __url__ = 'http://camelot-py.readthedocs.io/'
__version__ = '.'.join(map(str, VERSION))
__author__ = 'Vinayak Mehta'
__author_email__ = 'vmehta94@gmail.com'
__license__ = 'MIT License'
__license__ = 'MIT License'

View File

@ -2,17 +2,18 @@
import logging
logger = logging.getLogger('camelot')
logger.setLevel(logging.INFO)
import click
from . import __version__
from .io import read_pdf
logger = logging.getLogger('camelot')
logger.setLevel(logging.INFO)
class Config(object):
def __init__(self):
def __init__(self):
self.config = {}
def set_config(self, key, value):
@ -152,4 +153,4 @@ def stream(c, *args, **kwargs):
raise click.UsageError('Please specify output file path using --output')
if f is None:
raise click.UsageError('Please specify output file format using --format')
tables.export(output, f=f, compress=compress)
tables.export(output, f=f, compress=compress)

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
import os
import json
import zipfile
import tempfile
@ -519,4 +518,4 @@ class TableList(object):
if compress:
zipname = os.path.join(os.path.dirname(path), root) + '.zip'
with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
z.write(filepath, os.path.basename(filepath))
z.write(filepath, os.path.basename(filepath))

View File

@ -145,4 +145,4 @@ class PDFHandler(object):
for p in pages:
t = parser.extract_tables(p)
tables.extend(t)
return TableList(tables)
return TableList(tables)

View File

@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import division
from itertools import groupby
from operator import itemgetter
import cv2
import numpy as np
@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
if process_background:
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
threshold = cv2.adaptiveThreshold(
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, blocksize, c)
else:
threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
threshold = cv2.adaptiveThreshold(
np.invert(gray), 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
return img, threshold
@ -197,4 +197,4 @@ def find_table_joints(contours, vertical, horizontal):
joint_coords.append((c1, c2))
tables[(x, y + h, x + w, y)] = joint_coords
return tables
return tables

View File

@ -89,4 +89,4 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
return tables

View File

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
from .stream import Stream
from .lattice import Lattice
from .lattice import Lattice

View File

@ -18,4 +18,4 @@ class BaseParser(object):
self.horizontal_text = get_text_objects(self.layout, ltype="lh")
self.vertical_text = get_text_objects(self.layout, ltype="lv")
self.pdf_width, self.pdf_height = self.dimensions
self.rootname, __ = os.path.splitext(self.filename)
self.rootname, __ = os.path.splitext(self.filename)

View File

@ -201,8 +201,9 @@ class Lattice(BaseParser):
if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
gs_call.insert(0, 'gs')
else:
gs_call.insert(0, 'gsc')
subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
gs_call.insert(0, "gsc")
subprocess.call(
gs_call, stdout=open(os.devnull, 'w'),
stderr=subprocess.STDOUT)
def _generate_table_bbox(self):
@ -339,10 +340,10 @@ class Lattice(BaseParser):
_tables = []
# sort tables based on y-coord
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
key=lambda x: x[1], reverse=True)):
for table_idx, tk in enumerate(sorted(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
_tables.append(table)
return _tables
return _tables

View File

@ -116,7 +116,7 @@ class Stream(BaseParser):
row_y = t.y0
temp.append(t)
rows.append(sorted(temp, key=lambda t: t.x0))
__ = rows.pop(0) # hacky
__ = rows.pop(0) # hacky
return rows
@staticmethod
@ -211,7 +211,7 @@ class Stream(BaseParser):
text = Stream._group_rows(text, row_close_tol=row_close_tol)
elements = [len(r) for r in text]
new_cols = [(t.x0, t.x1)
for r in text if len(r) == max(elements) for t in r]
for r in text if len(r) == max(elements) for t in r]
cols.extend(Stream._merge_columns(sorted(new_cols)))
return cols
@ -357,10 +357,10 @@ class Stream(BaseParser):
_tables = []
# sort tables based on y-coord
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
key=lambda x: x[1], reverse=True)):
for table_idx, tk in enumerate(sorted(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
cols, rows = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows)
_tables.append(table)
return _tables
return _tables

View File

@ -41,16 +41,16 @@ def plot_table(table):
for cell in row:
if cell.left:
plt.plot([cell.lb[0], cell.lt[0]],
[cell.lb[1], cell.lt[1]])
[cell.lb[1], cell.lt[1]])
if cell.right:
plt.plot([cell.rb[0], cell.rt[0]],
[cell.rb[1], cell.rt[1]])
[cell.rb[1], cell.rt[1]])
if cell.top:
plt.plot([cell.lt[0], cell.rt[0]],
[cell.lt[1], cell.rt[1]])
[cell.lt[1], cell.rt[1]])
if cell.bottom:
plt.plot([cell.lb[0], cell.rb[0]],
[cell.lb[1], cell.rb[1]])
[cell.lb[1], cell.rb[1]])
plt.show()
@ -105,4 +105,4 @@ def plot_line(segments):
plt.plot([v[0], v[2]], [v[1], v[3]])
for h in horizontal:
plt.plot([h[0], h[2]], [h[1], h[3]])
plt.show()
plt.show()

View File

@ -1,5 +1,4 @@
from __future__ import division
import os
import shutil
import tempfile
import warnings
@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
LTTextLineVertical)
@ -278,8 +276,8 @@ def text_in_bbox(bbox, text):
lb = (bbox[0], bbox[1])
rt = (bbox[2], bbox[3])
t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
<= rt[1] + 2]
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
<= rt[1] + 2]
return t_bbox
@ -640,4 +638,4 @@ def get_text_objects(layout, ltype="char", t=None):
t += get_text_objects(obj, ltype=ltype)
except AttributeError:
pass
return t
return t

View File

@ -10,21 +10,21 @@ class FlaskyStyle(Style):
styles = {
# No corresponding class for the following:
#Text: "", # class: ''
Whitespace: "underline #f8f8f8", # class: 'w'
Error: "#a40000 border:#ef2929", # class: 'err'
Other: "#000000", # class 'x'
# Text: "", # class: ''
Whitespace: "underline #f8f8f8", # class: 'w'
Error: "#a40000 border:#ef2929", # class: 'err'
Other: "#000000", # class 'x'
Comment: "italic #8f5902", # class: 'c'
Comment.Preproc: "noitalic", # class: 'cp'
Comment: "italic #8f5902", # class: 'c'
Comment.Preproc: "noitalic", # class: 'cp'
Keyword: "bold #004461", # class: 'k'
Keyword.Constant: "bold #004461", # class: 'kc'
Keyword.Declaration: "bold #004461", # class: 'kd'
Keyword.Namespace: "bold #004461", # class: 'kn'
Keyword.Pseudo: "bold #004461", # class: 'kp'
Keyword.Reserved: "bold #004461", # class: 'kr'
Keyword.Type: "bold #004461", # class: 'kt'
Keyword: "bold #004461", # class: 'k'
Keyword.Constant: "bold #004461", # class: 'kc'
Keyword.Declaration: "bold #004461", # class: 'kd'
Keyword.Namespace: "bold #004461", # class: 'kn'
Keyword.Pseudo: "bold #004461", # class: 'kp'
Keyword.Reserved: "bold #004461", # class: 'kr'
Keyword.Type: "bold #004461", # class: 'kt'
Operator: "#582800", # class: 'o'
Operator.Word: "bold #004461", # class: 'ow' - like keywords
@ -34,53 +34,53 @@ class FlaskyStyle(Style):
# because special names such as Name.Class, Name.Function, etc.
# are not recognized as such later in the parsing, we choose them
# to look the same as ordinary variables.
Name: "#000000", # class: 'n'
Name.Attribute: "#c4a000", # class: 'na' - to be revised
Name.Builtin: "#004461", # class: 'nb'
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
Name.Class: "#000000", # class: 'nc' - to be revised
Name.Constant: "#000000", # class: 'no' - to be revised
Name.Decorator: "#888", # class: 'nd' - to be revised
Name.Entity: "#ce5c00", # class: 'ni'
Name.Exception: "bold #cc0000", # class: 'ne'
Name.Function: "#000000", # class: 'nf'
Name.Property: "#000000", # class: 'py'
Name.Label: "#f57900", # class: 'nl'
Name.Namespace: "#000000", # class: 'nn' - to be revised
Name.Other: "#000000", # class: 'nx'
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
Name.Variable: "#000000", # class: 'nv' - to be revised
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
Name: "#000000", # class: 'n'
Name.Attribute: "#c4a000", # class: 'na' - to be revised
Name.Builtin: "#004461", # class: 'nb'
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
Name.Class: "#000000", # class: 'nc' - to be revised
Name.Constant: "#000000", # class: 'no' - to be revised
Name.Decorator: "#888", # class: 'nd' - to be revised
Name.Entity: "#ce5c00", # class: 'ni'
Name.Exception: "bold #cc0000", # class: 'ne'
Name.Function: "#000000", # class: 'nf'
Name.Property: "#000000", # class: 'py'
Name.Label: "#f57900", # class: 'nl'
Name.Namespace: "#000000", # class: 'nn' - to be revised
Name.Other: "#000000", # class: 'nx'
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
Name.Variable: "#000000", # class: 'nv' - to be revised
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
Number: "#990000", # class: 'm'
Number: "#990000", # class: 'm'
Literal: "#000000", # class: 'l'
Literal.Date: "#000000", # class: 'ld'
Literal: "#000000", # class: 'l'
Literal.Date: "#000000", # class: 'ld'
String: "#4e9a06", # class: 's'
String.Backtick: "#4e9a06", # class: 'sb'
String.Char: "#4e9a06", # class: 'sc'
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
String.Double: "#4e9a06", # class: 's2'
String.Escape: "#4e9a06", # class: 'se'
String.Heredoc: "#4e9a06", # class: 'sh'
String.Interpol: "#4e9a06", # class: 'si'
String.Other: "#4e9a06", # class: 'sx'
String.Regex: "#4e9a06", # class: 'sr'
String.Single: "#4e9a06", # class: 's1'
String.Symbol: "#4e9a06", # class: 'ss'
String: "#4e9a06", # class: 's'
String.Backtick: "#4e9a06", # class: 'sb'
String.Char: "#4e9a06", # class: 'sc'
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
String.Double: "#4e9a06", # class: 's2'
String.Escape: "#4e9a06", # class: 'se'
String.Heredoc: "#4e9a06", # class: 'sh'
String.Interpol: "#4e9a06", # class: 'si'
String.Other: "#4e9a06", # class: 'sx'
String.Regex: "#4e9a06", # class: 'sr'
String.Single: "#4e9a06", # class: 's1'
String.Symbol: "#4e9a06", # class: 'ss'
Generic: "#000000", # class: 'g'
Generic.Deleted: "#a40000", # class: 'gd'
Generic.Emph: "italic #000000", # class: 'ge'
Generic.Error: "#ef2929", # class: 'gr'
Generic.Heading: "bold #000080", # class: 'gh'
Generic.Inserted: "#00A000", # class: 'gi'
Generic.Output: "#888", # class: 'go'
Generic.Prompt: "#745334", # class: 'gp'
Generic.Strong: "bold #000000", # class: 'gs'
Generic.Subheading: "bold #800080", # class: 'gu'
Generic.Traceback: "bold #a40000", # class: 'gt'
}
Generic: "#000000", # class: 'g'
Generic.Deleted: "#a40000", # class: 'gd'
Generic.Emph: "italic #000000", # class: 'ge'
Generic.Error: "#ef2929", # class: 'gr'
Generic.Heading: "bold #000080", # class: 'gh'
Generic.Inserted: "#00A000", # class: 'gi'
Generic.Output: "#888", # class: 'go'
Generic.Prompt: "#745334", # class: 'gp'
Generic.Strong: "bold #000000", # class: 'gs'
Generic.Subheading: "bold #800080", # class: 'gu'
Generic.Traceback: "bold #a40000", # class: 'gt'
}

View File

@ -358,4 +358,4 @@ texinfo_documents = [
intersphinx_mapping = {
'https://docs.python.org/2': None,
'http://pandas.pydata.org/pandas-docs/stable': None
}
}

View File

@ -2,7 +2,6 @@
import os
from setuptools import find_packages
from pkg_resources import parse_version
here = os.path.abspath(os.path.dirname(__file__))
@ -56,11 +55,11 @@ def setup_package():
try:
from setuptools import setup
except:
except ImportError:
from distutils.core import setup
setup(**metadata)
if __name__ == '__main__':
setup_package()
setup_package()

View File

@ -373,4 +373,4 @@ data_lattice_shift_text_right_bottom = [
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
["Knowledge &Practices on HTN &DM", "2400", "Women (≥ 18 yrs)", "-", "-", "-", "1728"]
]
]

View File

@ -76,4 +76,4 @@ def test_cli_output_format():
# zip
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
'stream', infile])
assert result.exit_code == 0
assert result.exit_code == 0

View File

@ -82,8 +82,8 @@ def test_stream_flag_size():
def test_lattice():
df = pd.DataFrame(data_lattice)
filename = os.path.join(testdir,
"tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
filename = os.path.join(
testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
tables = camelot.read_pdf(filename, pages="2")
assert df.equals(tables[0].df)
@ -137,4 +137,4 @@ def test_lattice_shift_text():
assert df_disable.equals(tables[0].df)
tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b'])
assert df_rb.equals(tables[0].df)
assert df_rb.equals(tables[0].df)

View File

@ -50,4 +50,4 @@ def test_no_tables_found():
tables = camelot.read_pdf(filename)
except Exception as e:
assert type(e).__name__ == 'UserWarning'
assert str(e) == 'No tables found on page-1'
assert str(e) == 'No tables found on page-1'