[MRG + 1] Make pep8 (#125)

* Make setup.py pep8

Add new line at end of file, fix bare except, remove unused import.

* Make tests/*.py pep8

Add some newlines at and of files and a visual indent.

* Make docs/*.py pep8

Fix block comments and add new lines at end of files.

* Make camelot/*.py pep8

Fixed unused import, a few weirdly ordered imports, a docstring typo and  many new lines at the end of lines.

* Fix imports

Fix import order and remove a couple more unused imports.

* Fix indents

Fix indentation (no opening delimiter alignment).

* Add newlines
pull/2/head
Oshawk 2018-10-05 12:25:43 +01:00 committed by Vinayak Mehta
parent 6e8079df84
commit 90aaba6eec
20 changed files with 107 additions and 111 deletions

View File

@ -2,6 +2,9 @@
import logging
from .__version__ import __version__
from .io import read_pdf
# set up logging
logger = logging.getLogger('camelot')
@ -12,8 +15,3 @@ handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
from .__version__ import __version__
from .io import read_pdf

View File

@ -2,17 +2,18 @@
import logging
logger = logging.getLogger('camelot')
logger.setLevel(logging.INFO)
import click
from . import __version__
from .io import read_pdf
logger = logging.getLogger('camelot')
logger.setLevel(logging.INFO)
class Config(object):
def __init__(self):
def __init__(self):
self.config = {}
def set_config(self, key, value):

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
import os
import json
import zipfile
import tempfile

View File

@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import division
from itertools import groupby
from operator import itemgetter
import cv2
import numpy as np
@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
if process_background:
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
threshold = cv2.adaptiveThreshold(
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, blocksize, c)
else:
threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
threshold = cv2.adaptiveThreshold(
np.invert(gray), 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
return img, threshold

View File

@ -201,8 +201,9 @@ class Lattice(BaseParser):
if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
gs_call.insert(0, 'gs')
else:
gs_call.insert(0, 'gsc')
subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
gs_call.insert(0, "gsc")
subprocess.call(
gs_call, stdout=open(os.devnull, 'w'),
stderr=subprocess.STDOUT)
def _generate_table_bbox(self):
@ -339,8 +340,8 @@ class Lattice(BaseParser):
_tables = []
# sort tables based on y-coord
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
key=lambda x: x[1], reverse=True)):
for table_idx, tk in enumerate(sorted(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
_tables.append(table)

View File

@ -116,7 +116,7 @@ class Stream(BaseParser):
row_y = t.y0
temp.append(t)
rows.append(sorted(temp, key=lambda t: t.x0))
__ = rows.pop(0) # hacky
__ = rows.pop(0) # hacky
return rows
@staticmethod
@ -211,7 +211,7 @@ class Stream(BaseParser):
text = Stream._group_rows(text, row_close_tol=row_close_tol)
elements = [len(r) for r in text]
new_cols = [(t.x0, t.x1)
for r in text if len(r) == max(elements) for t in r]
for r in text if len(r) == max(elements) for t in r]
cols.extend(Stream._merge_columns(sorted(new_cols)))
return cols
@ -357,8 +357,8 @@ class Stream(BaseParser):
_tables = []
# sort tables based on y-coord
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
key=lambda x: x[1], reverse=True)):
for table_idx, tk in enumerate(sorted(
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
cols, rows = self._generate_columns_and_rows(table_idx, tk)
table = self._generate_table(table_idx, cols, rows)
_tables.append(table)

View File

@ -41,16 +41,16 @@ def plot_table(table):
for cell in row:
if cell.left:
plt.plot([cell.lb[0], cell.lt[0]],
[cell.lb[1], cell.lt[1]])
[cell.lb[1], cell.lt[1]])
if cell.right:
plt.plot([cell.rb[0], cell.rt[0]],
[cell.rb[1], cell.rt[1]])
[cell.rb[1], cell.rt[1]])
if cell.top:
plt.plot([cell.lt[0], cell.rt[0]],
[cell.lt[1], cell.rt[1]])
[cell.lt[1], cell.rt[1]])
if cell.bottom:
plt.plot([cell.lb[0], cell.rb[0]],
[cell.lb[1], cell.rb[1]])
[cell.lb[1], cell.rb[1]])
plt.show()

View File

@ -1,5 +1,4 @@
from __future__ import division
import os
import shutil
import tempfile
import warnings
@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
LTTextLineVertical)
@ -278,8 +276,8 @@ def text_in_bbox(bbox, text):
lb = (bbox[0], bbox[1])
rt = (bbox[2], bbox[3])
t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
<= rt[1] + 2]
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
<= rt[1] + 2]
return t_bbox

View File

@ -10,21 +10,21 @@ class FlaskyStyle(Style):
styles = {
# No corresponding class for the following:
#Text: "", # class: ''
Whitespace: "underline #f8f8f8", # class: 'w'
Error: "#a40000 border:#ef2929", # class: 'err'
Other: "#000000", # class 'x'
# Text: "", # class: ''
Whitespace: "underline #f8f8f8", # class: 'w'
Error: "#a40000 border:#ef2929", # class: 'err'
Other: "#000000", # class 'x'
Comment: "italic #8f5902", # class: 'c'
Comment.Preproc: "noitalic", # class: 'cp'
Comment: "italic #8f5902", # class: 'c'
Comment.Preproc: "noitalic", # class: 'cp'
Keyword: "bold #004461", # class: 'k'
Keyword.Constant: "bold #004461", # class: 'kc'
Keyword.Declaration: "bold #004461", # class: 'kd'
Keyword.Namespace: "bold #004461", # class: 'kn'
Keyword.Pseudo: "bold #004461", # class: 'kp'
Keyword.Reserved: "bold #004461", # class: 'kr'
Keyword.Type: "bold #004461", # class: 'kt'
Keyword: "bold #004461", # class: 'k'
Keyword.Constant: "bold #004461", # class: 'kc'
Keyword.Declaration: "bold #004461", # class: 'kd'
Keyword.Namespace: "bold #004461", # class: 'kn'
Keyword.Pseudo: "bold #004461", # class: 'kp'
Keyword.Reserved: "bold #004461", # class: 'kr'
Keyword.Type: "bold #004461", # class: 'kt'
Operator: "#582800", # class: 'o'
Operator.Word: "bold #004461", # class: 'ow' - like keywords
@ -34,53 +34,53 @@ class FlaskyStyle(Style):
# because special names such as Name.Class, Name.Function, etc.
# are not recognized as such later in the parsing, we choose them
# to look the same as ordinary variables.
Name: "#000000", # class: 'n'
Name.Attribute: "#c4a000", # class: 'na' - to be revised
Name.Builtin: "#004461", # class: 'nb'
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
Name.Class: "#000000", # class: 'nc' - to be revised
Name.Constant: "#000000", # class: 'no' - to be revised
Name.Decorator: "#888", # class: 'nd' - to be revised
Name.Entity: "#ce5c00", # class: 'ni'
Name.Exception: "bold #cc0000", # class: 'ne'
Name.Function: "#000000", # class: 'nf'
Name.Property: "#000000", # class: 'py'
Name.Label: "#f57900", # class: 'nl'
Name.Namespace: "#000000", # class: 'nn' - to be revised
Name.Other: "#000000", # class: 'nx'
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
Name.Variable: "#000000", # class: 'nv' - to be revised
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
Name: "#000000", # class: 'n'
Name.Attribute: "#c4a000", # class: 'na' - to be revised
Name.Builtin: "#004461", # class: 'nb'
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
Name.Class: "#000000", # class: 'nc' - to be revised
Name.Constant: "#000000", # class: 'no' - to be revised
Name.Decorator: "#888", # class: 'nd' - to be revised
Name.Entity: "#ce5c00", # class: 'ni'
Name.Exception: "bold #cc0000", # class: 'ne'
Name.Function: "#000000", # class: 'nf'
Name.Property: "#000000", # class: 'py'
Name.Label: "#f57900", # class: 'nl'
Name.Namespace: "#000000", # class: 'nn' - to be revised
Name.Other: "#000000", # class: 'nx'
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
Name.Variable: "#000000", # class: 'nv' - to be revised
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
Number: "#990000", # class: 'm'
Number: "#990000", # class: 'm'
Literal: "#000000", # class: 'l'
Literal.Date: "#000000", # class: 'ld'
Literal: "#000000", # class: 'l'
Literal.Date: "#000000", # class: 'ld'
String: "#4e9a06", # class: 's'
String.Backtick: "#4e9a06", # class: 'sb'
String.Char: "#4e9a06", # class: 'sc'
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
String.Double: "#4e9a06", # class: 's2'
String.Escape: "#4e9a06", # class: 'se'
String.Heredoc: "#4e9a06", # class: 'sh'
String.Interpol: "#4e9a06", # class: 'si'
String.Other: "#4e9a06", # class: 'sx'
String.Regex: "#4e9a06", # class: 'sr'
String.Single: "#4e9a06", # class: 's1'
String.Symbol: "#4e9a06", # class: 'ss'
String: "#4e9a06", # class: 's'
String.Backtick: "#4e9a06", # class: 'sb'
String.Char: "#4e9a06", # class: 'sc'
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
String.Double: "#4e9a06", # class: 's2'
String.Escape: "#4e9a06", # class: 'se'
String.Heredoc: "#4e9a06", # class: 'sh'
String.Interpol: "#4e9a06", # class: 'si'
String.Other: "#4e9a06", # class: 'sx'
String.Regex: "#4e9a06", # class: 'sr'
String.Single: "#4e9a06", # class: 's1'
String.Symbol: "#4e9a06", # class: 'ss'
Generic: "#000000", # class: 'g'
Generic.Deleted: "#a40000", # class: 'gd'
Generic.Emph: "italic #000000", # class: 'ge'
Generic.Error: "#ef2929", # class: 'gr'
Generic.Heading: "bold #000080", # class: 'gh'
Generic.Inserted: "#00A000", # class: 'gi'
Generic.Output: "#888", # class: 'go'
Generic.Prompt: "#745334", # class: 'gp'
Generic.Strong: "bold #000000", # class: 'gs'
Generic.Subheading: "bold #800080", # class: 'gu'
Generic.Traceback: "bold #a40000", # class: 'gt'
Generic: "#000000", # class: 'g'
Generic.Deleted: "#a40000", # class: 'gd'
Generic.Emph: "italic #000000", # class: 'ge'
Generic.Error: "#ef2929", # class: 'gr'
Generic.Heading: "bold #000080", # class: 'gh'
Generic.Inserted: "#00A000", # class: 'gi'
Generic.Output: "#888", # class: 'go'
Generic.Prompt: "#745334", # class: 'gp'
Generic.Strong: "bold #000000", # class: 'gs'
Generic.Subheading: "bold #800080", # class: 'gu'
Generic.Traceback: "bold #a40000", # class: 'gt'
}

View File

@ -2,7 +2,6 @@
import os
from setuptools import find_packages
from pkg_resources import parse_version
here = os.path.abspath(os.path.dirname(__file__))
@ -56,7 +55,7 @@ def setup_package():
try:
from setuptools import setup
except:
except ImportError:
from distutils.core import setup
setup(**metadata)

View File

@ -82,8 +82,8 @@ def test_stream_flag_size():
def test_lattice():
df = pd.DataFrame(data_lattice)
filename = os.path.join(testdir,
"tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
filename = os.path.join(
testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
tables = camelot.read_pdf(filename, pages="2")
assert df.equals(tables[0].df)