[MRG + 1] Make pep8 (#125)
* Make setup.py pep8 Add new line at end of file, fix bare except, remove unused import. * Make tests/*.py pep8 Add some newlines at and of files and a visual indent. * Make docs/*.py pep8 Fix block comments and add new lines at end of files. * Make camelot/*.py pep8 Fixed unused import, a few weirdly ordered imports, a docstring typo and many new lines at the end of lines. * Fix imports Fix import order and remove a couple more unused imports. * Fix indents Fix indentation (no opening delimiter alignment). * Add newlinespull/2/head
parent
6e8079df84
commit
90aaba6eec
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
import logging
|
||||
|
||||
from .__version__ import __version__
|
||||
from .io import read_pdf
|
||||
|
||||
|
||||
# set up logging
|
||||
logger = logging.getLogger('camelot')
|
||||
|
|
@ -12,8 +15,3 @@ handler = logging.StreamHandler()
|
|||
handler.setFormatter(formatter)
|
||||
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
from .__version__ import __version__
|
||||
|
||||
from .io import read_pdf
|
||||
|
|
@ -2,17 +2,18 @@
|
|||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('camelot')
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
import click
|
||||
|
||||
from . import __version__
|
||||
from .io import read_pdf
|
||||
|
||||
|
||||
logger = logging.getLogger('camelot')
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
class Config(object):
|
||||
def __init__(self):
|
||||
def __init__(self):
|
||||
self.config = {}
|
||||
|
||||
def set_config(self, key, value):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import json
|
||||
import zipfile
|
||||
import tempfile
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
from itertools import groupby
|
||||
from operator import itemgetter
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
|
@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
|
|||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
if process_background:
|
||||
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||
threshold = cv2.adaptiveThreshold(
|
||||
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||
cv2.THRESH_BINARY, blocksize, c)
|
||||
else:
|
||||
threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
|
||||
threshold = cv2.adaptiveThreshold(
|
||||
np.invert(gray), 255,
|
||||
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
|
||||
return img, threshold
|
||||
|
||||
|
|
|
|||
|
|
@ -201,8 +201,9 @@ class Lattice(BaseParser):
|
|||
if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
|
||||
gs_call.insert(0, 'gs')
|
||||
else:
|
||||
gs_call.insert(0, 'gsc')
|
||||
subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
|
||||
gs_call.insert(0, "gsc")
|
||||
subprocess.call(
|
||||
gs_call, stdout=open(os.devnull, 'w'),
|
||||
stderr=subprocess.STDOUT)
|
||||
|
||||
def _generate_table_bbox(self):
|
||||
|
|
@ -339,8 +340,8 @@ class Lattice(BaseParser):
|
|||
|
||||
_tables = []
|
||||
# sort tables based on y-coord
|
||||
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
|
||||
key=lambda x: x[1], reverse=True)):
|
||||
for table_idx, tk in enumerate(sorted(
|
||||
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
|
||||
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
|
||||
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
||||
_tables.append(table)
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ class Stream(BaseParser):
|
|||
row_y = t.y0
|
||||
temp.append(t)
|
||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||
__ = rows.pop(0) # hacky
|
||||
__ = rows.pop(0) # hacky
|
||||
return rows
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -211,7 +211,7 @@ class Stream(BaseParser):
|
|||
text = Stream._group_rows(text, row_close_tol=row_close_tol)
|
||||
elements = [len(r) for r in text]
|
||||
new_cols = [(t.x0, t.x1)
|
||||
for r in text if len(r) == max(elements) for t in r]
|
||||
for r in text if len(r) == max(elements) for t in r]
|
||||
cols.extend(Stream._merge_columns(sorted(new_cols)))
|
||||
return cols
|
||||
|
||||
|
|
@ -357,8 +357,8 @@ class Stream(BaseParser):
|
|||
|
||||
_tables = []
|
||||
# sort tables based on y-coord
|
||||
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
|
||||
key=lambda x: x[1], reverse=True)):
|
||||
for table_idx, tk in enumerate(sorted(
|
||||
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
|
||||
cols, rows = self._generate_columns_and_rows(table_idx, tk)
|
||||
table = self._generate_table(table_idx, cols, rows)
|
||||
_tables.append(table)
|
||||
|
|
|
|||
|
|
@ -41,16 +41,16 @@ def plot_table(table):
|
|||
for cell in row:
|
||||
if cell.left:
|
||||
plt.plot([cell.lb[0], cell.lt[0]],
|
||||
[cell.lb[1], cell.lt[1]])
|
||||
[cell.lb[1], cell.lt[1]])
|
||||
if cell.right:
|
||||
plt.plot([cell.rb[0], cell.rt[0]],
|
||||
[cell.rb[1], cell.rt[1]])
|
||||
[cell.rb[1], cell.rt[1]])
|
||||
if cell.top:
|
||||
plt.plot([cell.lt[0], cell.rt[0]],
|
||||
[cell.lt[1], cell.rt[1]])
|
||||
[cell.lt[1], cell.rt[1]])
|
||||
if cell.bottom:
|
||||
plt.plot([cell.lb[0], cell.rb[0]],
|
||||
[cell.lb[1], cell.rb[1]])
|
||||
[cell.lb[1], cell.rb[1]])
|
||||
plt.show()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from __future__ import division
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import warnings
|
||||
|
|
@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
|
|||
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
|
||||
from pdfminer.pdfinterp import PDFResourceManager
|
||||
from pdfminer.pdfinterp import PDFPageInterpreter
|
||||
from pdfminer.pdfdevice import PDFDevice
|
||||
from pdfminer.converter import PDFPageAggregator
|
||||
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
||||
LTTextLineVertical)
|
||||
|
|
@ -278,8 +276,8 @@ def text_in_bbox(bbox, text):
|
|||
lb = (bbox[0], bbox[1])
|
||||
rt = (bbox[2], bbox[3])
|
||||
t_bbox = [t for t in text if lb[0] - 2 <= (t.x0 + t.x1) / 2.0
|
||||
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
|
||||
<= rt[1] + 2]
|
||||
<= rt[0] + 2 and lb[1] - 2 <= (t.y0 + t.y1) / 2.0
|
||||
<= rt[1] + 2]
|
||||
return t_bbox
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -10,21 +10,21 @@ class FlaskyStyle(Style):
|
|||
|
||||
styles = {
|
||||
# No corresponding class for the following:
|
||||
#Text: "", # class: ''
|
||||
Whitespace: "underline #f8f8f8", # class: 'w'
|
||||
Error: "#a40000 border:#ef2929", # class: 'err'
|
||||
Other: "#000000", # class 'x'
|
||||
# Text: "", # class: ''
|
||||
Whitespace: "underline #f8f8f8", # class: 'w'
|
||||
Error: "#a40000 border:#ef2929", # class: 'err'
|
||||
Other: "#000000", # class 'x'
|
||||
|
||||
Comment: "italic #8f5902", # class: 'c'
|
||||
Comment.Preproc: "noitalic", # class: 'cp'
|
||||
Comment: "italic #8f5902", # class: 'c'
|
||||
Comment.Preproc: "noitalic", # class: 'cp'
|
||||
|
||||
Keyword: "bold #004461", # class: 'k'
|
||||
Keyword.Constant: "bold #004461", # class: 'kc'
|
||||
Keyword.Declaration: "bold #004461", # class: 'kd'
|
||||
Keyword.Namespace: "bold #004461", # class: 'kn'
|
||||
Keyword.Pseudo: "bold #004461", # class: 'kp'
|
||||
Keyword.Reserved: "bold #004461", # class: 'kr'
|
||||
Keyword.Type: "bold #004461", # class: 'kt'
|
||||
Keyword: "bold #004461", # class: 'k'
|
||||
Keyword.Constant: "bold #004461", # class: 'kc'
|
||||
Keyword.Declaration: "bold #004461", # class: 'kd'
|
||||
Keyword.Namespace: "bold #004461", # class: 'kn'
|
||||
Keyword.Pseudo: "bold #004461", # class: 'kp'
|
||||
Keyword.Reserved: "bold #004461", # class: 'kr'
|
||||
Keyword.Type: "bold #004461", # class: 'kt'
|
||||
|
||||
Operator: "#582800", # class: 'o'
|
||||
Operator.Word: "bold #004461", # class: 'ow' - like keywords
|
||||
|
|
@ -34,53 +34,53 @@ class FlaskyStyle(Style):
|
|||
# because special names such as Name.Class, Name.Function, etc.
|
||||
# are not recognized as such later in the parsing, we choose them
|
||||
# to look the same as ordinary variables.
|
||||
Name: "#000000", # class: 'n'
|
||||
Name.Attribute: "#c4a000", # class: 'na' - to be revised
|
||||
Name.Builtin: "#004461", # class: 'nb'
|
||||
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
|
||||
Name.Class: "#000000", # class: 'nc' - to be revised
|
||||
Name.Constant: "#000000", # class: 'no' - to be revised
|
||||
Name.Decorator: "#888", # class: 'nd' - to be revised
|
||||
Name.Entity: "#ce5c00", # class: 'ni'
|
||||
Name.Exception: "bold #cc0000", # class: 'ne'
|
||||
Name.Function: "#000000", # class: 'nf'
|
||||
Name.Property: "#000000", # class: 'py'
|
||||
Name.Label: "#f57900", # class: 'nl'
|
||||
Name.Namespace: "#000000", # class: 'nn' - to be revised
|
||||
Name.Other: "#000000", # class: 'nx'
|
||||
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
|
||||
Name.Variable: "#000000", # class: 'nv' - to be revised
|
||||
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
|
||||
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
|
||||
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
|
||||
Name: "#000000", # class: 'n'
|
||||
Name.Attribute: "#c4a000", # class: 'na' - to be revised
|
||||
Name.Builtin: "#004461", # class: 'nb'
|
||||
Name.Builtin.Pseudo: "#3465a4", # class: 'bp'
|
||||
Name.Class: "#000000", # class: 'nc' - to be revised
|
||||
Name.Constant: "#000000", # class: 'no' - to be revised
|
||||
Name.Decorator: "#888", # class: 'nd' - to be revised
|
||||
Name.Entity: "#ce5c00", # class: 'ni'
|
||||
Name.Exception: "bold #cc0000", # class: 'ne'
|
||||
Name.Function: "#000000", # class: 'nf'
|
||||
Name.Property: "#000000", # class: 'py'
|
||||
Name.Label: "#f57900", # class: 'nl'
|
||||
Name.Namespace: "#000000", # class: 'nn' - to be revised
|
||||
Name.Other: "#000000", # class: 'nx'
|
||||
Name.Tag: "bold #004461", # class: 'nt' - like a keyword
|
||||
Name.Variable: "#000000", # class: 'nv' - to be revised
|
||||
Name.Variable.Class: "#000000", # class: 'vc' - to be revised
|
||||
Name.Variable.Global: "#000000", # class: 'vg' - to be revised
|
||||
Name.Variable.Instance: "#000000", # class: 'vi' - to be revised
|
||||
|
||||
Number: "#990000", # class: 'm'
|
||||
Number: "#990000", # class: 'm'
|
||||
|
||||
Literal: "#000000", # class: 'l'
|
||||
Literal.Date: "#000000", # class: 'ld'
|
||||
Literal: "#000000", # class: 'l'
|
||||
Literal.Date: "#000000", # class: 'ld'
|
||||
|
||||
String: "#4e9a06", # class: 's'
|
||||
String.Backtick: "#4e9a06", # class: 'sb'
|
||||
String.Char: "#4e9a06", # class: 'sc'
|
||||
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
|
||||
String.Double: "#4e9a06", # class: 's2'
|
||||
String.Escape: "#4e9a06", # class: 'se'
|
||||
String.Heredoc: "#4e9a06", # class: 'sh'
|
||||
String.Interpol: "#4e9a06", # class: 'si'
|
||||
String.Other: "#4e9a06", # class: 'sx'
|
||||
String.Regex: "#4e9a06", # class: 'sr'
|
||||
String.Single: "#4e9a06", # class: 's1'
|
||||
String.Symbol: "#4e9a06", # class: 'ss'
|
||||
String: "#4e9a06", # class: 's'
|
||||
String.Backtick: "#4e9a06", # class: 'sb'
|
||||
String.Char: "#4e9a06", # class: 'sc'
|
||||
String.Doc: "italic #8f5902", # class: 'sd' - like a comment
|
||||
String.Double: "#4e9a06", # class: 's2'
|
||||
String.Escape: "#4e9a06", # class: 'se'
|
||||
String.Heredoc: "#4e9a06", # class: 'sh'
|
||||
String.Interpol: "#4e9a06", # class: 'si'
|
||||
String.Other: "#4e9a06", # class: 'sx'
|
||||
String.Regex: "#4e9a06", # class: 'sr'
|
||||
String.Single: "#4e9a06", # class: 's1'
|
||||
String.Symbol: "#4e9a06", # class: 'ss'
|
||||
|
||||
Generic: "#000000", # class: 'g'
|
||||
Generic.Deleted: "#a40000", # class: 'gd'
|
||||
Generic.Emph: "italic #000000", # class: 'ge'
|
||||
Generic.Error: "#ef2929", # class: 'gr'
|
||||
Generic.Heading: "bold #000080", # class: 'gh'
|
||||
Generic.Inserted: "#00A000", # class: 'gi'
|
||||
Generic.Output: "#888", # class: 'go'
|
||||
Generic.Prompt: "#745334", # class: 'gp'
|
||||
Generic.Strong: "bold #000000", # class: 'gs'
|
||||
Generic.Subheading: "bold #800080", # class: 'gu'
|
||||
Generic.Traceback: "bold #a40000", # class: 'gt'
|
||||
Generic: "#000000", # class: 'g'
|
||||
Generic.Deleted: "#a40000", # class: 'gd'
|
||||
Generic.Emph: "italic #000000", # class: 'ge'
|
||||
Generic.Error: "#ef2929", # class: 'gr'
|
||||
Generic.Heading: "bold #000080", # class: 'gh'
|
||||
Generic.Inserted: "#00A000", # class: 'gi'
|
||||
Generic.Output: "#888", # class: 'go'
|
||||
Generic.Prompt: "#745334", # class: 'gp'
|
||||
Generic.Strong: "bold #000000", # class: 'gs'
|
||||
Generic.Subheading: "bold #800080", # class: 'gu'
|
||||
Generic.Traceback: "bold #a40000", # class: 'gt'
|
||||
}
|
||||
3
setup.py
3
setup.py
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
import os
|
||||
from setuptools import find_packages
|
||||
from pkg_resources import parse_version
|
||||
|
||||
|
||||
here = os.path.abspath(os.path.dirname(__file__))
|
||||
|
|
@ -56,7 +55,7 @@ def setup_package():
|
|||
|
||||
try:
|
||||
from setuptools import setup
|
||||
except:
|
||||
except ImportError:
|
||||
from distutils.core import setup
|
||||
|
||||
setup(**metadata)
|
||||
|
|
|
|||
|
|
@ -82,8 +82,8 @@ def test_stream_flag_size():
|
|||
def test_lattice():
|
||||
df = pd.DataFrame(data_lattice)
|
||||
|
||||
filename = os.path.join(testdir,
|
||||
"tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
|
||||
filename = os.path.join(
|
||||
testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
|
||||
tables = camelot.read_pdf(filename, pages="2")
|
||||
assert df.equals(tables[0].df)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue