[MRG + 1] Make pep8 (#125)
* Make setup.py pep8: Add new line at end of file, fix bare except, remove unused import. * Make tests/*.py pep8: Add some newlines at the end of files and fix a visual indent. * Make docs/*.py pep8: Fix block comments and add new lines at end of files. * Make camelot/*.py pep8: Fixed unused import, a few weirdly ordered imports, a docstring typo and many missing new lines at the end of files. * Fix imports: Fix import order and remove a couple more unused imports. * Fix indents: Fix indentation (no opening delimiter alignment). * Add newlines
pull/2/head
parent
6e8079df84
commit
90aaba6eec
|
|
@ -2,6 +2,9 @@
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from .__version__ import __version__
|
||||||
|
from .io import read_pdf
|
||||||
|
|
||||||
|
|
||||||
# set up logging
|
# set up logging
|
||||||
logger = logging.getLogger('camelot')
|
logger = logging.getLogger('camelot')
|
||||||
|
|
@ -12,8 +15,3 @@ handler = logging.StreamHandler()
|
||||||
handler.setFormatter(formatter)
|
handler.setFormatter(formatter)
|
||||||
|
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
from .__version__ import __version__
|
|
||||||
|
|
||||||
from .io import read_pdf
|
|
||||||
|
|
@ -2,15 +2,16 @@
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
logger = logging.getLogger('camelot')
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from .io import read_pdf
|
from .io import read_pdf
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger('camelot')
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
class Config(object):
|
class Config(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.config = {}
|
self.config = {}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
|
||||||
import zipfile
|
import zipfile
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from itertools import groupby
|
|
||||||
from operator import itemgetter
|
|
||||||
|
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
@ -40,10 +38,12 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
|
||||||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||||
|
|
||||||
if process_background:
|
if process_background:
|
||||||
threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
threshold = cv2.adaptiveThreshold(
|
||||||
|
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
||||||
cv2.THRESH_BINARY, blocksize, c)
|
cv2.THRESH_BINARY, blocksize, c)
|
||||||
else:
|
else:
|
||||||
threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
|
threshold = cv2.adaptiveThreshold(
|
||||||
|
np.invert(gray), 255,
|
||||||
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
|
cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c)
|
||||||
return img, threshold
|
return img, threshold
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -201,8 +201,9 @@ class Lattice(BaseParser):
|
||||||
if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
|
if 'ghostscript' in subprocess.check_output(['gs', '-version']).decode('utf-8').lower():
|
||||||
gs_call.insert(0, 'gs')
|
gs_call.insert(0, 'gs')
|
||||||
else:
|
else:
|
||||||
gs_call.insert(0, 'gsc')
|
gs_call.insert(0, "gsc")
|
||||||
subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
|
subprocess.call(
|
||||||
|
gs_call, stdout=open(os.devnull, 'w'),
|
||||||
stderr=subprocess.STDOUT)
|
stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
def _generate_table_bbox(self):
|
def _generate_table_bbox(self):
|
||||||
|
|
@ -339,8 +340,8 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
_tables = []
|
_tables = []
|
||||||
# sort tables based on y-coord
|
# sort tables based on y-coord
|
||||||
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
|
for table_idx, tk in enumerate(sorted(
|
||||||
key=lambda x: x[1], reverse=True)):
|
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
|
||||||
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
|
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
|
||||||
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
||||||
_tables.append(table)
|
_tables.append(table)
|
||||||
|
|
|
||||||
|
|
@ -357,8 +357,8 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
_tables = []
|
_tables = []
|
||||||
# sort tables based on y-coord
|
# sort tables based on y-coord
|
||||||
for table_idx, tk in enumerate(sorted(self.table_bbox.keys(),
|
for table_idx, tk in enumerate(sorted(
|
||||||
key=lambda x: x[1], reverse=True)):
|
self.table_bbox.keys(), key=lambda x: x[1], reverse=True)):
|
||||||
cols, rows = self._generate_columns_and_rows(table_idx, tk)
|
cols, rows = self._generate_columns_and_rows(table_idx, tk)
|
||||||
table = self._generate_table(table_idx, cols, rows)
|
table = self._generate_table(table_idx, cols, rows)
|
||||||
_tables.append(table)
|
_tables.append(table)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
import os
|
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
import warnings
|
import warnings
|
||||||
|
|
@ -14,7 +13,6 @@ from pdfminer.pdfpage import PDFPage
|
||||||
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
|
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
|
||||||
from pdfminer.pdfinterp import PDFResourceManager
|
from pdfminer.pdfinterp import PDFResourceManager
|
||||||
from pdfminer.pdfinterp import PDFPageInterpreter
|
from pdfminer.pdfinterp import PDFPageInterpreter
|
||||||
from pdfminer.pdfdevice import PDFDevice
|
|
||||||
from pdfminer.converter import PDFPageAggregator
|
from pdfminer.converter import PDFPageAggregator
|
||||||
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
||||||
LTTextLineVertical)
|
LTTextLineVertical)
|
||||||
|
|
|
||||||
3
setup.py
3
setup.py
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from setuptools import find_packages
|
from setuptools import find_packages
|
||||||
from pkg_resources import parse_version
|
|
||||||
|
|
||||||
|
|
||||||
here = os.path.abspath(os.path.dirname(__file__))
|
here = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
@ -56,7 +55,7 @@ def setup_package():
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
except:
|
except ImportError:
|
||||||
from distutils.core import setup
|
from distutils.core import setup
|
||||||
|
|
||||||
setup(**metadata)
|
setup(**metadata)
|
||||||
|
|
|
||||||
|
|
@ -82,8 +82,8 @@ def test_stream_flag_size():
|
||||||
def test_lattice():
|
def test_lattice():
|
||||||
df = pd.DataFrame(data_lattice)
|
df = pd.DataFrame(data_lattice)
|
||||||
|
|
||||||
filename = os.path.join(testdir,
|
filename = os.path.join(
|
||||||
"tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
|
testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
|
||||||
tables = camelot.read_pdf(filename, pages="2")
|
tables = camelot.read_pdf(filename, pages="2")
|
||||||
assert df.equals(tables[0].df)
|
assert df.equals(tables[0].df)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue