diff --git a/README.md b/README.md index 790b2b2..78485aa 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Camelot: PDF Table Parsing for Humans +# Camelot: PDF Table Extraction for Humans   @@ -38,7 +38,7 @@ | 2032_2 | 0.17 | 57.8 | 21.7% | 0.3% | 2.7% | 1.2% | | 4171_1 | 0.07 | 173.9 | 58.1% | 1.6% | 2.1% | 0.5% | -There's a [command-line interface]() too! +There's a [command-line interface](http://camelot-py.readthedocs.io/en/master/user/cli.html) too! ## Why Camelot? @@ -46,13 +46,12 @@ There's a [command-line interface]() too! - **Metrics**: *Bad* tables can be discarded based on metrics like accuracy and whitespace, without ever having to manually look at each table. - Each table is a **pandas DataFrame**, which enables seamless integration into [ETL and data analysis workflows](https://gist.github.com/vinayak-mehta/e5949f7c2410a0e12f25d3682dc9e873). - **Export** to multiple formats, including json, excel and html. -- Simple and Elegant API, written in **Python**! -See [comparison with other PDF parsing libraries and tools](https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Parsing-libraries-and-tools). +See [comparison with other PDF table extraction libraries and tools](https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Extraction-libraries-and-tools). ## Installation -After [installing the dependencies](), you can simply use pip to install Camelot: +After [installing the dependencies](http://camelot-py.readthedocs.io/en/master/user/install.html), [tk](https://packages.ubuntu.com/trusty/python-tk) and [ghostscript](https://www.ghostscript.com/), you can simply use pip to install Camelot:
$ pip install camelot-py
@@ -60,7 +59,7 @@ $ pip install camelot-py
### Alternatively
-You can install the dependencies [tk](https://packages.ubuntu.com/trusty/python-tk) and [ghostscript](https://www.ghostscript.com/) using your system's package manager. After that, clone the repo using:
+After [installing the dependencies](http://camelot-py.readthedocs.io/en/master/user/install.html), clone the repo using:
$ git clone https://www.github.com/socialcopsdev/camelot
@@ -77,7 +76,7 @@ Note: Use a [virtualenv](https://virtualenv.pypa.io/en/stable/) if you don't wan
## Documentation
-Great documentation is available at [insert link]().
+Great documentation is available at [Read the Docs](http://camelot-py.readthedocs.io/).
## Development
diff --git a/camelot/__init__.py b/camelot/__init__.py
index b762cea..72f362e 100644
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from .__version__ import __version__
from .io import read_pdf
\ No newline at end of file
diff --git a/camelot/__version__.py b/camelot/__version__.py
index 7341562..485c17e 100644
--- a/camelot/__version__.py
+++ b/camelot/__version__.py
@@ -1,3 +1,11 @@
+# -*- coding: utf-8 -*-
+
VERSION = (0, 1, 0)
+__title__ = 'camelot-py'
+__description__ = 'PDF Table Extraction for Humans.'
+__url__ = 'http://camelot-py.readthedocs.io/'
__version__ = '.'.join(map(str, VERSION))
+__author__ = 'Vinayak Mehta'
+__author_email__ = 'vmehta94@gmail.com'
+__license__ = 'MIT License'
\ No newline at end of file
diff --git a/camelot/cli.py b/camelot/cli.py
index 02e37d3..af09b24 100644
--- a/camelot/cli.py
+++ b/camelot/cli.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
+
from pprint import pprint
import click
@@ -20,23 +21,22 @@ pass_config = click.make_pass_decorator(Config)
@click.group()
@click.version_option(version=__version__)
-@click.option('-p', '--pages', default='1', help='Comma-separated page numbers'
- ' to parse. Example: 1,3,4 or 1,4-end')
-@click.option('-o', '--output', help='Output filepath.')
+@click.option('-p', '--pages', default='1', help='Comma-separated page numbers.'
+ ' Example: 1,3,4 or 1,4-end.')
+@click.option('-o', '--output', help='Output file path.')
@click.option('-f', '--format',
type=click.Choice(['csv', 'json', 'excel', 'html']),
help='Output file format.')
-@click.option('-z', '--zip', is_flag=True, help='Whether or not to create a ZIP'
- ' archive.')
-@click.option('-split', '--split_text', is_flag=True, help='Whether or not to'
- ' split text if it spans across multiple cells.')
-@click.option('-flag', '--flag_size', is_flag=True, help='(inactive) Whether or'
- ' not to flag text which has uncommon size. (Useful to detect'
- ' super/subscripts)')
+@click.option('-z', '--zip', is_flag=True, help='Create ZIP archive.')
+@click.option('-split', '--split_text', is_flag=True,
+ help='Split text that spans across multiple cells.')
+@click.option('-flag', '--flag_size', is_flag=True, help='Flag text based on'
+ ' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
- help='char_margin, line_margin, word_margin for PDFMiner.')
+ help='PDFMiner char_margin, line_margin and word_margin.')
@click.pass_context
def cli(ctx, *args, **kwargs):
+ """Camelot: PDF Table Extraction for Humans"""
ctx.obj = Config()
for key, value in kwargs.iteritems():
ctx.obj.set_config(key, value)
@@ -44,45 +44,42 @@ def cli(ctx, *args, **kwargs):
@cli.command('lattice')
@click.option('-T', '--table_area', default=[], multiple=True,
- help='Table areas (x1,y1,x2,y2) to process.\n'
- ' x1, y1 -> left-top and x2, y2 -> right-bottom')
+ help='Table areas to process. Example: x1,y1,x2,y2'
+ ' where x1, y1 -> left-top and x2, y2 -> right-bottom.')
@click.option('-back', '--process_background', is_flag=True,
- help='Whether or not to process lines that are in'
- ' background.')
+ help='Process background lines.')
@click.option('-scale', '--line_size_scaling', default=15,
- help='Factor by which the page dimensions will be'
- ' divided to get smallest length of detected lines.')
+ help='Line size scaling factor. The larger the value,'
+ ' the smaller the detected lines.')
@click.option('-copy', '--copy_text', default=[], type=click.Choice(['h', 'v']),
- multiple=True, help='Specify direction'
- ' in which text will be copied over in a spanning cell.')
+ multiple=True, help='Direction in which text in a spanning cell'
+ ' will be copied over.')
@click.option('-shift', '--shift_text', default=['l', 't'],
type=click.Choice(['', 'l', 'r', 't', 'b']), multiple=True,
- help='Specify direction in which text in a spanning'
- ' cell should flow.')
+ help='Direction in which text in a spanning cell will flow.')
@click.option('-l', '--line_close_tol', default=2,
help='Tolerance parameter used to merge close vertical'
- ' lines and close horizontal lines.')
+ ' and horizontal lines.')
@click.option('-j', '--joint_close_tol', default=2,
help='Tolerance parameter used to decide whether'
' the detected lines and points lie close to each other.')
@click.option('-block', '--threshold_blocksize', default=15,
help='For adaptive thresholding, size of a pixel'
' neighborhood that is used to calculate a threshold value for'
- ' the pixel: 3, 5, 7, and so on.')
+ ' the pixel. Example: 3, 5, 7, and so on.')
@click.option('-const', '--threshold_constant', default=-2,
help='For adaptive thresholding, constant subtracted'
- ' from the mean or weighted mean.\nNormally, it is positive but'
+ ' from the mean or weighted mean. Normally, it is positive but'
' may be zero or negative as well.')
@click.option('-I', '--iterations', default=0,
- help='Number of times for erosion/dilation is'
- ' applied.')
+ help='Number of times for erosion/dilation will be applied.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table', 'contour', 'joint', 'line']),
- help='Plot geometry found on PDF page for debugging.')
+ help='Plot geometry found on PDF page, for debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def lattice(c, *args, **kwargs):
- """Use lines between text to parse table."""
+ """Use lines between text to parse the table."""
conf = c.config
pages = conf.pop('pages')
output = conf.pop('output')
@@ -105,29 +102,29 @@ def lattice(c, *args, **kwargs):
table.plot(plot_type)
else:
if output is None:
- raise click.UsageError('Please specify output filepath using --output')
+ raise click.UsageError('Please specify output file path using --output')
if f is None:
- raise click.UsageError('Please specify output format using --format')
+ raise click.UsageError('Please specify output file format using --format')
tables.export(output, f=f, compress=compress)
@cli.command('stream')
@click.option('-T', '--table_area', default=[], multiple=True,
- help='Table areas (x1,y1,x2,y2) to process.\n'
- ' x1, y1 -> left-top and x2, y2 -> right-bottom')
+ help='Table areas to process. Example: x1,y1,x2,y2'
+ ' where x1, y1 -> left-top and x2, y2 -> right-bottom.')
@click.option('-C', '--columns', default=[], multiple=True,
- help='x-coordinates of column separators.')
-@click.option('-r', '--row_close_tol', default=2, help='Rows will be'
- ' formed by combining text vertically within this tolerance.')
-@click.option('-c', '--col_close_tol', default=0, help='Columns will'
- ' be formed by combining text horizontally within this tolerance.')
+ help='X coordinates of column separators.')
+@click.option('-r', '--row_close_tol', default=2, help='Tolerance parameter'
+ ' used to combine text vertically, to generate rows.')
+@click.option('-c', '--col_close_tol', default=0, help='Tolerance parameter'
+ ' used to combine text horizontally, to generate columns.')
@click.option('-plot', '--plot_type',
type=click.Choice(['text', 'table']),
help='Plot geometry found on PDF page for debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
def stream(c, *args, **kwargs):
- """Use spaces between text to parse table."""
+ """Use spaces between text to parse the table."""
conf = c.config
pages = conf.pop('pages')
output = conf.pop('output')
@@ -149,7 +146,7 @@ def stream(c, *args, **kwargs):
table.plot(plot_type)
else:
if output is None:
- raise click.UsageError('Please specify output filepath using --output')
+ raise click.UsageError('Please specify output file path using --output')
if f is None:
- raise click.UsageError('Please specify output format using --format')
+ raise click.UsageError('Please specify output file format using --format')
tables.export(output, f=f, compress=compress)
\ No newline at end of file
diff --git a/camelot/core.py b/camelot/core.py
index a7f8d78..0658236 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import os
import json
import zipfile
@@ -11,7 +13,7 @@ from .plotting import *
class Cell(object):
"""Defines a cell in a table with coordinates relative to a
- left-bottom origin. (pdf coordinate space)
+ left-bottom origin. (PDF coordinate space)
Parameters
----------
@@ -89,7 +91,7 @@ class Cell(object):
class Table(object):
"""Defines a table with coordinates relative to a left-bottom
- origin. (pdf coordinate space)
+ origin. (PDF coordinate space)
Parameters
----------
@@ -110,9 +112,9 @@ class Table(object):
whitespace : float
Percentage of whitespace in the table.
order : int
- Table number on pdf page.
+ Table number on PDF page.
page : int
- Pdf page number.
+ PDF page number.
"""
def __init__(self, cols, rows):
diff --git a/camelot/handlers.py b/camelot/handlers.py
index 0ea9785..40f4074 100644
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
@@ -10,16 +12,16 @@ from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
class PDFHandler(object):
"""Handles all operations like temp directory creation, splitting
- file into single page pdfs, parsing each pdf and then removing the
+ file into single page PDFs, parsing each PDF and then removing the
temp directory.
Parameters
----------
filename : str
- Path to pdf file.
+ Path to PDF file.
pages : str, optional (default: '1')
- Comma-separated page numbers to parse.
- Example: 1,3,4 or 1,4-end
+ Comma-separated page numbers.
+ Example: 1,3,4 or 1,4-end.
"""
def __init__(self, filename, pages='1'):
@@ -34,10 +36,10 @@ class PDFHandler(object):
Parameters
----------
filename : str
- Path to pdf file.
+ Path to PDF file.
pages : str, optional (default: '1')
- Comma-separated page numbers to parse.
- Example: 1,3,4 or 1,4-end
+ Comma-separated page numbers.
+ Example: 1,3,4 or 1,4-end.
Returns
-------
@@ -67,16 +69,16 @@ class PDFHandler(object):
return sorted(set(P))
def _save_page(self, filename, page, temp):
- """Saves specified page from pdf into a temporary directory.
+ """Saves specified page from PDF into a temporary directory.
Parameters
----------
filename : str
- Path to pdf file.
+ Path to PDF file.
page : int
- Page number
+ Page number.
temp : str
- Tmp directory
+ Tmp directory.
"""
with open(filename, 'rb') as fileobj:
@@ -91,7 +93,7 @@ class PDFHandler(object):
with open(fpath, 'wb') as f:
outfile.write(f)
layout, dim = get_page_layout(fpath)
- # fix rotated pdf
+ # fix rotated PDF
lttextlh = get_text_objects(layout, ltype="lh")
lttextlv = get_text_objects(layout, ltype="lv")
ltchar = get_text_objects(layout, ltype="char")
@@ -114,7 +116,7 @@ class PDFHandler(object):
def parse(self, flavor='lattice', **kwargs):
"""Extracts tables by calling parser.get_tables on all single
- page pdfs.
+ page PDFs.
Parameters
----------
@@ -127,10 +129,10 @@ class PDFHandler(object):
Returns
-------
tables : camelot.core.TableList
- List of tables found in pdf.
+ List of tables found in PDF.
geometry : camelot.core.GeometryList
- List of geometry objects (contours, lines, joints)
- found in pdf.
+ List of geometry objects (contours, lines, joints) found
+ in PDF.
"""
tables = []
diff --git a/camelot/image_processing.py b/camelot/image_processing.py
index 23923b2..d3ae8ef 100644
--- a/camelot/image_processing.py
+++ b/camelot/image_processing.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from __future__ import division
from itertools import groupby
from operator import itemgetter
diff --git a/camelot/io.py b/camelot/io.py
index 3fdac0d..bdbcc69 100644
--- a/camelot/io.py
+++ b/camelot/io.py
@@ -1,9 +1,11 @@
+# -*- coding: utf-8 -*-
+
from .handlers import PDFHandler
from .utils import validate_input, remove_extra
def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
- """Read PDF and return parsed data tables.
+ """Read PDF and return extracted tables.
Note: kwargs annotated with ^ can only be used with flavor='stream'
and kwargs annotated with * can only be used with flavor='lattice'.
@@ -11,53 +13,47 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
Parameters
----------
filepath : str
- Path to pdf file.
+ Path to PDF file.
pages : str, optional (default: '1')
- Comma-separated page numbers to parse.
- Example: 1,3,4 or 1,4-end
+ Comma-separated page numbers.
+ Example: 1,3,4 or 1,4-end.
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
table_area : list, optional (default: None)
- List of table areas to process as strings of the form
- x1,y1,x2,y2 where (x1, y1) -> left-top and
- (x2, y2) -> right-bottom in pdf coordinate space.
+ List of table area strings of the form x1,y1,x2,y2
+ where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+ in PDF coordinate space.
columns^ : list, optional (default: None)
- List of column x-coordinates as strings where the coordinates
+ List of column x-coordinate strings where the coordinates
are comma-separated.
split_text : bool, optional (default: False)
- Whether or not to split a text line if it spans across
- multiple cells.
+ Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
- Whether or not to highlight a substring using
- if its size is different from rest of the string. (Useful for
- super and subscripts)
+ Flag text based on font size. Useful to detect
+ super/subscripts. Adds <s></s> around flagged text.
row_close_tol^ : int, optional (default: 2)
- Rows will be formed by combining text vertically
- within this tolerance.
+ Tolerance parameter used to combine text vertically,
+ to generate rows.
col_close_tol^ : int, optional (default: 0)
- Columns will be formed by combining text horizontally
- within this tolerance.
+ Tolerance parameter used to combine text horizontally,
+ to generate columns.
process_background* : bool, optional (default: False)
- Whether or not to process lines that are in background.
+ Process background lines.
line_size_scaling* : int, optional (default: 15)
- Factor by which the page dimensions will be divided to get
- smallest length of lines that should be detected.
-
- The larger this value, smaller the detected lines. Making it
- too large will lead to text being detected as lines.
+ Line size scaling factor. The larger the value the smaller
+ the detected lines. Making it very large will lead to text
+ being detected as lines.
copy_text* : list, optional (default: None)
{'h', 'v'}
- Select one or more strings from above and pass them as a list
- to specify the direction in which text should be copied over
- when a cell spans multiple rows or columns.
+ Direction in which text in a spanning cell will be copied
+ over.
shift_text* : list, optional (default: ['l', 't'])
{'l', 'r', 't', 'b'}
- Select one or more strings from above and pass them as a list
- to specify where the text in a spanning cell should flow.
+ Direction in which text in a spanning cell will flow.
line_close_tol* : int, optional (default: 2)
- Tolerance parameter used to merge vertical and horizontal
- detected lines which lie close to each other.
+ Tolerance parameter used to merge close vertical and horizontal
+ lines.
joint_close_tol* : int, optional (default: 2)
Tolerance parameter used to decide whether the detected lines
and points lie close to each other.
@@ -76,7 +72,7 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
For more information, refer `OpenCV's dilate `_.
margins : tuple
- PDFMiner margins. (char_margin, line_margin, word_margin)
+ PDFMiner char_margin, line_margin and word_margin.
For more information, refer `PDFMiner docs `_.
diff --git a/camelot/parsers/__init__.py b/camelot/parsers/__init__.py
index e046b46..9366b78 100644
--- a/camelot/parsers/__init__.py
+++ b/camelot/parsers/__init__.py
@@ -1,2 +1,4 @@
+# -*- coding: utf-8 -*-
+
from .stream import Stream
from .lattice import Lattice
\ No newline at end of file
diff --git a/camelot/parsers/base.py b/camelot/parsers/base.py
index 5035966..bd3de99 100644
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import os
from ..utils import get_page_layout, get_text_objects
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index c4649e8..5219bc8 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from __future__ import division
import os
import copy
@@ -21,41 +23,35 @@ logger = setup_logging(__name__)
class Lattice(BaseParser):
"""Lattice method of parsing looks for lines between text
- to parse table.
+ to parse the table.
Parameters
----------
table_area : list, optional (default: None)
- List of table areas to analyze as strings of the form
- x1,y1,x2,y2 where (x1, y1) -> left-top and
- (x2, y2) -> right-bottom in pdf coordinate space.
+ List of table area strings of the form x1,y1,x2,y2
+ where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+ in PDF coordinate space.
process_background : bool, optional (default: False)
- Whether or not to process lines that are in background.
+ Process background lines.
line_size_scaling : int, optional (default: 15)
- Factor by which the page dimensions will be divided to get
- smallest length of lines that should be detected.
-
- The larger this value, smaller the detected lines. Making it
- too large will lead to text being detected as lines.
+ Line size scaling factor. The larger the value the smaller
+ the detected lines. Making it very large will lead to text
+ being detected as lines.
copy_text : list, optional (default: None)
{'h', 'v'}
- Select one or more strings from above and pass them as a list
- to specify the direction in which text should be copied over
- when a cell spans multiple rows or columns.
+ Direction in which text in a spanning cell will be copied
+ over.
shift_text : list, optional (default: ['l', 't'])
{'l', 'r', 't', 'b'}
- Select one or more strings from above and pass them as a list
- to specify where the text in a spanning cell should flow.
+ Direction in which text in a spanning cell will flow.
split_text : bool, optional (default: False)
- Whether or not to split a text line if it spans across
- multiple cells.
+ Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
- Whether or not to highlight a substring using
- if its size is different from rest of the string. (Useful for
- super and subscripts)
+ Flag text based on font size. Useful to detect
+ super/subscripts. Adds <s></s> around flagged text.
line_close_tol : int, optional (default: 2)
- Tolerance parameter used to merge vertical and horizontal
- detected lines which lie close to each other.
+ Tolerance parameter used to merge close vertical and horizontal
+ lines.
joint_close_tol : int, optional (default: 2)
Tolerance parameter used to decide whether the detected lines
and points lie close to each other.
@@ -74,7 +70,7 @@ class Lattice(BaseParser):
For more information, refer `OpenCV's dilate `_.
margins : tuple
- PDFMiner margins. (char_margin, line_margin, word_margin)
+ PDFMiner char_margin, line_margin and word_margin.
For more information, refer `PDFMiner docs `_.
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 5a05dba..aa3c461 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
from __future__ import division
import os
import logging
@@ -16,7 +18,7 @@ logger = setup_logging(__name__)
class Stream(BaseParser):
"""Stream method of parsing looks for spaces between text
- to parse table.
+ to parse the table.
If you want to specify columns when specifying multiple table
areas, make sure that the length of both lists are equal.
@@ -24,27 +26,25 @@ class Stream(BaseParser):
Parameters
----------
table_area : list, optional (default: None)
- List of table areas to analyze as strings of the form
- x1,y1,x2,y2 where (x1, y1) -> left-top and
- (x2, y2) -> right-bottom in pdf coordinate space.
+ List of table area strings of the form x1,y1,x2,y2
+ where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+ in PDF coordinate space.
columns : list, optional (default: None)
- List of column x-coordinates as strings where the coordinates
+ List of column x-coordinate strings where the coordinates
are comma-separated.
split_text : bool, optional (default: False)
- Whether or not to split a text line if it spans across
- multiple cells.
+ Split text that spans across multiple cells.
flag_size : bool, optional (default: False)
- Whether or not to highlight a substring using
- if its size is different from rest of the string. (Useful for
- super and subscripts)
+ Flag text based on font size. Useful to detect
+ super/subscripts. Adds <s></s> around flagged text.
row_close_tol : int, optional (default: 2)
- Rows will be formed by combining text vertically
- within this tolerance.
+ Tolerance parameter used to combine text vertically,
+ to generate rows.
col_close_tol : int, optional (default: 0)
- Columns will be formed by combining text horizontally
- within this tolerance.
+ Tolerance parameter used to combine text horizontally,
+ to generate columns.
margins : tuple, optional (default: (1.0, 0.5, 0.1))
- PDFMiner margins. (char_margin, line_margin, word_margin)
+ PDFMiner char_margin, line_margin and word_margin.
For more information, refer `PDFMiner docs `_.
diff --git a/setup.py b/setup.py
index 00d6e8f..439e402 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import os
from setuptools import find_packages
from pkg_resources import parse_version
@@ -8,16 +10,8 @@ about = {}
with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
exec(f.read(), about)
-# TODO: Move these to __version__.py
-NAME = 'camelot-py'
-VERSION = about['__version__']
-DESCRIPTION = 'PDF Table Parsing for Humans'
-with open('README.md') as f:
- LONG_DESCRIPTION = f.read()
-URL = 'https://github.com/socialcopsdev/camelot'
-AUTHOR = 'Vinayak Mehta'
-AUTHOR_EMAIL = 'vmehta94@gmail.com'
-LICENSE = 'MIT License'
+with open('README.md', 'r') as f:
+ readme = f.read()
def setup_package():
@@ -31,14 +25,14 @@ def setup_package():
for line in f:
dev_reqs.append(line.strip())
- metadata = dict(name=NAME,
- version=VERSION,
- description=DESCRIPTION,
- long_description=LONG_DESCRIPTION,
- url=URL,
- author=AUTHOR,
- author_email=AUTHOR_EMAIL,
- license=LICENSE,
+ metadata = dict(name=about['__title__'],
+ version=about['__version__'],
+ description=about['__description__'],
+ long_description=readme,
+ url=about['__url__'],
+ author=about['__author__'],
+ author_email=about['__author_email__'],
+ license=about['__license__'],
packages=find_packages(exclude=('tests',)),
install_requires=reqs,
extras_require={
diff --git a/tests/test_common.py b/tests/test_common.py
index 065a9e2..10b852c 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import os
import pandas as pd