Add utf8 header

2018-09-24 16:27:26 +05:30 · 2018-09-24 16:27:26 +05:30 · be2733ebd2
parent 3600025a22
commit be2733ebd2
14 changed files with 158 additions and 154 deletions
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# Camelot: PDF Table Parsing for Humans
+# Camelot: PDF Table Extraction for Humans

 ![license](https://img.shields.io/badge/license-MIT-lightgrey.svg) ![python-version](https://img.shields.io/badge/python-2.7-blue.svg)

@ -38,7 +38,7 @@
 | 2032_2     | 0.17      | 57.8          | 21.7%                | 0.3%            | 2.7%            | 1.2%           |
 | 4171_1     | 0.07      | 173.9         | 58.1%                | 1.6%            | 2.1%            | 0.5%           |

-There's a [command-line interface]() too!
+There's a [command-line interface](http://camelot-py.readthedocs.io/en/master/user/cli.html) too!

 ## Why Camelot?

@ -46,13 +46,12 @@ There's a [command-line interface]() too!
 - **Metrics**: *Bad* tables can be discarded based on metrics like accuracy and whitespace, without ever having to manually look at each table.
 - Each table is a **pandas DataFrame**, which enables seamless integration into [ETL and data analysis workflows](https://gist.github.com/vinayak-mehta/e5949f7c2410a0e12f25d3682dc9e873).
 - **Export** to multiple formats, including json, excel and html.
- Simple and Elegant API, written in **Python**!

-See [comparison with other PDF parsing libraries and tools](https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Parsing-libraries-and-tools).
+See [comparison with other PDF table extraction libraries and tools](https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Extraction-libraries-and-tools).

 ## Installation

-After [installing the dependencies](), you can simply use pip to install Camelot:
+After [installing the dependencies](http://camelot-py.readthedocs.io/en/master/user/install.html) ([tk](https://packages.ubuntu.com/trusty/python-tk) and [ghostscript](https://www.ghostscript.com/)), you can simply use pip to install Camelot:

 <pre>
 $ pip install camelot-py
@ -60,7 +59,7 @@ $ pip install camelot-py

 ### Alternatively

-You can install the dependencies [tk](https://packages.ubuntu.com/trusty/python-tk) and [ghostscript](https://www.ghostscript.com/) using your system's package manager. After that, clone the repo using:
+After [installing the dependencies](http://camelot-py.readthedocs.io/en/master/user/install.html), clone the repo using:

 <pre>
 $ git clone https://www.github.com/socialcopsdev/camelot
@ -77,7 +76,7 @@ Note: Use a [virtualenv](https://virtualenv.pypa.io/en/stable/) if you don't wan

 ## Documentation

-Great documentation is available at [insert link]().
+Great documentation is available at [camelot-py.readthedocs.io](http://camelot-py.readthedocs.io/).

 ## Development

--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .__version__ import __version__

 from .io import read_pdf
--- a/camelot/__version__.py
+++ b/camelot/__version__.py
@ -1,3 +1,11 @@
+# -*- coding: utf-8 -*-
+
 VERSION = (0, 1, 0)

+__title__ = 'camelot-py'
+__description__ = 'PDF Table Extraction for Humans.'
+__url__ = 'http://camelot-py.readthedocs.io/'
 __version__ = '.'.join(map(str, VERSION))
+__author__ = 'Vinayak Mehta'
+__author_email__ = 'vmehta94@gmail.com'
+__license__ = 'MIT License'
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+
 from pprint import pprint

 import click
@ -20,23 +21,22 @@ pass_config = click.make_pass_decorator(Config)

@click.group()
@click.version_option(version=__version__)
-@click.option('-p', '--pages', default='1', help='Comma-separated page numbers'
-              ' to parse. Example: 1,3,4 or 1,4-end')
-@click.option('-o', '--output', help='Output filepath.')
+@click.option('-p', '--pages', default='1', help='Comma-separated page numbers.'
+              ' Example: 1,3,4 or 1,4-end.')
+@click.option('-o', '--output', help='Output file path.')
@click.option('-f', '--format',
              type=click.Choice(['csv', 'json', 'excel', 'html']),
              help='Output file format.')
-@click.option('-z', '--zip', is_flag=True, help='Whether or not to create a ZIP'
-              ' archive.')
-@click.option('-split', '--split_text', is_flag=True, help='Whether or not to'
-              ' split text if it spans across multiple cells.')
-@click.option('-flag', '--flag_size', is_flag=True, help='(inactive) Whether or'
-              ' not to flag text which has uncommon size. (Useful to detect'
-              ' super/subscripts)')
+@click.option('-z', '--zip', is_flag=True, help='Create ZIP archive.')
+@click.option('-split', '--split_text', is_flag=True,
+              help='Split text that spans across multiple cells.')
+@click.option('-flag', '--flag_size', is_flag=True, help='Flag text based on'
+              ' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
-              help='char_margin, line_margin, word_margin for PDFMiner.')
+              help='PDFMiner char_margin, line_margin and word_margin.')
@click.pass_context
 def cli(ctx, *args, **kwargs):
+    """Camelot: PDF Table Extraction for Humans"""
    ctx.obj = Config()
    for key, value in kwargs.iteritems():
        ctx.obj.set_config(key, value)
@ -44,45 +44,42 @@ def cli(ctx, *args, **kwargs):

@cli.command('lattice')
@click.option('-T', '--table_area', default=[], multiple=True,
-              help='Table areas (x1,y1,x2,y2) to process.\n'
-              ' x1, y1 -> left-top and x2, y2 -> right-bottom')
+              help='Table areas to process. Example: x1,y1,x2,y2'
+              ' where x1, y1 -> left-top and x2, y2 -> right-bottom.')
@click.option('-back', '--process_background', is_flag=True,
-              help='Whether or not to process lines that are in'
-              ' background.')
+              help='Process background lines.')
@click.option('-scale', '--line_size_scaling', default=15,
-              help='Factor by which the page dimensions will be'
-              ' divided to get smallest length of detected lines.')
+              help='Line size scaling factor. The larger the value,'
+              ' the smaller the detected lines.')
@click.option('-copy', '--copy_text', default=[], type=click.Choice(['h', 'v']),
-              multiple=True, help='Specify direction'
-              ' in which text will be copied over in a spanning cell.')
+              multiple=True, help='Direction in which text in a spanning cell'
+              ' will be copied over.')
@click.option('-shift', '--shift_text', default=['l', 't'],
              type=click.Choice(['', 'l', 'r', 't', 'b']), multiple=True,
-              help='Specify direction in which text in a spanning'
-              ' cell should flow.')
+              help='Direction in which text in a spanning cell will flow.')
@click.option('-l', '--line_close_tol', default=2,
              help='Tolerance parameter used to merge close vertical'
-              ' lines and close horizontal lines.')
+              ' and horizontal lines.')
@click.option('-j', '--joint_close_tol', default=2,
              help='Tolerance parameter used to decide whether'
              ' the detected lines and points lie close to each other.')
@click.option('-block', '--threshold_blocksize', default=15,
              help='For adaptive thresholding, size of a pixel'
              ' neighborhood that is used to calculate a threshold value for'
-              ' the pixel: 3, 5, 7, and so on.')
+              ' the pixel. Example: 3, 5, 7, and so on.')
@click.option('-const', '--threshold_constant', default=-2,
              help='For adaptive thresholding, constant subtracted'
-              ' from the mean or weighted mean.\nNormally, it is positive but'
+              ' from the mean or weighted mean. Normally, it is positive but'
              ' may be zero or negative as well.')
@click.option('-I', '--iterations', default=0,
-              help='Number of times for erosion/dilation is'
-              ' applied.')
+              help='Number of times for erosion/dilation will be applied.')
@click.option('-plot', '--plot_type',
              type=click.Choice(['text', 'table', 'contour', 'joint', 'line']),
-              help='Plot geometry found on PDF page for debugging.')
+              help='Plot geometry found on PDF page, for debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
 def lattice(c, *args, **kwargs):
-    """Use lines between text to parse table."""
+    """Use lines between text to parse the table."""
    conf = c.config
    pages = conf.pop('pages')
    output = conf.pop('output')
@ -105,29 +102,29 @@ def lattice(c, *args, **kwargs):
            table.plot(plot_type)
    else:
        if output is None:
-            raise click.UsageError('Please specify output filepath using --output')
+            raise click.UsageError('Please specify output file path using --output')
        if f is None:
-            raise click.UsageError('Please specify output format using --format')
+            raise click.UsageError('Please specify output file format using --format')
        tables.export(output, f=f, compress=compress)


@cli.command('stream')
@click.option('-T', '--table_area', default=[], multiple=True,
-              help='Table areas (x1,y1,x2,y2) to process.\n'
-              ' x1, y1 -> left-top and x2, y2 -> right-bottom')
+              help='Table areas to process. Example: x1,y1,x2,y2'
+              ' where x1, y1 -> left-top and x2, y2 -> right-bottom.')
@click.option('-C', '--columns', default=[], multiple=True,
-              help='x-coordinates of column separators.')
-@click.option('-r', '--row_close_tol', default=2, help='Rows will be'
-              ' formed by combining text vertically within this tolerance.')
-@click.option('-c', '--col_close_tol', default=0, help='Columns will'
-              ' be formed by combining text horizontally within this tolerance.')
+              help='X coordinates of column separators.')
+@click.option('-r', '--row_close_tol', default=2, help='Tolerance parameter'
+              ' used to combine text vertically, to generate rows.')
+@click.option('-c', '--col_close_tol', default=0, help='Tolerance parameter'
+              ' used to combine text horizontally, to generate columns.')
@click.option('-plot', '--plot_type',
              type=click.Choice(['text', 'table']),
              help='Plot geometry found on PDF page for debugging.')
@click.argument('filepath', type=click.Path(exists=True))
@pass_config
 def stream(c, *args, **kwargs):
-    """Use spaces between text to parse table."""
+    """Use spaces between text to parse the table."""
    conf = c.config
    pages = conf.pop('pages')
    output = conf.pop('output')
@ -149,7 +146,7 @@ def stream(c, *args, **kwargs):
            table.plot(plot_type)
    else:
        if output is None:
-            raise click.UsageError('Please specify output filepath using --output')
+            raise click.UsageError('Please specify output file path using --output')
        if f is None:
-            raise click.UsageError('Please specify output format using --format')
+            raise click.UsageError('Please specify output file format using --format')
        tables.export(output, f=f, compress=compress)
--- a/camelot/core.py
+++ b/camelot/core.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import os
 import json
 import zipfile
@ -11,7 +13,7 @@ from .plotting import *

 class Cell(object):
    """Defines a cell in a table with coordinates relative to a
-    left-bottom origin. (pdf coordinate space)
+    left-bottom origin. (PDF coordinate space)

    Parameters
    ----------
@ -89,7 +91,7 @@ class Cell(object):

 class Table(object):
    """Defines a table with coordinates relative to a left-bottom
-    origin. (pdf coordinate space)
+    origin. (PDF coordinate space)

    Parameters
    ----------
@ -110,9 +112,9 @@ class Table(object):
    whitespace : float
        Percentage of whitespace in the table.
    order : int
-        Table number on pdf page.
+        Table number on PDF page.
    page : int
-        Pdf page number.
+        PDF page number.

    """
    def __init__(self, cols, rows):
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import os

 from PyPDF2 import PdfFileReader, PdfFileWriter
@ -10,16 +12,16 @@ from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,

 class PDFHandler(object):
    """Handles all operations like temp directory creation, splitting
-    file into single page pdfs, parsing each pdf and then removing the
+    file into single page PDFs, parsing each PDF and then removing the
    temp directory.

    Parameters
    ----------
    filename : str
-        Path to pdf file.
+        Path to PDF file.
    pages : str, optional (default: '1')
-        Comma-separated page numbers to parse.
-        Example: 1,3,4 or 1,4-end
+        Comma-separated page numbers.
+        Example: 1,3,4 or 1,4-end.

    """
    def __init__(self, filename, pages='1'):
@ -34,10 +36,10 @@ class PDFHandler(object):
        Parameters
        ----------
        filename : str
-            Path to pdf file.
+            Path to PDF file.
        pages : str, optional (default: '1')
-            Comma-separated page numbers to parse.
-            Example: 1,3,4 or 1,4-end
+            Comma-separated page numbers.
+            Example: 1,3,4 or 1,4-end.

        Returns
        -------
@ -67,16 +69,16 @@ class PDFHandler(object):
        return sorted(set(P))

    def _save_page(self, filename, page, temp):
-        """Saves specified page from pdf into a temporary directory.
+        """Saves specified page from PDF into a temporary directory.

        Parameters
        ----------
        filename : str
-            Path to pdf file.
+            Path to PDF file.
        page : int
-            Page number
+            Page number.
        temp : str
-            Tmp directory
+            Tmp directory.

        """
        with open(filename, 'rb') as fileobj:
@ -91,7 +93,7 @@ class PDFHandler(object):
            with open(fpath, 'wb') as f:
                outfile.write(f)
            layout, dim = get_page_layout(fpath)
-            # fix rotated pdf
+            # fix rotated PDF
            lttextlh = get_text_objects(layout, ltype="lh")
            lttextlv = get_text_objects(layout, ltype="lv")
            ltchar = get_text_objects(layout, ltype="char")
@ -114,7 +116,7 @@ class PDFHandler(object):

    def parse(self, flavor='lattice', **kwargs):
        """Extracts tables by calling parser.get_tables on all single
-        page pdfs.
+        page PDFs.

        Parameters
        ----------
@ -127,10 +129,10 @@ class PDFHandler(object):
        Returns
        -------
        tables : camelot.core.TableList
-            List of tables found in pdf.
+            List of tables found in PDF.
        geometry : camelot.core.GeometryList
-            List of geometry objects (contours, lines, joints)
-            found in pdf.
+            List of geometry objects (contours, lines, joints) found
+            in PDF.

        """
        tables = []
--- a/camelot/image_processing.py
+++ b/camelot/image_processing.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from __future__ import division
 from itertools import groupby
 from operator import itemgetter
--- a/camelot/io.py
+++ b/camelot/io.py
@ -1,9 +1,11 @@
+# -*- coding: utf-8 -*-
+
 from .handlers import PDFHandler
 from .utils import validate_input, remove_extra


 def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
-    """Read PDF and return parsed data tables.
+    """Read PDF and return extracted tables.

    Note: kwargs annotated with ^ can only be used with flavor='stream'
    and kwargs annotated with * can only be used with flavor='lattice'.
@ -11,53 +13,47 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
    Parameters
    ----------
    filepath : str
-        Path to pdf file.
+        Path to PDF file.
    pages : str, optional (default: '1')
-        Comma-separated page numbers to parse.
-        Example: 1,3,4 or 1,4-end
+        Comma-separated page numbers.
+        Example: 1,3,4 or 1,4-end.
    flavor : str (default: 'lattice')
        The parsing method to use ('lattice' or 'stream').
        Lattice is used by default.
    table_area : list, optional (default: None)
-        List of table areas to process as strings of the form
-        x1,y1,x2,y2 where (x1, y1) -> left-top and
-        (x2, y2) -> right-bottom in pdf coordinate space.
+        List of table area strings of the form x1,y1,x2,y2
+        where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+        in PDF coordinate space.
    columns^ : list, optional (default: None)
-        List of column x-coordinates as strings where the coordinates
+        List of column x-coordinate strings where the coordinates
        are comma-separated.
    split_text : bool, optional (default: False)
-        Whether or not to split a text line if it spans across
-        multiple cells.
+        Split text that spans across multiple cells.
    flag_size : bool, optional (default: False)
-        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string. (Useful for
-        super and subscripts)
+        Flag text based on font size. Useful to detect
+        super/subscripts. Adds <s></s> around flagged text.
    row_close_tol^ : int, optional (default: 2)
-        Rows will be formed by combining text vertically
-        within this tolerance.
+        Tolerance parameter used to combine text vertically,
+        to generate rows.
    col_close_tol^ : int, optional (default: 0)
-        Columns will be formed by combining text horizontally
-        within this tolerance.
+        Tolerance parameter used to combine text horizontally,
+        to generate columns.
    process_background* : bool, optional (default: False)
-        Whether or not to process lines that are in background.
+        Process background lines.
    line_size_scaling* : int, optional (default: 15)
-        Factor by which the page dimensions will be divided to get
-        smallest length of lines that should be detected.
-
-        The larger this value, smaller the detected lines. Making it
-        too large will lead to text being detected as lines.
+        Line size scaling factor. The larger the value the smaller
+        the detected lines. Making it very large will lead to text
+        being detected as lines.
    copy_text* : list, optional (default: None)
        {'h', 'v'}
-        Select one or more strings from above and pass them as a list
-        to specify the direction in which text should be copied over
-        when a cell spans multiple rows or columns.
+        Direction in which text in a spanning cell will be copied
+        over.
    shift_text* : list, optional (default: ['l', 't'])
        {'l', 'r', 't', 'b'}
-        Select one or more strings from above and pass them as a list
-        to specify where the text in a spanning cell should flow.
+        Direction in which text in a spanning cell will flow.
    line_close_tol* : int, optional (default: 2)
-        Tolerance parameter used to merge vertical and horizontal
-        detected lines which lie close to each other.
+        Tolerance parameter used to merge close vertical and horizontal
+        lines.
    joint_close_tol* : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
@ -76,7 +72,7 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):

        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    margins : tuple
-        PDFMiner margins. (char_margin, line_margin, word_margin)
+        PDFMiner char_margin, line_margin and word_margin.

        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.

--- a/camelot/parsers/__init__.py
+++ b/camelot/parsers/__init__.py
@ -1,2 +1,4 @@
+# -*- coding: utf-8 -*-
+
 from .stream import Stream
 from .lattice import Lattice
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import os

 from ..utils import get_page_layout, get_text_objects
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from __future__ import division
 import os
 import copy
@ -21,41 +23,35 @@ logger = setup_logging(__name__)

 class Lattice(BaseParser):
    """Lattice method of parsing looks for lines between text
-    to parse table.
+    to parse the table.

    Parameters
    ----------
    table_area : list, optional (default: None)
-        List of table areas to analyze as strings of the form
-        x1,y1,x2,y2 where (x1, y1) -> left-top and
-        (x2, y2) -> right-bottom in pdf coordinate space.
+        List of table area strings of the form x1,y1,x2,y2
+        where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+        in PDF coordinate space.
    process_background : bool, optional (default: False)
-        Whether or not to process lines that are in background.
+        Process background lines.
    line_size_scaling : int, optional (default: 15)
-        Factor by which the page dimensions will be divided to get
-        smallest length of lines that should be detected.
-
-        The larger this value, smaller the detected lines. Making it
-        too large will lead to text being detected as lines.
+        Line size scaling factor. The larger the value the smaller
+        the detected lines. Making it very large will lead to text
+        being detected as lines.
    copy_text : list, optional (default: None)
        {'h', 'v'}
-        Select one or more strings from above and pass them as a list
-        to specify the direction in which text should be copied over
-        when a cell spans multiple rows or columns.
+        Direction in which text in a spanning cell will be copied
+        over.
    shift_text : list, optional (default: ['l', 't'])
        {'l', 'r', 't', 'b'}
-        Select one or more strings from above and pass them as a list
-        to specify where the text in a spanning cell should flow.
+        Direction in which text in a spanning cell will flow.
    split_text : bool, optional (default: False)
-        Whether or not to split a text line if it spans across
-        multiple cells.
+        Split text that spans across multiple cells.
    flag_size : bool, optional (default: False)
-        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string. (Useful for
-        super and subscripts)
+        Flag text based on font size. Useful to detect
+        super/subscripts. Adds <s></s> around flagged text.
    line_close_tol : int, optional (default: 2)
-        Tolerance parameter used to merge vertical and horizontal
-        detected lines which lie close to each other.
+        Tolerance parameter used to merge close vertical and horizontal
+        lines.
    joint_close_tol : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
@ -74,7 +70,7 @@ class Lattice(BaseParser):

        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    margins : tuple
-        PDFMiner margins. (char_margin, line_margin, word_margin)
+        PDFMiner char_margin, line_margin and word_margin.

        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.

--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from __future__ import division
 import os
 import logging
@ -16,7 +18,7 @@ logger = setup_logging(__name__)

 class Stream(BaseParser):
    """Stream method of parsing looks for spaces between text
-    to parse table.
+    to parse the table.

    If you want to specify columns when specifying multiple table
    areas, make sure that the length of both lists are equal.
@ -24,27 +26,25 @@ class Stream(BaseParser):
    Parameters
    ----------
    table_area : list, optional (default: None)
-        List of table areas to analyze as strings of the form
-        x1,y1,x2,y2 where (x1, y1) -> left-top and
-        (x2, y2) -> right-bottom in pdf coordinate space.
+        List of table area strings of the form x1,y1,x2,y2
+        where (x1, y1) -> left-top and (x2, y2) -> right-bottom
+        in PDF coordinate space.
    columns : list, optional (default: None)
-        List of column x-coordinates as strings where the coordinates
+        List of column x-coordinate strings where the coordinates
        are comma-separated.
    split_text : bool, optional (default: False)
-        Whether or not to split a text line if it spans across
-        multiple cells.
+        Split text that spans across multiple cells.
    flag_size : bool, optional (default: False)
-        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string. (Useful for
-        super and subscripts)
+        Flag text based on font size. Useful to detect
+        super/subscripts. Adds <s></s> around flagged text.
    row_close_tol : int, optional (default: 2)
-        Rows will be formed by combining text vertically
-        within this tolerance.
+        Tolerance parameter used to combine text vertically,
+        to generate rows.
    col_close_tol : int, optional (default: 0)
-        Columns will be formed by combining text horizontally
-        within this tolerance.
+        Tolerance parameter used to combine text horizontally,
+        to generate columns.
    margins : tuple, optional (default: (1.0, 0.5, 0.1))
-        PDFMiner margins. (char_margin, line_margin, word_margin)
+        PDFMiner char_margin, line_margin and word_margin.

        For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.

--- a/setup.py
+++ b/setup.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import os
 from setuptools import find_packages
 from pkg_resources import parse_version
@ -8,16 +10,8 @@ about = {}
 with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
    exec(f.read(), about)

-# TODO: Move these to __version__.py
-NAME = 'camelot-py'
-VERSION = about['__version__']
-DESCRIPTION = 'PDF Table Parsing for Humans'
-with open('README.md') as f:
-    LONG_DESCRIPTION = f.read()
-URL = 'https://github.com/socialcopsdev/camelot'
-AUTHOR = 'Vinayak Mehta'
-AUTHOR_EMAIL = 'vmehta94@gmail.com'
-LICENSE = 'MIT License'
+with open('README.md', 'r') as f:
+    readme = f.read()


 def setup_package():
@ -31,14 +25,14 @@ def setup_package():
        for line in f:
            dev_reqs.append(line.strip())

-    metadata = dict(name=NAME,
-                    version=VERSION,
-                    description=DESCRIPTION,
-                    long_description=LONG_DESCRIPTION,
-                    url=URL,
-                    author=AUTHOR,
-                    author_email=AUTHOR_EMAIL,
-                    license=LICENSE,
+    metadata = dict(name=about['__title__'],
+                    version=about['__version__'],
+                    description=about['__description__'],
+                    long_description=readme,
+                    url=about['__url__'],
+                    author=about['__author__'],
+                    author_email=about['__author_email__'],
+                    license=about['__license__'],
                    packages=find_packages(exclude=('tests',)),
                    install_requires=reqs,
                    extras_require={
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import os

 import pandas as pd