Fix docstrings and interlinks

2018-09-11 07:35:30 +05:30 · 2018-09-11 07:35:30 +05:30 · 17ea5f335e
parent 3713c08642
commit 17ea5f335e
7 changed files with 65 additions and 49 deletions
--- a/camelot/core.py
+++ b/camelot/core.py
@ -46,7 +46,6 @@ class Cell(object):
        Whether or not cell spans vertically.
    text : string
        Text assigned to cell.
-    bound

    """

@ -101,8 +100,7 @@ class Table(object):

    Attributes
    ----------
-    df : object
-        pandas.DataFrame
+    df : :class:`pandas.DataFrame`
    shape : tuple
        Shape of the table.
    accuracy : float
@ -113,8 +111,6 @@ class Table(object):
        Table number on pdf page.
    page : int
        Pdf page number.
-    data
-    parsing_report

    """
    def __init__(self, cols, rows):
@ -143,7 +139,7 @@ class Table(object):

    @property
    def parsing_report(self):
-        """Returns a parsing report with accuracy, %whitespace,
+        """Returns a parsing report with %accuracy, %whitespace,
        table number on page and page number.
        """
        # pretty?
@ -320,10 +316,15 @@ class Table(object):
        return self

    def to_csv(self, path, **kwargs):
-        """Write Table to a comma-separated values (csv) file.
+        """Writes Table to a comma-separated values (csv) file.
+
+        For kwargs, check :meth:`pandas.DataFrame.to_csv`.
+
+        Parameters
+        ----------
+        path : str
+            Output filepath.

-        Check `pandas.DataFrame.to_csv <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html>`_
-        kwargs for more details around what kwargs to use.
        """
        kw = {
            'encoding': 'utf-8',
@ -334,10 +335,15 @@ class Table(object):
        self.df.to_csv(path, **kw)

    def to_json(self, path, **kwargs):
-        """Write Table to a JSON file.
+        """Writes Table to a JSON file.
+
+        For kwargs, check :meth:`pandas.DataFrame.to_json`.
+
+        Parameters
+        ----------
+        path : str
+            Output filepath.

-        Check `pandas.DataFrame.to_json <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html>`_
-        kwargs for more details around what kwargs to use.
        """
        kw = {
            'orient': 'records'
@ -348,10 +354,15 @@ class Table(object):
            f.write(json_string)

    def to_excel(self, path, **kwargs):
-        """Write Table to an Excel file.
+        """Writes Table to an Excel file.
+
+        For kwargs, check :meth:`pandas.DataFrame.to_excel`.
+
+        Parameters
+        ----------
+        path : str
+            Output filepath.

-        Check `pandas.DataFrame.to_excel <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html>`_
-        kwargs for more details around what kwargs to use.
        """
        kw = {
            'sheet_name': 'page-{}-table-{}'.format(self.page, self.order),
@ -363,10 +374,15 @@ class Table(object):
        writer.save()

    def to_html(self, path, **kwargs):
-        """Write Table to an HTML file.
+        """Writes Table to an HTML file.
+
+        For kwargs, check :meth:`pandas.DataFrame.to_html`.
+
+        Parameters
+        ----------
+        path : str
+            Output filepath.

-        Check `pandas.DataFrame.to_html <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_html.html>`_
-        kwargs for more details around what kwargs to use.
        """
        html_string = self.df.to_html(**kwargs)
        with open(path, 'w') as f:
@ -434,7 +450,7 @@ class TableList(object):
        Parameters
        ----------
        path : str
-            Filepath
+            Output filepath.
        f : str
            File format. Can be csv, json, excel and html.
        compress : bool
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@ -13,8 +13,8 @@ class PDFHandler(object):
    file into single page pdfs, parsing each pdf and then removing the
    temp directory.

-    Parameter
-    ---------
+    Parameters
+    ----------
    filename : str
        Path to pdf file.
    pages : str
--- a/camelot/io.py
+++ b/camelot/io.py
@ -30,8 +30,8 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)
    row_close_tol^ : int, optional (default: 2)
        Rows will be formed by combining text vertically
        within this tolerance.
@ -61,24 +61,24 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
    joint_close_tol* : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
-    threshold_blocksize : int, optional (default: 15)
+    threshold_blocksize* : int, optional (default: 15)
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    threshold_constant : int, optional (default: -2)
+    threshold_constant* : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    iterations : int, optional (default: 0)
+    iterations* : int, optional (default: 0)
        Number of times for erosion/dilation is applied.

        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    margins : tuple
        PDFMiner margins. (char_margin, line_margin, word_margin)

-        For for information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
+        For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.

    Returns
    -------
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -51,8 +51,8 @@ class Lattice(BaseParser):
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)
    line_close_tol : int, optional (default: 2)
        Tolerance parameter used to merge vertical and horizontal
        detected lines which lie close to each other.
@ -76,7 +76,7 @@ class Lattice(BaseParser):
    margins : tuple
        PDFMiner margins. (char_margin, line_margin, word_margin)

-        For for information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
+        For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
    debug : bool, optional (default: False)
        Whether or not to return all text objects on the page
        which can be used to generate a matplotlib plot, to get
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -35,8 +35,8 @@ class Stream(BaseParser):
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)
    row_close_tol : int, optional (default: 2)
        Rows will be formed by combining text vertically
        within this tolerance.
@ -46,7 +46,7 @@ class Stream(BaseParser):
    margins : tuple, optional (default: (1.0, 0.5, 0.1))
        PDFMiner margins. (char_margin, line_margin, word_margin)

-        For for information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
+        For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
    debug : bool, optional (default: False)
        Whether or not to return all text objects on the page
        which can be used to generate a matplotlib plot, to get
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -25,12 +25,12 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
        Whether or not to use Lattice method of parsing. Stream
        is used by default.
    geometry_type : str, optional (default: None)
-        'text' : Plot text objects found on page, useful to get
-                 table_area and columns coordinates.
-        'table' : Plot parsed table.
-        'contour'* : Plot detected rectangles.
-        'joint'* : Plot detected line intersections.
-        'line'* : Plot detected lines.
+        * 'text' : Plot text objects found on page. (Useful to get \
+                   table_area and columns coordinates.)
+        * 'table' : Plot parsed table.
+        * 'contour'* : Plot detected rectangles.
+        * 'joint'* : Plot detected line intersections.
+        * 'line'* : Plot detected lines.
    table_area : list, optional (default: None)
        List of table areas to process as strings of the form
        x1,y1,x2,y2 where (x1, y1) -> left-top and
@ -43,8 +43,8 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)
    row_close_tol^ : int, optional (default: 2)
        Rows will be formed by combining text vertically
        within this tolerance.
@ -74,24 +74,24 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
    joint_close_tol* : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
-    threshold_blocksize : int, optional (default: 15)
+    threshold_blocksize* : int, optional (default: 15)
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    threshold_constant : int, optional (default: -2)
+    threshold_constant* : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
-    iterations : int, optional (default: 0)
+    iterations* : int, optional (default: 0)
        Number of times for erosion/dilation is applied.

        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    margins : tuple
        PDFMiner margins. (char_margin, line_margin, word_margin)

-        For for information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
+        For more information, refer to the `PDFMiner docs <https://euske.github.io/pdfminer/>`_.

    """
    validate_input(kwargs, mesh=mesh, geometry_type=geometry_type)
--- a/camelot/utils.py
+++ b/camelot/utils.py
@ -454,8 +454,8 @@ def split_textline(table, textline, direction, flag_size=False):
        Direction of the PDFMiner LTTextLine object.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)

    Returns
    -------
@ -530,8 +530,8 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False):
        multiple cells.
    flag_size : bool, optional (default: False)
        Whether or not to highlight a substring using <s></s>
-        if its size is different from rest of the string, useful for
-        super and subscripts.
+        if its size is different from rest of the string. (Useful for
+        super and subscripts.)

    Returns
    -------