From 17ea5f335e8a755570e2dbbbb6dda0ae2535b6b7 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Tue, 11 Sep 2018 07:35:30 +0530 Subject: [PATCH] Fix docstrings and interlinks --- camelot/core.py | 54 ++++++++++++++++++++++++-------------- camelot/handlers.py | 4 +-- camelot/io.py | 12 ++++----- camelot/parsers/lattice.py | 6 ++--- camelot/parsers/stream.py | 6 ++--- camelot/plotting.py | 24 ++++++++--------- camelot/utils.py | 8 +++--- 7 files changed, 65 insertions(+), 49 deletions(-) diff --git a/camelot/core.py b/camelot/core.py index e09dc1e..62c4ba4 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -46,7 +46,6 @@ class Cell(object): Whether or not cell spans vertically. text : string Text assigned to cell. - bound """ @@ -101,8 +100,7 @@ class Table(object): Attributes ---------- - df : object - pandas.DataFrame + df : :class:`pandas.DataFrame` shape : tuple Shape of the table. accuracy : float @@ -113,8 +111,6 @@ class Table(object): Table number on pdf page. page : int Pdf page number. - data - parsing_report """ def __init__(self, cols, rows): @@ -143,7 +139,7 @@ class Table(object): @property def parsing_report(self): - """Returns a parsing report with accuracy, %whitespace, + """Returns a parsing report with %accuracy, %whitespace, table number on page and page number. """ # pretty? @@ -320,10 +316,15 @@ class Table(object): return self def to_csv(self, path, **kwargs): - """Write Table to a comma-separated values (csv) file. + """Writes Table to a comma-separated values (csv) file. + + For kwargs, check :meth:`pandas.DataFrame.to_csv`. + + Parameters + ---------- + path : str + Output filepath. - Check `pandas.DataFrame.to_csv `_ - kwargs for more details around what kwargs to use. """ kw = { 'encoding': 'utf-8', @@ -334,10 +335,15 @@ class Table(object): self.df.to_csv(path, **kw) def to_json(self, path, **kwargs): - """Write Table to a JSON file. + """Writes Table to a JSON file. + + For kwargs, check :meth:`pandas.DataFrame.to_json`. 
+ Parameters ---------- path : str Output filepath. - Check `pandas.DataFrame.to_json `_ - kwargs for more details around what kwargs to use. """ kw = { 'orient': 'records' @@ -348,10 +354,15 @@ class Table(object): f.write(json_string) def to_excel(self, path, **kwargs): - """Write Table to an Excel file. + """Writes Table to an Excel file. + + For kwargs, check :meth:`pandas.DataFrame.to_excel`. + + Parameters + ---------- + path : str + Output filepath. - Check `pandas.DataFrame.to_excel `_ - kwargs for more details around what kwargs to use. """ kw = { 'sheet_name': 'page-{}-table-{}'.format(self.page, self.order), @@ -363,10 +374,15 @@ class Table(object): writer.save() def to_html(self, path, **kwargs): - """Write Table to an HTML file. + """Writes Table to an HTML file. + + For kwargs, check :meth:`pandas.DataFrame.to_html`. + + Parameters + ---------- + path : str + Output filepath. - Check `pandas.DataFrame.to_html `_ - kwargs for more details around what kwargs to use. """ html_string = self.df.to_html(**kwargs) with open(path, 'w') as f: @@ -434,7 +450,7 @@ class TableList(object): Parameters ---------- path : str - Filepath + Output filepath. f : str File format. Can be csv, json, excel and html. compress : bool diff --git a/camelot/handlers.py b/camelot/handlers.py index 516cc3b..f640c1b 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -13,8 +13,8 @@ class PDFHandler(object): file into single page pdfs, parsing each pdf and then removing the temp directory. - Parameter - --------- + Parameters + ---------- filename : str Path to pdf file. pages : str diff --git a/camelot/io.py b/camelot/io.py index 8297253..328b107 100644 --- a/camelot/io.py +++ b/camelot/io.py @@ -30,8 +30,8 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs): multiple cells. flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> - if its size is different from rest of the string, useful for - super and subscripts.
+ if its size is different from rest of the string. (Useful for + super and subscripts) row_close_tol^ : int, optional (default: 2) Rows will be formed by combining text vertically within this tolerance. @@ -61,24 +61,24 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs): joint_close_tol* : int, optional (default: 2) Tolerance parameter used to decide whether the detected lines and points lie close to each other. - threshold_blocksize : int, optional (default: 15) + threshold_blocksize* : int, optional (default: 15) Size of a pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on. For more information, refer `OpenCV's adaptiveThreshold `_. - threshold_constant : int, optional (default: -2) + threshold_constant* : int, optional (default: -2) Constant subtracted from the mean or weighted mean. Normally, it is positive but may be zero or negative as well. For more information, refer `OpenCV's adaptiveThreshold `_. - iterations : int, optional (default: 0) + iterations* : int, optional (default: 0) Number of times for erosion/dilation is applied. For more information, refer `OpenCV's dilate `_. margins : tuple PDFMiner margins. (char_margin, line_margin, word_margin) - For for information, refer `PDFMiner docs `_. + For more information, refer `PDFMiner docs `_. Returns ------- diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 40a9040..9e569ab 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -51,8 +51,8 @@ class Lattice(BaseParser): multiple cells. flag_size : bool, optional (default: False) Whether or not to highlight a substring using - if its size is different from rest of the string, useful for - super and subscripts. + if its size is different from rest of the string. (Useful for + super and subscripts) line_close_tol : int, optional (default: 2) Tolerance parameter used to merge vertical and horizontal detected lines which lie close to each other. 
@@ -76,7 +76,7 @@ class Lattice(BaseParser): margins : tuple PDFMiner margins. (char_margin, line_margin, word_margin) - For for information, refer `PDFMiner docs `_. + For more information, refer `PDFMiner docs `_. debug : bool, optional (default: False) Whether or not to return all text objects on the page which can be used to generate a matplotlib plot, to get diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index f547bf0..56a240d 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -35,8 +35,8 @@ class Stream(BaseParser): multiple cells. flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> - if its size is different from rest of the string, useful for - super and subscripts. + if its size is different from rest of the string. (Useful for + super and subscripts.) row_close_tol : int, optional (default: 2) Rows will be formed by combining text vertically within this tolerance. @@ -46,7 +46,7 @@ class Stream(BaseParser): margins : tuple, optional (default: (1.0, 0.5, 0.1)) PDFMiner margins. (char_margin, line_margin, word_margin) - For for information, refer `PDFMiner docs `_. + For more information, refer `PDFMiner docs `_. debug : bool, optional (default: False) Whether or not to return all text objects on the page which can be used to generate a matplotlib plot, to get diff --git a/camelot/plotting.py b/camelot/plotting.py index 23757e3..99365b1 100644 --- a/camelot/plotting.py +++ b/camelot/plotting.py @@ -25,12 +25,12 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs) Whether or not to use Lattice method of parsing. Stream is used by default. geometry_type : str, optional (default: None) - 'text' : Plot text objects found on page, useful to get - table_area and columns coordinates. - 'table' : Plot parsed table. - 'contour'* : Plot detected rectangles. - 'joint'* : Plot detected line intersections. - 'line'* : Plot detected lines.
+ * 'text' : Plot text objects found on page. (Useful to get \ + table_area and columns coordinates) + * 'table' : Plot parsed table. + * 'contour'* : Plot detected rectangles. + * 'joint'* : Plot detected line intersections. + * 'line'* : Plot detected lines. table_area : list, optional (default: None) List of table areas to process as strings of the form x1,y1,x2,y2 where (x1, y1) -> left-top and @@ -43,8 +43,8 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs) multiple cells. flag_size : bool, optional (default: False) Whether or not to highlight a substring using - if its size is different from rest of the string, useful for - super and subscripts. + if its size is different from rest of the string. (Useful for + super and subscripts.) row_close_tol^ : int, optional (default: 2) Rows will be formed by combining text vertically within this tolerance. @@ -74,24 +74,24 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs) joint_close_tol* : int, optional (default: 2) Tolerance parameter used to decide whether the detected lines and points lie close to each other. - threshold_blocksize : int, optional (default: 15) + threshold_blocksize* : int, optional (default: 15) Size of a pixel neighborhood that is used to calculate a threshold value for the pixel: 3, 5, 7, and so on. For more information, refer `OpenCV's adaptiveThreshold `_. - threshold_constant : int, optional (default: -2) + threshold_constant* : int, optional (default: -2) Constant subtracted from the mean or weighted mean. Normally, it is positive but may be zero or negative as well. For more information, refer `OpenCV's adaptiveThreshold `_. - iterations : int, optional (default: 0) + iterations* : int, optional (default: 0) Number of times for erosion/dilation is applied. For more information, refer `OpenCV's dilate `_. margins : tuple PDFMiner margins. (char_margin, line_margin, word_margin) - For for information, refer `PDFMiner docs `_. 
+ For more information, refer `PDFMiner docs `_. """ validate_input(kwargs, mesh=mesh, geometry_type=geometry_type) diff --git a/camelot/utils.py b/camelot/utils.py index 815f87d..c0f4a59 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -454,8 +454,8 @@ def split_textline(table, textline, direction, flag_size=False): Direction of the PDFMiner LTTextLine object. flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> - if its size is different from rest of the string, useful for - super and subscripts. + if its size is different from rest of the string. (Useful for + super and subscripts.) Returns ------- @@ -530,8 +530,8 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False): multiple cells. flag_size : bool, optional (default: False) Whether or not to highlight a substring using <s></s> - if its size is different from rest of the string, useful for - super and subscripts. + if its size is different from rest of the string. (Useful for + super and subscripts.) Returns -------