Fix docstrings
parent
050107b63d
commit
57917426e8
|
|
@ -130,7 +130,7 @@ class Lattice:
|
||||||
|
|
||||||
fill : None, 'h', 'v', 'hv'
|
fill : None, 'h', 'v', 'hv'
|
||||||
Fill data in horizontal and/or vertical spanning
|
Fill data in horizontal and/or vertical spanning
|
||||||
cells. (optional)
|
cells. (optional, default: None)
|
||||||
|
|
||||||
scale : int
|
scale : int
|
||||||
Scaling factor. Large scaling factor leads to smaller lines
|
Scaling factor. Large scaling factor leads to smaller lines
|
||||||
|
|
@ -150,6 +150,7 @@ class Lattice:
|
||||||
|
|
||||||
debug : 'contour', 'line', 'joint', 'table'
|
debug : 'contour', 'line', 'joint', 'table'
|
||||||
Debug by visualizing pdf geometry.
|
Debug by visualizing pdf geometry.
|
||||||
|
(optional, default: None)
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
----------
|
----------
|
||||||
|
|
@ -299,7 +300,6 @@ class Lattice:
|
||||||
cv2.rectangle(img, (t[0], t[1]),
|
cv2.rectangle(img, (t[0], t[1]),
|
||||||
(t[2], t[3]), (255, 0, 0), 3)
|
(t[2], t[3]), (255, 0, 0), 3)
|
||||||
plt.imshow(img)
|
plt.imshow(img)
|
||||||
plt.axis('off')
|
|
||||||
plt.show()
|
plt.show()
|
||||||
elif geometry == 'joint':
|
elif geometry == 'joint':
|
||||||
x_coord = []
|
x_coord = []
|
||||||
|
|
@ -314,7 +314,6 @@ class Lattice:
|
||||||
plt.plot(x_coord, y_coord, 'ro')
|
plt.plot(x_coord, y_coord, 'ro')
|
||||||
plt.axis([0, max_x + 100, max_y + 100, 0])
|
plt.axis([0, max_x + 100, max_y + 100, 0])
|
||||||
plt.imshow(img)
|
plt.imshow(img)
|
||||||
plt.axis('off')
|
|
||||||
plt.show()
|
plt.show()
|
||||||
elif geometry == 'line':
|
elif geometry == 'line':
|
||||||
for pkey in self.debug_segments.keys():
|
for pkey in self.debug_segments.keys():
|
||||||
|
|
@ -323,7 +322,6 @@ class Lattice:
|
||||||
plt.plot([v[0], v[2]], [v[1], v[3]])
|
plt.plot([v[0], v[2]], [v[1], v[3]])
|
||||||
for h in h_s:
|
for h in h_s:
|
||||||
plt.plot([h[0], h[2]], [h[1], h[3]])
|
plt.plot([h[0], h[2]], [h[1], h[3]])
|
||||||
plt.axis('off')
|
|
||||||
plt.show()
|
plt.show()
|
||||||
elif geometry == 'table':
|
elif geometry == 'table':
|
||||||
for pkey in self.debug_tables.keys():
|
for pkey in self.debug_tables.keys():
|
||||||
|
|
@ -350,5 +348,4 @@ class Lattice:
|
||||||
table.cells[i][j].rb[0]],
|
table.cells[i][j].rb[0]],
|
||||||
[table.cells[i][j].lb[1],
|
[table.cells[i][j].lb[1],
|
||||||
table.cells[i][j].rb[1]])
|
table.cells[i][j].rb[1]])
|
||||||
plt.axis('off')
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
@ -50,7 +50,7 @@ def _extract_text_objects(layout, LTObject, t=None):
|
||||||
LTObject : object
|
LTObject : object
|
||||||
Text object, either LTChar or LTTextLineHorizontal.
|
Text object, either LTChar or LTTextLineHorizontal.
|
||||||
|
|
||||||
t : list
|
t : list (optional, default: None)
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
@ -84,6 +84,7 @@ class Pdf:
|
||||||
|
|
||||||
pagenos : list
|
pagenos : list
|
||||||
List of dicts which specify pdf page ranges.
|
List of dicts which specify pdf page ranges.
|
||||||
|
(optional, default: [{'start': 1, 'end': 1}])
|
||||||
|
|
||||||
char_margin : float
|
char_margin : float
|
||||||
Chars closer than char_margin are grouped together to form a
|
Chars closer than char_margin are grouped together to form a
|
||||||
|
|
@ -96,23 +97,6 @@ class Pdf:
|
||||||
word_margin : float
|
word_margin : float
|
||||||
Insert blank spaces between chars if distance between words
|
Insert blank spaces between chars if distance between words
|
||||||
is greater than word_margin. (optional, default: 0.1)
|
is greater than word_margin. (optional, default: 0.1)
|
||||||
|
|
||||||
Attributes
|
|
||||||
----------
|
|
||||||
temp : string
|
|
||||||
Path to temporary directory.
|
|
||||||
|
|
||||||
lattice_objects : dict
|
|
||||||
List of text objects.
|
|
||||||
|
|
||||||
stream_objects : dict
|
|
||||||
List of text objects.
|
|
||||||
|
|
||||||
width : dict
|
|
||||||
List of dicts with width of each pdf page.
|
|
||||||
|
|
||||||
height : dict
|
|
||||||
List of dicts with height of each pdf page.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, pdfname, pagenos=[{'start': 1, 'end': 1}],
|
def __init__(self, pdfname, pagenos=[{'start': 1, 'end': 1}],
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ def _group_rows(text, ytol=2):
|
||||||
|
|
||||||
ytol : int
|
ytol : int
|
||||||
Tolerance to account for when grouping rows
|
Tolerance to account for when grouping rows
|
||||||
together. (default: 2, optional)
|
together. (optional, default: 2)
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
|
@ -206,5 +206,4 @@ class Stream:
|
||||||
)
|
)
|
||||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||||
plt.axis('off')
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
|
||||||
|
|
@ -39,9 +39,9 @@ class Table:
|
||||||
horizontal : list
|
horizontal : list
|
||||||
List of horizontal line segments.
|
List of horizontal line segments.
|
||||||
|
|
||||||
jtol : int, default: 2, optional
|
jtol : int
|
||||||
Tolerance to account for when comparing joint and line
|
Tolerance to account for when comparing joint and line
|
||||||
coordinates.
|
coordinates. (optional, default: 2)
|
||||||
"""
|
"""
|
||||||
for v in vertical:
|
for v in vertical:
|
||||||
# find closest x coord
|
# find closest x coord
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,10 @@ from camelot.stream import Stream
|
||||||
|
|
||||||
|
|
||||||
doc = """
|
doc = """
|
||||||
camelot parses tables from PDFs!
|
Camelot: PDF parsing made simpler!
|
||||||
|
|
||||||
usage:
|
usage:
|
||||||
camelot.py [options] <method> [<args>...]
|
camelot [options] <method> [<args>...]
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help Show this screen.
|
-h, --help Show this screen.
|
||||||
|
|
@ -38,7 +38,7 @@ lattice_doc = """
|
||||||
Lattice method looks for lines between data to form a table.
|
Lattice method looks for lines between data to form a table.
|
||||||
|
|
||||||
usage:
|
usage:
|
||||||
camelot.py lattice [options] [--] <file>
|
camelot lattice [options] [--] <file>
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-F, --fill <fill> Fill data in horizontal and/or vertical spanning
|
-F, --fill <fill> Fill data in horizontal and/or vertical spanning
|
||||||
|
|
@ -59,7 +59,7 @@ stream_doc = """
|
||||||
Stream method looks for spaces between data to form a table.
|
Stream method looks for spaces between data to form a table.
|
||||||
|
|
||||||
usage:
|
usage:
|
||||||
camelot.py stream [options] [--] <file>
|
camelot stream [options] [--] <file>
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-n, --ncols <ncols> Number of columns. [default: 0]
|
-n, --ncols <ncols> Number of columns. [default: 0]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue