Rename kwargs and add tests
parent
f6aa21c31f
commit
50b4468aff
|
|
@ -70,10 +70,10 @@ def cli(ctx, *args, **kwargs):
|
|||
@click.option('-shift', '--shift_text', default=['l', 't'],
|
||||
type=click.Choice(['', 'l', 'r', 't', 'b']), multiple=True,
|
||||
help='Direction in which text in a spanning cell will flow.')
|
||||
@click.option('-l', '--line_close_tol', default=2,
|
||||
@click.option('-l', '--line_tol', default=2,
|
||||
help='Tolerance parameter used to merge close vertical'
|
||||
' and horizontal lines.')
|
||||
@click.option('-j', '--joint_close_tol', default=2,
|
||||
@click.option('-j', '--joint_tol', default=2,
|
||||
help='Tolerance parameter used to decide whether'
|
||||
' the detected lines and points lie close to each other.')
|
||||
@click.option('-block', '--threshold_blocksize', default=15,
|
||||
|
|
@ -137,11 +137,11 @@ def lattice(c, *args, **kwargs):
|
|||
' where x1, y1 -> left-top and x2, y2 -> right-bottom.')
|
||||
@click.option('-C', '--columns', default=[], multiple=True,
|
||||
help='X coordinates of column separators.')
|
||||
@click.option('-e', '--edge_close_tol', default=50, help='Tolerance parameter'
|
||||
@click.option('-e', '--edge_tol', default=50, help='Tolerance parameter'
|
||||
' for extending textedges vertically.')
|
||||
@click.option('-r', '--row_close_tol', default=2, help='Tolerance parameter'
|
||||
@click.option('-r', '--row_tol', default=2, help='Tolerance parameter'
|
||||
' used to combine text vertically, to generate rows.')
|
||||
@click.option('-c', '--col_close_tol', default=0, help='Tolerance parameter'
|
||||
@click.option('-c', '--column_tol', default=0, help='Tolerance parameter'
|
||||
' used to combine text horizontally, to generate columns.')
|
||||
@click.option('-plot', '--plot_type',
|
||||
type=click.Choice(['text', 'grid', 'contour', 'textedge']),
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@ class TextEdge(object):
|
|||
return '<TextEdge x={} y0={} y1={} align={} valid={}>'.format(
|
||||
round(self.x, 2), round(self.y0, 2), round(self.y1, 2), self.align, self.is_valid)
|
||||
|
||||
def update_coords(self, x, y0, edge_close_tol=50):
|
||||
def update_coords(self, x, y0, edge_tol=50):
|
||||
"""Updates the text edge's x and bottom y coordinates and sets
|
||||
the is_valid attribute.
|
||||
"""
|
||||
if np.isclose(self.y0, y0, atol=edge_close_tol):
|
||||
if np.isclose(self.y0, y0, atol=edge_tol):
|
||||
self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
|
||||
self.y0 = y0
|
||||
self.intersections += 1
|
||||
|
|
@ -72,8 +72,8 @@ class TextEdges(object):
|
|||
the PDF page. The dict has three keys based on the alignments,
|
||||
and each key's value is a list of camelot.core.TextEdge objects.
|
||||
"""
|
||||
def __init__(self, edge_close_tol=50):
|
||||
self.edge_close_tol = edge_close_tol
|
||||
def __init__(self, edge_tol=50):
|
||||
self.edge_tol = edge_tol
|
||||
self._textedges = {'left': [], 'right': [], 'middle': []}
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -115,7 +115,7 @@ class TextEdges(object):
|
|||
self.add(textline, align)
|
||||
else:
|
||||
self._textedges[align][idx].update_coords(
|
||||
x_coord, textline.y0, edge_close_tol=self.edge_close_tol)
|
||||
x_coord, textline.y0, edge_tol=self.edge_tol)
|
||||
|
||||
def generate(self, textlines):
|
||||
"""Generates the text edges dict based on horizontal text
|
||||
|
|
@ -359,7 +359,7 @@ class Table(object):
|
|||
cell.left = cell.right = cell.top = cell.bottom = True
|
||||
return self
|
||||
|
||||
def set_edges(self, vertical, horizontal, joint_close_tol=2):
|
||||
def set_edges(self, vertical, horizontal, joint_tol=2):
|
||||
"""Sets a cell's edges to True depending on whether the cell's
|
||||
coordinates overlap with the line's coordinates within a
|
||||
tolerance.
|
||||
|
|
@ -376,11 +376,11 @@ class Table(object):
|
|||
# find closest x coord
|
||||
# iterate over y coords and find closest start and end points
|
||||
i = [i for i, t in enumerate(self.cols)
|
||||
if np.isclose(v[0], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(v[0], t[0], atol=joint_tol)]
|
||||
j = [j for j, t in enumerate(self.rows)
|
||||
if np.isclose(v[3], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(v[3], t[0], atol=joint_tol)]
|
||||
k = [k for k, t in enumerate(self.rows)
|
||||
if np.isclose(v[1], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(v[1], t[0], atol=joint_tol)]
|
||||
if not j:
|
||||
continue
|
||||
J = j[0]
|
||||
|
|
@ -427,11 +427,11 @@ class Table(object):
|
|||
# find closest y coord
|
||||
# iterate over x coords and find closest start and end points
|
||||
i = [i for i, t in enumerate(self.rows)
|
||||
if np.isclose(h[1], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(h[1], t[0], atol=joint_tol)]
|
||||
j = [j for j, t in enumerate(self.cols)
|
||||
if np.isclose(h[0], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(h[0], t[0], atol=joint_tol)]
|
||||
k = [k for k, t in enumerate(self.cols)
|
||||
if np.isclose(h[2], t[0], atol=joint_close_tol)]
|
||||
if np.isclose(h[2], t[0], atol=joint_tol)]
|
||||
if not j:
|
||||
continue
|
||||
J = j[0]
|
||||
|
|
|
|||
|
|
@ -40,10 +40,13 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
|||
flag_size : bool, optional (default: False)
|
||||
Flag text based on font size. Useful to detect
|
||||
super/subscripts. Adds <s></s> around flagged text.
|
||||
row_close_tol^ : int, optional (default: 2)
|
||||
strip_text : str, optional (default: '')
|
||||
Characters that should be stripped from a string before
|
||||
assigning it to a cell.
|
||||
row_tol^ : int, optional (default: 2)
|
||||
Tolerance parameter used to combine text vertically,
|
||||
to generate rows.
|
||||
col_close_tol^ : int, optional (default: 0)
|
||||
column_tol^ : int, optional (default: 0)
|
||||
Tolerance parameter used to combine text horizontally,
|
||||
to generate columns.
|
||||
process_background* : bool, optional (default: False)
|
||||
|
|
@ -59,10 +62,10 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
|||
shift_text* : list, optional (default: ['l', 't'])
|
||||
{'l', 'r', 't', 'b'}
|
||||
Direction in which text in a spanning cell will flow.
|
||||
line_close_tol* : int, optional (default: 2)
|
||||
line_tol* : int, optional (default: 2)
|
||||
Tolerance parameter used to merge close vertical and horizontal
|
||||
lines.
|
||||
joint_close_tol* : int, optional (default: 2)
|
||||
joint_tol* : int, optional (default: 2)
|
||||
Tolerance parameter used to decide whether the detected lines
|
||||
and points lie close to each other.
|
||||
threshold_blocksize* : int, optional (default: 15)
|
||||
|
|
@ -79,6 +82,8 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
|
|||
Number of times erosion/dilation is applied.
|
||||
|
||||
For more information, refer to `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
|
||||
resolution* : int, optional (default: 300)
|
||||
Resolution used for PDF to PNG conversion.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
|
|||
|
|
@ -47,16 +47,16 @@ class Lattice(BaseParser):
|
|||
Direction in which text in a spanning cell will flow.
|
||||
split_text : bool, optional (default: False)
|
||||
Split text that spans across multiple cells.
|
||||
strip_text : str, optional (default: '')
|
||||
Characters that should be stripped from a string before
|
||||
assigning it to a cell.
|
||||
flag_size : bool, optional (default: False)
|
||||
Flag text based on font size. Useful to detect
|
||||
super/subscripts. Adds <s></s> around flagged text.
|
||||
line_close_tol : int, optional (default: 2)
|
||||
strip_text : str, optional (default: '')
|
||||
Characters that should be stripped from a string before
|
||||
assigning it to a cell.
|
||||
line_tol : int, optional (default: 2)
|
||||
Tolerance parameter used to merge close vertical and horizontal
|
||||
lines.
|
||||
joint_close_tol : int, optional (default: 2)
|
||||
joint_tol : int, optional (default: 2)
|
||||
Tolerance parameter used to decide whether the detected lines
|
||||
and points lie close to each other.
|
||||
threshold_blocksize : int, optional (default: 15)
|
||||
|
|
@ -73,12 +73,14 @@ class Lattice(BaseParser):
|
|||
Number of times erosion/dilation is applied.
|
||||
|
||||
For more information, refer to `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
|
||||
resolution : int, optional (default: 300)
|
||||
Resolution used for PDF to PNG conversion.
|
||||
|
||||
"""
|
||||
def __init__(self, table_areas=None, process_background=False,
|
||||
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
||||
split_text=False, flag_size=False, strip_text='', line_close_tol=2,
|
||||
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
||||
split_text=False, flag_size=False, strip_text='', line_tol=2,
|
||||
joint_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
||||
iterations=0, resolution=300, **kwargs):
|
||||
self.table_areas = table_areas
|
||||
self.process_background = process_background
|
||||
|
|
@ -88,8 +90,8 @@ class Lattice(BaseParser):
|
|||
self.split_text = split_text
|
||||
self.flag_size = flag_size
|
||||
self.strip_text = strip_text
|
||||
self.line_close_tol = line_close_tol
|
||||
self.joint_close_tol = joint_close_tol
|
||||
self.line_tol = line_tol
|
||||
self.joint_tol = joint_tol
|
||||
self.threshold_blocksize = threshold_blocksize
|
||||
self.threshold_constant = threshold_constant
|
||||
self.iterations = iterations
|
||||
|
|
@ -283,9 +285,9 @@ class Lattice(BaseParser):
|
|||
rows.extend([tk[1], tk[3]])
|
||||
# sort horizontal and vertical segments
|
||||
cols = merge_close_lines(
|
||||
sorted(cols), line_close_tol=self.line_close_tol)
|
||||
sorted(cols), line_tol=self.line_tol)
|
||||
rows = merge_close_lines(
|
||||
sorted(rows, reverse=True), line_close_tol=self.line_close_tol)
|
||||
sorted(rows, reverse=True), line_tol=self.line_tol)
|
||||
# make grid using x and y coord of shortlisted rows and cols
|
||||
cols = [(cols[i], cols[i + 1])
|
||||
for i in range(0, len(cols) - 1)]
|
||||
|
|
@ -302,7 +304,7 @@ class Lattice(BaseParser):
|
|||
|
||||
table = Table(cols, rows)
|
||||
# set table edges to True using ver+hor lines
|
||||
table = table.set_edges(v_s, h_s, joint_close_tol=self.joint_close_tol)
|
||||
table = table.set_edges(v_s, h_s, joint_tol=self.joint_tol)
|
||||
# set table border edges to True
|
||||
table = table.set_border()
|
||||
# set spanning cells to True
|
||||
|
|
@ -315,7 +317,7 @@ class Lattice(BaseParser):
|
|||
for t in self.t_bbox[direction]:
|
||||
indices, error = get_table_index(
|
||||
table, t, direction, split_text=self.split_text,
|
||||
flag_size=self.flag_size)
|
||||
flag_size=self.flag_size, strip_text=self.strip_text)
|
||||
if indices[:2] != (-1, -1):
|
||||
pos_errors.append(error)
|
||||
indices = Lattice._reduce_index(table, indices, shift_text=self.shift_text)
|
||||
|
|
|
|||
|
|
@ -35,34 +35,34 @@ class Stream(BaseParser):
|
|||
are comma-separated.
|
||||
split_text : bool, optional (default: False)
|
||||
Split text that spans across multiple cells.
|
||||
strip_text : str, optional (default: '')
|
||||
Characters that should be stripped from a string before
|
||||
assigning it to a cell.
|
||||
flag_size : bool, optional (default: False)
|
||||
Flag text based on font size. Useful to detect
|
||||
super/subscripts. Adds <s></s> around flagged text.
|
||||
edge_close_tol : int, optional (default: 50)
|
||||
strip_text : str, optional (default: '')
|
||||
Characters that should be stripped from a string before
|
||||
assigning it to a cell.
|
||||
edge_tol : int, optional (default: 50)
|
||||
Tolerance parameter for extending textedges vertically.
|
||||
row_close_tol : int, optional (default: 2)
|
||||
row_tol : int, optional (default: 2)
|
||||
Tolerance parameter used to combine text vertically,
|
||||
to generate rows.
|
||||
col_close_tol : int, optional (default: 0)
|
||||
column_tol : int, optional (default: 0)
|
||||
Tolerance parameter used to combine text horizontally,
|
||||
to generate columns.
|
||||
|
||||
"""
|
||||
def __init__(self, table_areas=None, columns=None, split_text=False,
|
||||
flag_size=False, strip_text='', edge_close_tol=50, row_close_tol=2,
|
||||
col_close_tol=0, **kwargs):
|
||||
flag_size=False, strip_text='', edge_tol=50, row_tol=2,
|
||||
column_tol=0, **kwargs):
|
||||
self.table_areas = table_areas
|
||||
self.columns = columns
|
||||
self._validate_columns()
|
||||
self.split_text = split_text
|
||||
self.flag_size = flag_size
|
||||
self.strip_text = strip_text
|
||||
self.edge_close_tol = edge_close_tol
|
||||
self.row_close_tol = row_close_tol
|
||||
self.col_close_tol = col_close_tol
|
||||
self.edge_tol = edge_tol
|
||||
self.row_tol = row_tol
|
||||
self.column_tol = column_tol
|
||||
|
||||
@staticmethod
|
||||
def _text_bbox(t_bbox):
|
||||
|
|
@ -88,7 +88,7 @@ class Stream(BaseParser):
|
|||
return text_bbox
|
||||
|
||||
@staticmethod
|
||||
def _group_rows(text, row_close_tol=2):
|
||||
def _group_rows(text, row_tol=2):
|
||||
"""Groups PDFMiner text objects into rows vertically
|
||||
within a tolerance.
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ class Stream(BaseParser):
|
|||
----------
|
||||
text : list
|
||||
List of PDFMiner text objects.
|
||||
row_close_tol : int, optional (default: 2)
|
||||
row_tol : int, optional (default: 2)
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -112,7 +112,7 @@ class Stream(BaseParser):
|
|||
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
||||
# type(obj) is LTChar]):
|
||||
if t.get_text().strip():
|
||||
if not np.isclose(row_y, t.y0, atol=row_close_tol):
|
||||
if not np.isclose(row_y, t.y0, atol=row_tol):
|
||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||
temp = []
|
||||
row_y = t.y0
|
||||
|
|
@ -122,7 +122,7 @@ class Stream(BaseParser):
|
|||
return rows
|
||||
|
||||
@staticmethod
|
||||
def _merge_columns(l, col_close_tol=0):
|
||||
def _merge_columns(l, column_tol=0):
|
||||
"""Merges column boundaries horizontally if they overlap
|
||||
or lie within a tolerance.
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ class Stream(BaseParser):
|
|||
----------
|
||||
l : list
|
||||
List of column x-coordinate tuples.
|
||||
col_close_tol : int, optional (default: 0)
|
||||
column_tol : int, optional (default: 0)
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -144,17 +144,17 @@ class Stream(BaseParser):
|
|||
merged.append(higher)
|
||||
else:
|
||||
lower = merged[-1]
|
||||
if col_close_tol >= 0:
|
||||
if column_tol >= 0:
|
||||
if (higher[0] <= lower[1] or
|
||||
np.isclose(higher[0], lower[1], atol=col_close_tol)):
|
||||
np.isclose(higher[0], lower[1], atol=column_tol)):
|
||||
upper_bound = max(lower[1], higher[1])
|
||||
lower_bound = min(lower[0], higher[0])
|
||||
merged[-1] = (lower_bound, upper_bound)
|
||||
else:
|
||||
merged.append(higher)
|
||||
elif col_close_tol < 0:
|
||||
elif column_tol < 0:
|
||||
if higher[0] <= lower[1]:
|
||||
if np.isclose(higher[0], lower[1], atol=abs(col_close_tol)):
|
||||
if np.isclose(higher[0], lower[1], atol=abs(column_tol)):
|
||||
merged.append(higher)
|
||||
else:
|
||||
upper_bound = max(lower[1], higher[1])
|
||||
|
|
@ -191,7 +191,7 @@ class Stream(BaseParser):
|
|||
return rows
|
||||
|
||||
@staticmethod
|
||||
def _add_columns(cols, text, row_close_tol):
|
||||
def _add_columns(cols, text, row_tol):
|
||||
"""Adds columns to existing list by taking into account
|
||||
the text that lies outside the current column x-coordinates.
|
||||
|
||||
|
|
@ -210,7 +210,7 @@ class Stream(BaseParser):
|
|||
|
||||
"""
|
||||
if text:
|
||||
text = Stream._group_rows(text, row_close_tol=row_close_tol)
|
||||
text = Stream._group_rows(text, row_tol=row_tol)
|
||||
elements = [len(r) for r in text]
|
||||
new_cols = [(t.x0, t.x1)
|
||||
for r in text if len(r) == max(elements) for t in r]
|
||||
|
|
@ -259,7 +259,7 @@ class Stream(BaseParser):
|
|||
# TODO: add support for arabic text #141
|
||||
# sort textlines in reading order
|
||||
textlines.sort(key=lambda x: (-x.y0, x.x0))
|
||||
textedges = TextEdges(edge_close_tol=self.edge_close_tol)
|
||||
textedges = TextEdges(edge_tol=self.edge_tol)
|
||||
# generate left, middle and right textedges
|
||||
textedges.generate(textlines)
|
||||
# select relevant edges
|
||||
|
|
@ -301,7 +301,7 @@ class Stream(BaseParser):
|
|||
self.t_bbox = t_bbox
|
||||
|
||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
|
||||
rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_tol=self.row_tol)
|
||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||
elements = [len(r) for r in rows_grouped]
|
||||
|
||||
|
|
@ -332,7 +332,7 @@ class Stream(BaseParser):
|
|||
warnings.warn("No tables found in table area {}".format(
|
||||
table_idx + 1))
|
||||
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
|
||||
cols = self._merge_columns(sorted(cols), col_close_tol=self.col_close_tol)
|
||||
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
|
||||
inner_text = []
|
||||
for i in range(1, len(cols)):
|
||||
left = cols[i - 1][1]
|
||||
|
|
@ -344,7 +344,7 @@ class Stream(BaseParser):
|
|||
for t in self.t_bbox[direction]
|
||||
if t.x0 > cols[-1][1] or t.x1 < cols[0][0]]
|
||||
inner_text.extend(outer_text)
|
||||
cols = self._add_columns(cols, inner_text, self.row_close_tol)
|
||||
cols = self._add_columns(cols, inner_text, self.row_tol)
|
||||
cols = self._join_columns(cols, text_x_min, text_x_max)
|
||||
|
||||
return cols, rows
|
||||
|
|
@ -360,7 +360,7 @@ class Stream(BaseParser):
|
|||
for t in self.t_bbox[direction]:
|
||||
indices, error = get_table_index(
|
||||
table, t, direction, split_text=self.split_text,
|
||||
flag_size=self.flag_size)
|
||||
flag_size=self.flag_size, strip_text=self.strip_text)
|
||||
if indices[:2] != (-1, -1):
|
||||
pos_errors.append(error)
|
||||
for r_idx, c_idx, text in indices:
|
||||
|
|
|
|||
|
|
@ -20,16 +20,16 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
|||
|
||||
stream_kwargs = [
|
||||
'columns',
|
||||
'row_close_tol',
|
||||
'col_close_tol'
|
||||
'row_tol',
|
||||
'column_tol'
|
||||
]
|
||||
lattice_kwargs = [
|
||||
'process_background',
|
||||
'line_size_scaling',
|
||||
'copy_text',
|
||||
'shift_text',
|
||||
'line_close_tol',
|
||||
'joint_close_tol',
|
||||
'line_tol',
|
||||
'joint_tol',
|
||||
'threshold_blocksize',
|
||||
'threshold_constant',
|
||||
'iterations'
|
||||
|
|
@ -281,14 +281,14 @@ def text_in_bbox(bbox, text):
|
|||
return t_bbox
|
||||
|
||||
|
||||
def merge_close_lines(ar, line_close_tol=2):
|
||||
def merge_close_lines(ar, line_tol=2):
|
||||
"""Merges lines which are within a tolerance by calculating a
|
||||
moving mean, based on their x or y axis projections.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ar : list
|
||||
line_close_tol : int, optional (default: 2)
|
||||
line_tol : int, optional (default: 2)
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
@ -301,7 +301,7 @@ def merge_close_lines(ar, line_close_tol=2):
|
|||
ret.append(a)
|
||||
else:
|
||||
temp = ret[-1]
|
||||
if np.isclose(temp, a, atol=line_close_tol):
|
||||
if np.isclose(temp, a, atol=line_tol):
|
||||
temp = (temp + a) / 2.0
|
||||
ret[-1] = temp
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ You can solve this by passing ``flag_size=True``, which will enclose the supersc
|
|||
Control how text is grouped into rows
|
||||
-------------------------------------
|
||||
|
||||
You can pass ``row_close_tol=<+int>`` to group the rows closer together, as shown below.
|
||||
You can pass ``row_tol=<+int>`` to group the rows closer together, as shown below.
|
||||
|
||||
::
|
||||
|
||||
|
|
@ -337,7 +337,7 @@ You can pass ``row_close_tol=<+int>`` to group the rows closer together, as show
|
|||
|
||||
::
|
||||
|
||||
>>> tables = camelot.read_pdf('group_rows.pdf', flavor='stream', row_close_tol=10)
|
||||
>>> tables = camelot.read_pdf('group_rows.pdf', flavor='stream', row_tol=10)
|
||||
>>> tables[0].df
|
||||
|
||||
.. tip::
|
||||
|
|
|
|||
141
tests/data.py
141
tests/data.py
|
|
@ -312,6 +312,63 @@ data_stream_flag_size = [
|
|||
["ALL STATES", "513.38", "436.02", "-", "25.57", "51.06", "14.18", "-", "8.21", "11.83", "11.08"]
|
||||
]
|
||||
|
||||
data_stream_strip_text = [
|
||||
["V i n s a u Ve r r e", ""],
|
||||
["Les Blancs", "12.5CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
|
||||
["A.O.P Vacqueyras", ""],
|
||||
["Domaine de Montvac « Melodine » 2016", "10 €"],
|
||||
["A.O.P Châteauneuf du Pape", ""],
|
||||
["Domaine de Beaurenard 2017", "13 €"],
|
||||
["A.O.P Côteaux du Languedoc", ""],
|
||||
["Villa Tempora « Un temps pour elle » 2014", "9 €"],
|
||||
["A.O.P Côtes de Provence", ""],
|
||||
["Château Grand Boise 2017", "9 €"],
|
||||
["Les Rosés", "12,5 CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de la Florane « A fleur de Pampre » 2016", "8 €"],
|
||||
["Famille Coulon (Domaine Beaurenard) Biotifulfox 2017", "8 €"],
|
||||
["A.O.P Vacqueyras", ""],
|
||||
["Domaine de Montvac 2017", "9 €"],
|
||||
["A.O.P Languedoc", ""],
|
||||
["Domaine de Joncas « Nébla » 2015", "8 €"],
|
||||
["Villa Tempora « L’arroseur arrosé » 2015", "9 €"],
|
||||
["A.O.P Côtes de Provence", ""],
|
||||
["Château Grand Boise « Sainte Victoire » 2017", "9 €"],
|
||||
["Château Léoube 2016", "10 €"]
|
||||
]
|
||||
|
||||
data_stream_edge_tol = [
|
||||
["Key figures", ""],
|
||||
["", "2016"],
|
||||
["(all amounts in EUR)", ""],
|
||||
["C\nlass A", ""],
|
||||
["N\net Asset Value at 31 December", "5,111,372"],
|
||||
["N\number of outstanding units at 31 December", "49,136"],
|
||||
["N\net Asset Value per unit at 31 December", "104.03"],
|
||||
["C\nlass B", ""],
|
||||
["N\net Asset Value at 31 December", "49,144,825"],
|
||||
["N\number of outstanding units at 31 December", "471,555"],
|
||||
["N\net Asset Value per unit at 31 December", "104.22"],
|
||||
["T\notal for the Fund", ""],
|
||||
["N\net Asset Value at 31 December", "54,256,197"],
|
||||
["N\number of outstanding units at 31 December", "520,691"],
|
||||
["I\nnvestment result", ""],
|
||||
["Direct result", "-"],
|
||||
["Revaluation", "2,076,667"],
|
||||
["Costs", "(106,870)"],
|
||||
["T\notal investment result for the period1", "1,969,797"],
|
||||
["I\nnvestment result per unit2", ""],
|
||||
["Direct result", "-"],
|
||||
["Revaluation", "3.99"],
|
||||
["Costs", "(0.21)"],
|
||||
["T\notal investment result per unit", "3.78"],
|
||||
["1 The results cover the period from inception of the Fund at 8 April 2016 through 31 December 2016.", ""],
|
||||
["2 The result per unit is calculated using the total number of outstanding unit as per the end of the", ""],
|
||||
["period.", ""]
|
||||
]
|
||||
|
||||
data_lattice = [
|
||||
["Cycle \nName", "KI \n(1/km)", "Distance \n(mi)", "Percent Fuel Savings", "", "", ""],
|
||||
["", "", "", "Improved \nSpeed", "Decreased \nAccel", "Eliminate \nStops", "Decreased \nIdle"],
|
||||
|
|
@ -485,49 +542,49 @@ data_lattice_shift_text_right_bottom = [
|
|||
]
|
||||
|
||||
data_arabic = [
|
||||
['ً\n\xa0\nﺎﺒﺣﺮﻣ', 'ﻥﺎﻄﻠﺳ\xa0ﻲﻤﺳﺍ'],
|
||||
['ﻝﺎﻤﺸﻟﺍ\xa0ﺎﻨﻴﻟﻭﺭﺎﻛ\xa0ﺔﻳﻻﻭ\xa0ﻦﻣ\xa0ﺎﻧﺍ', '؟ﺖﻧﺍ\xa0ﻦﻳﺍ\xa0ﻦﻣ'],
|
||||
['1234', 'ﻂﻄﻗ\xa047\xa0ﻱﺪﻨﻋ'],
|
||||
['؟ﻙﺎﺒﺷ\xa0ﺖﻧﺍ\xa0ﻞﻫ', 'ﺔﻳﺰﻴﻠﺠﻧﻻﺍ\xa0ﻲﻓ\xa0Jeremy\xa0ﻲﻤﺳﺍ'],
|
||||
['Jeremy\xa0is\xa0ﻲﻣﺮﺟ\xa0in\xa0Arabic', '']
|
||||
["ً\n\xa0\nﺎﺒﺣﺮﻣ", "ﻥﺎﻄﻠﺳ\xa0ﻲﻤﺳﺍ"],
|
||||
["ﻝﺎﻤﺸﻟﺍ\xa0ﺎﻨﻴﻟﻭﺭﺎﻛ\xa0ﺔﻳﻻﻭ\xa0ﻦﻣ\xa0ﺎﻧﺍ", "؟ﺖﻧﺍ\xa0ﻦﻳﺍ\xa0ﻦﻣ"],
|
||||
["1234", "ﻂﻄﻗ\xa047\xa0ﻱﺪﻨﻋ"],
|
||||
["؟ﻙﺎﺒﺷ\xa0ﺖﻧﺍ\xa0ﻞﻫ", "ﺔﻳﺰﻴﻠﺠﻧﻻﺍ\xa0ﻲﻓ\xa0Jeremy\xa0ﻲﻤﺳﺍ"],
|
||||
["Jeremy\xa0is\xa0ﻲﻣﺮﺟ\xa0in\xa0Arabic", ""]
|
||||
]
|
||||
|
||||
data_stream_layout_kwargs = [
|
||||
['V i n s a u Ve r r e', ''],
|
||||
['Les Blancs', '12.5CL'],
|
||||
['A.O.P Côtes du Rhône', ''],
|
||||
['Domaine de la Guicharde « Autour de la chapelle » 2016', '8 €'],
|
||||
['A.O.P Vacqueyras', ''],
|
||||
['Domaine de Montvac « Melodine » 2016', '10 €'],
|
||||
['A.O.P Châteauneuf du Pape', ''],
|
||||
['Domaine de Beaurenard 2017', '13 €'],
|
||||
['A.O.P Côteaux du Languedoc', ''],
|
||||
['Villa Tempora « Un temps pour elle » 2014', '9 €'],
|
||||
['A.O.P Côtes de Provence', ''],
|
||||
['Château Grand Boise 2017', '9 €'],
|
||||
['Les Rosés', '12,5 CL'],
|
||||
['A.O.P Côtes du Rhône', ''],
|
||||
['Domaine de la Florane « A fleur de Pampre » 2016', '8 €'],
|
||||
['Famille Coulon (Domaine Beaurenard) Biotifulfox 2017', '8 €'],
|
||||
['A.O.P Vacqueyras', ''],
|
||||
['Domaine de Montvac 2017', '9 €'],
|
||||
['A.O.P Languedoc', ''],
|
||||
['Domaine de Joncas « Nébla » 2015', '8 €'],
|
||||
['Villa Tempora « L’arroseur arrosé » 2015', '9 €'],
|
||||
['A.O.P Côtes de Provence', ''],
|
||||
['Château Grand Boise « Sainte Victoire » 2017', '9 €'],
|
||||
['Château Léoube 2016', '10 €'],
|
||||
['Les Rouges', '12,CL'],
|
||||
['A.O.P Côtes du Rhône', ''],
|
||||
['Domaine de Dionysos « La Cigalette »', '8 €'],
|
||||
['Château Saint Estève d’Uchaux « Grande Réserve » 2014', '9 €'],
|
||||
['Domaine de la Guicharde « Cuvée Massillan » 2016', '9 €'],
|
||||
['Domaine de la Florane « Terre Pourpre » 2014', '10 €'],
|
||||
['L’Oratoire St Martin « Réserve des Seigneurs » 2015', '11 €'],
|
||||
['A.O.P Saint Joseph', ''],
|
||||
['Domaine Monier Perréol « Châtelet » 2015', '13 €'],
|
||||
['A.O.P Châteauneuf du Pape', ''],
|
||||
['Domaine de Beaurenard 2011', '15 €'],
|
||||
['A.O.P Cornas', ''],
|
||||
['Domaine Lionnet « Terre Brûlée » 2012', '15 €']
|
||||
["V i n s a u Ve r r e", ""],
|
||||
["Les Blancs", "12.5CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
|
||||
["A.O.P Vacqueyras", ""],
|
||||
["Domaine de Montvac « Melodine » 2016", "10 €"],
|
||||
["A.O.P Châteauneuf du Pape", ""],
|
||||
["Domaine de Beaurenard 2017", "13 €"],
|
||||
["A.O.P Côteaux du Languedoc", ""],
|
||||
["Villa Tempora « Un temps pour elle » 2014", "9 €"],
|
||||
["A.O.P Côtes de Provence", ""],
|
||||
["Château Grand Boise 2017", "9 €"],
|
||||
["Les Rosés", "12,5 CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de la Florane « A fleur de Pampre » 2016", "8 €"],
|
||||
["Famille Coulon (Domaine Beaurenard) Biotifulfox 2017", "8 €"],
|
||||
["A.O.P Vacqueyras", ""],
|
||||
["Domaine de Montvac 2017", "9 €"],
|
||||
["A.O.P Languedoc", ""],
|
||||
["Domaine de Joncas « Nébla » 2015", "8 €"],
|
||||
["Villa Tempora « L’arroseur arrosé » 2015", "9 €"],
|
||||
["A.O.P Côtes de Provence", ""],
|
||||
["Château Grand Boise « Sainte Victoire » 2017", "9 €"],
|
||||
["Château Léoube 2016", "10 €"],
|
||||
["Les Rouges", "12,CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de Dionysos « La Cigalette »", "8 €"],
|
||||
["Château Saint Estève d’Uchaux « Grande Réserve » 2014", "9 €"],
|
||||
["Domaine de la Guicharde « Cuvée Massillan » 2016", "9 €"],
|
||||
["Domaine de la Florane « Terre Pourpre » 2014", "10 €"],
|
||||
["L’Oratoire St Martin « Réserve des Seigneurs » 2015", "11 €"],
|
||||
["A.O.P Saint Joseph", ""],
|
||||
["Domaine Monier Perréol « Châtelet » 2015", "13 €"],
|
||||
["A.O.P Châteauneuf du Pape", ""],
|
||||
["Domaine de Beaurenard 2011", "15 €"],
|
||||
["A.O.P Cornas", ""],
|
||||
["Domaine Lionnet « Terre Brûlée » 2012", "15 €"]
|
||||
]
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -81,7 +81,7 @@ def test_stream_columns():
|
|||
|
||||
filename = os.path.join(testdir, "mexican_towns.pdf")
|
||||
tables = camelot.read_pdf(
|
||||
filename, flavor="stream", columns=["67,180,230,425,475"], row_close_tol=10)
|
||||
filename, flavor="stream", columns=["67,180,230,425,475"], row_tol=10)
|
||||
assert df.equals(tables[0].df)
|
||||
|
||||
|
||||
|
|
@ -102,6 +102,22 @@ def test_stream_flag_size():
|
|||
assert df.equals(tables[0].df)
|
||||
|
||||
|
||||
def test_stream_strip_text():
|
||||
df = pd.DataFrame(data_stream_strip_text)
|
||||
|
||||
filename = os.path.join(testdir, "detect_vertical_false.pdf")
|
||||
tables = camelot.read_pdf(filename, flavor="stream", strip_text="\n")
|
||||
assert df.equals(tables[0].df)
|
||||
|
||||
|
||||
def test_stream_edge_tol():
|
||||
df = pd.DataFrame(data_stream_edge_tol)
|
||||
|
||||
filename = os.path.join(testdir, "edge_tolerance.pdf")
|
||||
tables = camelot.read_pdf(filename, flavor="stream", edge_tol=500)
|
||||
assert df.equals(tables[0].df)
|
||||
|
||||
|
||||
def test_stream_layout_kwargs():
|
||||
df = pd.DataFrame(data_stream_layout_kwargs)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue