Deprecate Stream ncolumns
parent
72c2a0020f
commit
10eda3f204
|
|
@ -211,14 +211,14 @@ def _add_columns(cols, text, ytol):
|
|||
class Stream:
|
||||
"""Stream looks for spaces between text elements to form a table.
|
||||
|
||||
If you want to give columns, ncolumns, ytol or mtol for each table
|
||||
If you want to give columns, ytol or mtol for each table
|
||||
when specifying multiple table areas, make sure that their length
|
||||
is equal to the length of table_area. Mapping between them is based
|
||||
on index.
|
||||
|
||||
Also, if you want to specify columns for the first table and
|
||||
ncolumns for the second table in a pdf having two tables, pass
|
||||
columns as ['x1,x2,x3,x4', ''] and ncolumns as [-1, 5].
|
||||
If you don't want to specify columns for some tables in a pdf
|
||||
page having multiple tables, pass them as empty strings.
|
||||
For example: ['', 'x1,x2,x3,x4', '']
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
|
@ -233,10 +233,6 @@ class Stream:
|
|||
x-coordinates in PDFMiner's coordinate space.
|
||||
(optional, default: None)
|
||||
|
||||
ncolumns : list
|
||||
List of ints specifying the number of columns in each table.
|
||||
(optional, default: None)
|
||||
|
||||
headers : list
|
||||
List of strings where each string is a csv header for a table.
|
||||
(optional, default: None)
|
||||
|
|
@ -269,14 +265,13 @@ class Stream:
|
|||
LTTextLineHorizontals in order to select table_area, columns.
|
||||
(optional, default: False)
|
||||
"""
|
||||
def __init__(self, table_area=None, columns=None, ncolumns=None,
|
||||
headers=None, ytol=[2], mtol=[0], margins=(1.0, 0.5, 0.1),
|
||||
def __init__(self, table_area=None, columns=None, headers=None,
|
||||
ytol=[2], mtol=[0], margins=(1.0, 0.5, 0.1),
|
||||
split_text=False, flag_size=True, debug=False):
|
||||
|
||||
self.method = 'stream'
|
||||
self.table_area = table_area
|
||||
self.columns = columns
|
||||
self.ncolumns = ncolumns
|
||||
self.headers = headers
|
||||
self.ytol = ytol
|
||||
self.mtol = mtol
|
||||
|
|
@ -318,9 +313,6 @@ class Stream:
|
|||
if self.columns is not None:
|
||||
if len(self.table_area) != len(self.columns):
|
||||
raise ValueError("Length of columns should be equal to table_area.")
|
||||
if self.ncolumns is not None:
|
||||
if len(self.table_area) != len(self.ncolumns):
|
||||
raise ValueError("Length of ncolumns should be equal to table_area.")
|
||||
if self.headers is not None:
|
||||
if len(self.table_area) != len(self.headers):
|
||||
raise ValueError("Length of headers should be equal to table_area.")
|
||||
|
|
@ -371,18 +363,6 @@ class Stream:
|
|||
cols.insert(0, text_x_min)
|
||||
cols.append(text_x_max)
|
||||
cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
|
||||
else:
|
||||
if self.ncolumns is not None and self.ncolumns[table_no] != -1:
|
||||
ncols = self.ncolumns[table_no]
|
||||
cols = [(t.x0, t.x1)
|
||||
for r in rows_grouped if len(r) == ncols for t in r]
|
||||
cols = _merge_columns(sorted(cols), mtol=self.mtol[table_no])
|
||||
if len(cols) != self.ncolumns[table_no]:
|
||||
logging.warning("{}: The number of columns after merge"
|
||||
" isn't the same as what you specified."
|
||||
" Change the value of mtol.".format(
|
||||
os.path.basename(bname)))
|
||||
cols = _join_columns(cols, text_x_min, text_x_max)
|
||||
else:
|
||||
guess = True
|
||||
ncols = max(set(elements), key=elements.count)
|
||||
|
|
|
|||
Loading…
Reference in New Issue