Log warning when len(header) != len(cols)
parent
adb948d363
commit
2a203a1865
|
|
@ -79,10 +79,10 @@ def _fill_spanning(t, fill=None):
|
||||||
t : object
|
t : object
|
||||||
camelot.table.Table
|
camelot.table.Table
|
||||||
|
|
||||||
fill : string
|
fill : list
|
||||||
{'h', 'v', 'hv'}
|
{'h', 'v'}
|
||||||
Specify to fill spanning cells in horizontal, vertical or both
|
Specify to fill spanning cells in horizontal or vertical
|
||||||
directions.
|
direction.
|
||||||
(optional, default: None)
|
(optional, default: None)
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
|
@ -90,26 +90,19 @@ def _fill_spanning(t, fill=None):
|
||||||
t : object
|
t : object
|
||||||
camelot.table.Table
|
camelot.table.Table
|
||||||
"""
|
"""
|
||||||
if fill == "h":
|
for f in fill:
|
||||||
for i in range(len(t.cells)):
|
if f == "h":
|
||||||
for j in range(len(t.cells[i])):
|
for i in range(len(t.cells)):
|
||||||
if t.cells[i][j].get_text().strip() == '':
|
for j in range(len(t.cells[i])):
|
||||||
if t.cells[i][j].spanning_h:
|
if t.cells[i][j].get_text().strip() == '':
|
||||||
t.cells[i][j].add_text(t.cells[i][j - 1].get_text())
|
if t.cells[i][j].spanning_h:
|
||||||
elif fill == "v":
|
t.cells[i][j].add_text(t.cells[i][j - 1].get_text())
|
||||||
for i in range(len(t.cells)):
|
elif f == "v":
|
||||||
for j in range(len(t.cells[i])):
|
for i in range(len(t.cells)):
|
||||||
if t.cells[i][j].get_text().strip() == '':
|
for j in range(len(t.cells[i])):
|
||||||
if t.cells[i][j].spanning_v:
|
if t.cells[i][j].get_text().strip() == '':
|
||||||
t.cells[i][j].add_text(t.cells[i - 1][j].get_text())
|
if t.cells[i][j].spanning_v:
|
||||||
elif fill == "hv":
|
t.cells[i][j].add_text(t.cells[i - 1][j].get_text())
|
||||||
for i in range(len(t.cells)):
|
|
||||||
for j in range(len(t.cells[i])):
|
|
||||||
if t.cells[i][j].get_text().strip() == '':
|
|
||||||
if t.cells[i][j].spanning_h:
|
|
||||||
t.cells[i][j].add_text(t.cells[i][j - 1].get_text())
|
|
||||||
elif t.cells[i][j].spanning_v:
|
|
||||||
t.cells[i][j].add_text(t.cells[i - 1][j].get_text())
|
|
||||||
return t
|
return t
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -131,8 +124,8 @@ class Lattice:
|
||||||
|
|
||||||
fill : list
|
fill : list
|
||||||
List of strings specifying directions to fill spanning cells.
|
List of strings specifying directions to fill spanning cells.
|
||||||
{'h', 'v', 'hv'} to fill spanning cells in horizontal, vertical
|
{'h', 'v'} to fill spanning cells in horizontal or vertical
|
||||||
or both directions.
|
direction.
|
||||||
(optional, default: None)
|
(optional, default: None)
|
||||||
|
|
||||||
headers : list
|
headers : list
|
||||||
|
|
@ -256,7 +249,6 @@ class Lattice:
|
||||||
if self.headers is not None:
|
if self.headers is not None:
|
||||||
if len(self.table_area) != len(self.headers):
|
if len(self.table_area) != len(self.headers):
|
||||||
raise ValueError("Length of headers should be equal to table_area.")
|
raise ValueError("Length of headers should be equal to table_area.")
|
||||||
self.headers = [h.split(',') for h in headers]
|
|
||||||
|
|
||||||
areas = []
|
areas = []
|
||||||
for area in self.table_area:
|
for area in self.table_area:
|
||||||
|
|
@ -316,10 +308,12 @@ class Lattice:
|
||||||
rows = [(rows[i], rows[i + 1])
|
rows = [(rows[i], rows[i + 1])
|
||||||
for i in range(0, len(rows) - 1)]
|
for i in range(0, len(rows) - 1)]
|
||||||
|
|
||||||
if self.headers is not None and len(self.headers[table_no]) != len(cols):
|
if self.headers is not None and self.headers[table_no] != [""]:
|
||||||
logging.warning("Length of header ({0}) specified for table is not"
|
self.headers[table_no] = self.headers[table_no].split(',')
|
||||||
" equal to the number of columns ({1}) detected.".format(
|
if len(self.headers[table_no]) != len(cols):
|
||||||
len(self.headers[table_no]), len(cols)))
|
logging.warning("Length of header ({0}) specified for table is not"
|
||||||
|
" equal to the number of columns ({1}) detected.".format(
|
||||||
|
len(self.headers[table_no]), len(cols)))
|
||||||
while len(self.headers[table_no]) != len(cols):
|
while len(self.headers[table_no]) != len(cols):
|
||||||
self.headers[table_no].append('')
|
self.headers[table_no].append('')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -325,7 +325,6 @@ class Stream:
|
||||||
if self.headers is not None:
|
if self.headers is not None:
|
||||||
if len(self.table_area) != len(self.headers):
|
if len(self.table_area) != len(self.headers):
|
||||||
raise ValueError("Length of headers should be equal to table_area.")
|
raise ValueError("Length of headers should be equal to table_area.")
|
||||||
self.headers = [h.split(',') for h in headers]
|
|
||||||
|
|
||||||
table_bbox = {}
|
table_bbox = {}
|
||||||
for area in self.table_area:
|
for area in self.table_area:
|
||||||
|
|
@ -418,10 +417,12 @@ class Stream:
|
||||||
cols = _add_columns(cols, inner_text, self.ytol[table_no])
|
cols = _add_columns(cols, inner_text, self.ytol[table_no])
|
||||||
cols = _join_columns(cols, text_x_min, text_x_max)
|
cols = _join_columns(cols, text_x_min, text_x_max)
|
||||||
|
|
||||||
if self.headers is not None and len(self.headers[table_no]) != len(cols):
|
if self.headers is not None and self.headers[table_no] != [""]:
|
||||||
logging.warning("Length of header ({0}) specified for table is not"
|
self.headers[table_no] = self.headers[table_no].split(',')
|
||||||
" equal to the number of columns ({1}) detected.".format(
|
if len(self.headers[table_no]) != len(cols):
|
||||||
len(self.headers[table_no]), len(cols)))
|
logging.warning("Length of header ({0}) specified for table is not"
|
||||||
|
" equal to the number of columns ({1}) detected.".format(
|
||||||
|
len(self.headers[table_no]), len(cols)))
|
||||||
while len(self.headers[table_no]) != len(cols):
|
while len(self.headers[table_no]) != len(cols):
|
||||||
self.headers[table_no].append('')
|
self.headers[table_no].append('')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -605,7 +605,7 @@ def split_textline(table, textline, direction, flag_size=True):
|
||||||
if isinstance(obj, LTChar):
|
if isinstance(obj, LTChar):
|
||||||
if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and
|
if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and
|
||||||
(obj.y0 + obj.y1) / 2 >= cut[1]):
|
(obj.y0 + obj.y1) / 2 >= cut[1]):
|
||||||
cut_text.append((cut[0], c, obj.get_text()))
|
cut_text.append((cut[0], c, obj))
|
||||||
break
|
break
|
||||||
elif isinstance(obj, LTAnno):
|
elif isinstance(obj, LTAnno):
|
||||||
cut_text.append((cut[0], c, obj))
|
cut_text.append((cut[0], c, obj))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue