Log warning when len(header) != len(cols)

pull/2/head
Vinayak Mehta 2016-10-17 18:16:39 +05:30 committed by GitHub
parent adb948d363
commit 2a203a1865
3 changed files with 32 additions and 37 deletions

View File

@ -79,10 +79,10 @@ def _fill_spanning(t, fill=None):
t : object t : object
camelot.table.Table camelot.table.Table
fill : string fill : list
{'h', 'v', 'hv'} {'h', 'v'}
Specify to fill spanning cells in horizontal, vertical or both Specify to fill spanning cells in horizontal or vertical
directions. direction.
(optional, default: None) (optional, default: None)
Returns Returns
@ -90,26 +90,19 @@ def _fill_spanning(t, fill=None):
t : object t : object
camelot.table.Table camelot.table.Table
""" """
if fill == "h": for f in fill:
for i in range(len(t.cells)): if f == "h":
for j in range(len(t.cells[i])): for i in range(len(t.cells)):
if t.cells[i][j].get_text().strip() == '': for j in range(len(t.cells[i])):
if t.cells[i][j].spanning_h: if t.cells[i][j].get_text().strip() == '':
t.cells[i][j].add_text(t.cells[i][j - 1].get_text()) if t.cells[i][j].spanning_h:
elif fill == "v": t.cells[i][j].add_text(t.cells[i][j - 1].get_text())
for i in range(len(t.cells)): elif f == "v":
for j in range(len(t.cells[i])): for i in range(len(t.cells)):
if t.cells[i][j].get_text().strip() == '': for j in range(len(t.cells[i])):
if t.cells[i][j].spanning_v: if t.cells[i][j].get_text().strip() == '':
t.cells[i][j].add_text(t.cells[i - 1][j].get_text()) if t.cells[i][j].spanning_v:
elif fill == "hv": t.cells[i][j].add_text(t.cells[i - 1][j].get_text())
for i in range(len(t.cells)):
for j in range(len(t.cells[i])):
if t.cells[i][j].get_text().strip() == '':
if t.cells[i][j].spanning_h:
t.cells[i][j].add_text(t.cells[i][j - 1].get_text())
elif t.cells[i][j].spanning_v:
t.cells[i][j].add_text(t.cells[i - 1][j].get_text())
return t return t
@ -131,8 +124,8 @@ class Lattice:
fill : list fill : list
List of strings specifying directions to fill spanning cells. List of strings specifying directions to fill spanning cells.
{'h', 'v', 'hv'} to fill spanning cells in horizontal, vertical {'h', 'v'} to fill spanning cells in horizontal or vertical
or both directions. direction.
(optional, default: None) (optional, default: None)
headers : list headers : list
@ -256,7 +249,6 @@ class Lattice:
if self.headers is not None: if self.headers is not None:
if len(self.table_area) != len(self.headers): if len(self.table_area) != len(self.headers):
raise ValueError("Length of headers should be equal to table_area.") raise ValueError("Length of headers should be equal to table_area.")
self.headers = [h.split(',') for h in headers]
areas = [] areas = []
for area in self.table_area: for area in self.table_area:
@ -316,10 +308,12 @@ class Lattice:
rows = [(rows[i], rows[i + 1]) rows = [(rows[i], rows[i + 1])
for i in range(0, len(rows) - 1)] for i in range(0, len(rows) - 1)]
if self.headers is not None and len(self.headers[table_no]) != len(cols): if self.headers is not None and self.headers[table_no] != [""]:
logging.warning("Length of header ({0}) specified for table is not" self.headers[table_no] = self.headers[table_no].split(',')
" equal to the number of columns ({1}) detected.".format( if len(self.headers[table_no]) != len(cols):
len(self.headers[table_no]), len(cols))) logging.warning("Length of header ({0}) specified for table is not"
" equal to the number of columns ({1}) detected.".format(
len(self.headers[table_no]), len(cols)))
while len(self.headers[table_no]) != len(cols): while len(self.headers[table_no]) != len(cols):
self.headers[table_no].append('') self.headers[table_no].append('')

View File

@ -325,7 +325,6 @@ class Stream:
if self.headers is not None: if self.headers is not None:
if len(self.table_area) != len(self.headers): if len(self.table_area) != len(self.headers):
raise ValueError("Length of headers should be equal to table_area.") raise ValueError("Length of headers should be equal to table_area.")
self.headers = [h.split(',') for h in headers]
table_bbox = {} table_bbox = {}
for area in self.table_area: for area in self.table_area:
@ -418,10 +417,12 @@ class Stream:
cols = _add_columns(cols, inner_text, self.ytol[table_no]) cols = _add_columns(cols, inner_text, self.ytol[table_no])
cols = _join_columns(cols, text_x_min, text_x_max) cols = _join_columns(cols, text_x_min, text_x_max)
if self.headers is not None and len(self.headers[table_no]) != len(cols): if self.headers is not None and self.headers[table_no] != [""]:
logging.warning("Length of header ({0}) specified for table is not" self.headers[table_no] = self.headers[table_no].split(',')
" equal to the number of columns ({1}) detected.".format( if len(self.headers[table_no]) != len(cols):
len(self.headers[table_no]), len(cols))) logging.warning("Length of header ({0}) specified for table is not"
" equal to the number of columns ({1}) detected.".format(
len(self.headers[table_no]), len(cols)))
while len(self.headers[table_no]) != len(cols): while len(self.headers[table_no]) != len(cols):
self.headers[table_no].append('') self.headers[table_no].append('')

View File

@ -605,7 +605,7 @@ def split_textline(table, textline, direction, flag_size=True):
if isinstance(obj, LTChar): if isinstance(obj, LTChar):
if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and if (col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] and
(obj.y0 + obj.y1) / 2 >= cut[1]): (obj.y0 + obj.y1) / 2 >= cut[1]):
cut_text.append((cut[0], c, obj.get_text())) cut_text.append((cut[0], c, obj))
break break
elif isinstance(obj, LTAnno): elif isinstance(obj, LTAnno):
cut_text.append((cut[0], c, obj)) cut_text.append((cut[0], c, obj))