[PyConIndia] Add informative error when no text in bounding box

pull/92/head
pravar agrawal 2019-10-18 01:42:08 +05:30
parent 56f3b54f62
commit ece17ecd41
1 changed file with 12 additions and 6 deletions

View File

@ -4,6 +4,7 @@ from __future__ import division
import os import os
import logging import logging
import warnings import warnings
import sys
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -95,12 +96,17 @@ class Stream(BaseParser):
Tuple (x0, y0, x1, y1) in pdf coordinate space. Tuple (x0, y0, x1, y1) in pdf coordinate space.
""" """
xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) try:
ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]])
xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]])
ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]])
text_bbox = (xmin, ymin, xmax, ymax) ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]])
return text_bbox text_bbox = (xmin, ymin, xmax, ymax)
return text_bbox
except ValueError as err:
print(err)
print("Defined values for table area does not contain any text to extract. Exiting program...")
sys.exit()
@staticmethod @staticmethod
def _group_rows(text, row_tol=2): def _group_rows(text, row_tol=2):