Apply mask at threshold level
parent
03f301b25c
commit
eaca147b9d
|
|
@ -97,17 +97,24 @@ def find_lines(threshold, regions=None, direction='horizontal',
|
||||||
raise ValueError("Specify direction as either 'vertical' or"
|
raise ValueError("Specify direction as either 'vertical' or"
|
||||||
" 'horizontal'")
|
" 'horizontal'")
|
||||||
|
|
||||||
|
if regions is not None:
|
||||||
|
region_mask = np.zeros(threshold.shape)
|
||||||
|
for region in regions:
|
||||||
|
x, y, w, h = region
|
||||||
|
region_mask[y : y + h, x : x + w] = 1
|
||||||
|
threshold = np.multiply(threshold, region_mask)
|
||||||
|
|
||||||
threshold = cv2.erode(threshold, el)
|
threshold = cv2.erode(threshold, el)
|
||||||
threshold = cv2.dilate(threshold, el)
|
threshold = cv2.dilate(threshold, el)
|
||||||
dmask = cv2.dilate(threshold, el, iterations=iterations)
|
dmask = cv2.dilate(threshold, el, iterations=iterations)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_, contours, _ = cv2.findContours(
|
_, contours, _ = cv2.findContours(
|
||||||
threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# for opencv backward compatibility
|
# for opencv backward compatibility
|
||||||
contours, _ = cv2.findContours(
|
contours, _ = cv2.findContours(
|
||||||
threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||||
|
|
||||||
for c in contours:
|
for c in contours:
|
||||||
x, y, w, h = cv2.boundingRect(c)
|
x, y, w, h = cv2.boundingRect(c)
|
||||||
|
|
@ -117,12 +124,6 @@ def find_lines(threshold, regions=None, direction='horizontal',
|
||||||
lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1))
|
lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1))
|
||||||
elif direction == 'horizontal':
|
elif direction == 'horizontal':
|
||||||
lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2))
|
lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2))
|
||||||
if regions is not None:
|
|
||||||
region_mask = np.zeros(dmask.shape)
|
|
||||||
for region in regions:
|
|
||||||
x, y, w, h = region
|
|
||||||
region_mask[y : y + h, x : x + w] = 1
|
|
||||||
dmask = np.multiply(dmask, region_mask)
|
|
||||||
|
|
||||||
return dmask, lines
|
return dmask, lines
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -232,9 +232,22 @@ class Lattice(BaseParser):
|
||||||
stderr=subprocess.STDOUT)
|
stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
def _generate_table_bbox(self):
|
def _generate_table_bbox(self):
|
||||||
|
def scale_areas(areas):
|
||||||
|
scaled_areas = []
|
||||||
|
for area in areas:
|
||||||
|
x1, y1, x2, y2 = area.split(",")
|
||||||
|
x1 = float(x1)
|
||||||
|
y1 = float(y1)
|
||||||
|
x2 = float(x2)
|
||||||
|
y2 = float(y2)
|
||||||
|
x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers)
|
||||||
|
scaled_areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1)))
|
||||||
|
return scaled_areas
|
||||||
|
|
||||||
self.image, self.threshold = adaptive_threshold(
|
self.image, self.threshold = adaptive_threshold(
|
||||||
self.imagename, process_background=self.process_background,
|
self.imagename, process_background=self.process_background,
|
||||||
blocksize=self.threshold_blocksize, c=self.threshold_constant)
|
blocksize=self.threshold_blocksize, c=self.threshold_constant)
|
||||||
|
|
||||||
image_width = self.image.shape[1]
|
image_width = self.image.shape[1]
|
||||||
image_height = self.image.shape[0]
|
image_height = self.image.shape[0]
|
||||||
image_width_scaler = image_width / float(self.pdf_width)
|
image_width_scaler = image_width / float(self.pdf_width)
|
||||||
|
|
@ -247,15 +260,8 @@ class Lattice(BaseParser):
|
||||||
if self.table_areas is None:
|
if self.table_areas is None:
|
||||||
regions = None
|
regions = None
|
||||||
if self.table_regions is not None:
|
if self.table_regions is not None:
|
||||||
regions = []
|
regions = scale_areas(self.table_regions)
|
||||||
for region in self.table_regions:
|
|
||||||
x1, y1, x2, y2 = region.split(",")
|
|
||||||
x1 = float(x1)
|
|
||||||
y1 = float(y1)
|
|
||||||
x2 = float(x2)
|
|
||||||
y2 = float(y2)
|
|
||||||
x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers)
|
|
||||||
regions.append((x1, y1, abs(x2 - x1), abs(y2 - y1)))
|
|
||||||
vertical_mask, vertical_segments = find_lines(
|
vertical_mask, vertical_segments = find_lines(
|
||||||
self.threshold, regions=regions, direction='vertical',
|
self.threshold, regions=regions, direction='vertical',
|
||||||
line_scale=self.line_scale, iterations=self.iterations)
|
line_scale=self.line_scale, iterations=self.iterations)
|
||||||
|
|
@ -273,15 +279,7 @@ class Lattice(BaseParser):
|
||||||
self.threshold, direction='horizontal', line_scale=self.line_scale,
|
self.threshold, direction='horizontal', line_scale=self.line_scale,
|
||||||
iterations=self.iterations)
|
iterations=self.iterations)
|
||||||
|
|
||||||
areas = []
|
areas = scale_areas(self.table_areas)
|
||||||
for area in self.table_areas:
|
|
||||||
x1, y1, x2, y2 = area.split(",")
|
|
||||||
x1 = float(x1)
|
|
||||||
y1 = float(y1)
|
|
||||||
x2 = float(x2)
|
|
||||||
y2 = float(y2)
|
|
||||||
x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers)
|
|
||||||
areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1)))
|
|
||||||
table_bbox = find_joints(areas, vertical_mask, horizontal_mask)
|
table_bbox = find_joints(areas, vertical_mask, horizontal_mask)
|
||||||
|
|
||||||
self.table_bbox_unscaled = copy.deepcopy(table_bbox)
|
self.table_bbox_unscaled = copy.deepcopy(table_bbox)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue