diff --git a/camelot/image_processing.py b/camelot/image_processing.py index 8707d48..3051852 100644 --- a/camelot/image_processing.py +++ b/camelot/image_processing.py @@ -97,17 +97,24 @@ def find_lines(threshold, regions=None, direction='horizontal', raise ValueError("Specify direction as either 'vertical' or" " 'horizontal'") + if regions is not None: + region_mask = np.zeros(threshold.shape) + for region in regions: + x, y, w, h = region + region_mask[y : y + h, x : x + w] = 1 + threshold = np.multiply(threshold, region_mask) + threshold = cv2.erode(threshold, el) threshold = cv2.dilate(threshold, el) dmask = cv2.dilate(threshold, el, iterations=iterations) try: _, contours, _ = cv2.findContours( - threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) except ValueError: # for opencv backward compatibility contours, _ = cv2.findContours( - threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) for c in contours: x, y, w, h = cv2.boundingRect(c) @@ -117,12 +124,6 @@ def find_lines(threshold, regions=None, direction='horizontal', lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1)) elif direction == 'horizontal': lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2)) - if regions is not None: - region_mask = np.zeros(dmask.shape) - for region in regions: - x, y, w, h = region - region_mask[y : y + h, x : x + w] = 1 - dmask = np.multiply(dmask, region_mask) return dmask, lines diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index e061f65..ab7d3be 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -232,9 +232,22 @@ class Lattice(BaseParser): stderr=subprocess.STDOUT) def _generate_table_bbox(self): + def scale_areas(areas): + scaled_areas = [] + for area in areas: + x1, y1, x2, y2 = area.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers) + scaled_areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) + return scaled_areas + self.image, self.threshold = adaptive_threshold( self.imagename, process_background=self.process_background, blocksize=self.threshold_blocksize, c=self.threshold_constant) + image_width = self.image.shape[1] image_height = self.image.shape[0] image_width_scaler = image_width / float(self.pdf_width) @@ -247,15 +260,8 @@ class Lattice(BaseParser): if self.table_areas is None: regions = None if self.table_regions is not None: - regions = [] - for region in self.table_regions: - x1, y1, x2, y2 = region.split(",") - x1 = float(x1) - y1 = float(y1) - x2 = float(x2) - y2 = float(y2) - x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers) - regions.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) + regions = scale_areas(self.table_regions) + vertical_mask, vertical_segments = find_lines( self.threshold, regions=regions, direction='vertical', line_scale=self.line_scale, iterations=self.iterations) @@ -273,15 +279,7 @@ class Lattice(BaseParser): self.threshold, direction='horizontal', line_scale=self.line_scale, iterations=self.iterations) - areas = [] - for area in self.table_areas: - x1, y1, x2, y2 = area.split(",") - x1 = float(x1) - y1 = float(y1) - x2 = float(x2) - y2 = float(y2) - x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers) - areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) + areas = scale_areas(self.table_areas) table_bbox = find_joints(areas, vertical_mask, horizontal_mask) self.table_bbox_unscaled = copy.deepcopy(table_bbox)