Add saturation thresholding option
parent
d17dc43ab2
commit
2ebd6073c0
|
|
@ -100,6 +100,9 @@ def cli(ctx, *args, **kwargs):
|
|||
@click.option(
|
||||
"-back", "--process_background", is_flag=True, help="Process background lines."
|
||||
)
|
||||
@click.option(
|
||||
"-color", "--process_color_background", is_flag=True, help="Increase contrast for better background line processing."
|
||||
)
|
||||
@click.option(
|
||||
"-scale",
|
||||
"--line_scale",
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import cv2
|
|||
import numpy as np
|
||||
|
||||
|
||||
def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
|
||||
def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2, process_color_background=False, saturation_threshold=5):
|
||||
"""Thresholds an image using OpenCV's adaptiveThreshold.
|
||||
|
||||
Parameters
|
||||
|
|
@ -36,6 +36,17 @@ def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
|
|||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
if process_background:
|
||||
if process_color_background:
|
||||
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
|
||||
initial = hsv[:, :, 1]
|
||||
hsv[initial > saturation_threshold, 0] = 0
|
||||
hsv[initial > saturation_threshold, 1] = 255
|
||||
hsv[initial > saturation_threshold, 2] = 0
|
||||
hsv[initial <= saturation_threshold, 0] = 128
|
||||
hsv[initial <= saturation_threshold, 1] = 0
|
||||
hsv[initial <= saturation_threshold, 2] = 255
|
||||
hsv[initial == 255, 1] = 0
|
||||
gray = cv2.cvtColor(hsv, cv2.COLOR_BGR2GRAY)
|
||||
threshold = cv2.adaptiveThreshold(
|
||||
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c
|
||||
)
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ def read_pdf(
|
|||
to generate columns.
|
||||
process_background* : bool, optional (default: False)
|
||||
Process background lines.
|
||||
process_color_background* : bool, optional (default: False)
|
||||
Threshold the image on HSV saturation before background line processing; has no effect unless process_background is True.
|
||||
line_scale* : int, optional (default: 15)
|
||||
Line size scaling factor. The larger the value the smaller
|
||||
the detected lines. Making it very large will lead to text
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ class Lattice(BaseParser):
|
|||
table_regions=None,
|
||||
table_areas=None,
|
||||
process_background=False,
|
||||
process_color_background=False,
|
||||
line_scale=15,
|
||||
copy_text=None,
|
||||
shift_text=["l", "t"],
|
||||
|
|
@ -116,6 +117,7 @@ class Lattice(BaseParser):
|
|||
self.table_regions = table_regions
|
||||
self.table_areas = table_areas
|
||||
self.process_background = process_background
|
||||
self.process_color_background = process_color_background
|
||||
self.line_scale = line_scale
|
||||
self.copy_text = copy_text
|
||||
self.shift_text = shift_text
|
||||
|
|
@ -236,6 +238,7 @@ class Lattice(BaseParser):
|
|||
self.image, self.threshold = adaptive_threshold(
|
||||
self.imagename,
|
||||
process_background=self.process_background,
|
||||
process_color_background=self.process_color_background,
|
||||
blocksize=self.threshold_blocksize,
|
||||
c=self.threshold_constant,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ def download_url(url):
|
|||
stream_kwargs = ["columns", "edge_tol", "row_tol", "column_tol"]
|
||||
lattice_kwargs = [
|
||||
"process_background",
|
||||
"process_color_background",
|
||||
"line_scale",
|
||||
"copy_text",
|
||||
"shift_text",
|
||||
|
|
|
|||
Loading…
Reference in New Issue