Add resolution
parent
e0090fbb0a
commit
3f5af18738
|
|
@ -84,6 +84,8 @@ def cli(ctx, *args, **kwargs):
|
||||||
' may be zero or negative as well.')
|
' may be zero or negative as well.')
|
||||||
@click.option('-I', '--iterations', default=0,
|
@click.option('-I', '--iterations', default=0,
|
||||||
help='Number of times for erosion/dilation will be applied.')
|
help='Number of times for erosion/dilation will be applied.')
|
||||||
|
@click.option('-res', '--resolution', default=300,
|
||||||
|
help='Resolution used for PDF to PNG conversion.')
|
||||||
@click.option('-plot', '--plot_type',
|
@click.option('-plot', '--plot_type',
|
||||||
type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']),
|
type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']),
|
||||||
help='Plot elements found on PDF page for visual debugging.')
|
help='Plot elements found on PDF page for visual debugging.')
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,8 @@ class TextEdges(object):
|
||||||
the PDF page. The dict has three keys based on the alignments,
|
the PDF page. The dict has three keys based on the alignments,
|
||||||
and each key's value is a list of camelot.core.TextEdge objects.
|
and each key's value is a list of camelot.core.TextEdge objects.
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
def __init__(self, edge_close_tol=50):
|
||||||
|
self.edge_close_tol = edge_close_tol
|
||||||
self._textedges = {'left': [], 'right': [], 'middle': []}
|
self._textedges = {'left': [], 'right': [], 'middle': []}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -104,7 +105,7 @@ class TextEdges(object):
|
||||||
te = TextEdge(x, y0, y1, align=align)
|
te = TextEdge(x, y0, y1, align=align)
|
||||||
self._textedges[align].append(te)
|
self._textedges[align].append(te)
|
||||||
|
|
||||||
def update(self, textline, edge_close_tol=50):
|
def update(self, textline):
|
||||||
"""Updates an existing text edge in the current dict.
|
"""Updates an existing text edge in the current dict.
|
||||||
"""
|
"""
|
||||||
for align in ['left', 'right', 'middle']:
|
for align in ['left', 'right', 'middle']:
|
||||||
|
|
@ -114,15 +115,15 @@ class TextEdges(object):
|
||||||
self.add(textline, align)
|
self.add(textline, align)
|
||||||
else:
|
else:
|
||||||
self._textedges[align][idx].update_coords(
|
self._textedges[align][idx].update_coords(
|
||||||
x_coord, textline.y0, edge_close_tol=edge_close_tol)
|
x_coord, textline.y0, edge_close_tol=self.edge_close_tol)
|
||||||
|
|
||||||
def generate(self, textlines, edge_close_tol=50):
|
def generate(self, textlines):
|
||||||
"""Generates the text edges dict based on horizontal text
|
"""Generates the text edges dict based on horizontal text
|
||||||
rows.
|
rows.
|
||||||
"""
|
"""
|
||||||
for tl in textlines:
|
for tl in textlines:
|
||||||
if len(tl.get_text().strip()) > 1: # TODO: hacky
|
if len(tl.get_text().strip()) > 1: # TODO: hacky
|
||||||
self.update(tl, edge_close_tol=edge_close_tol)
|
self.update(tl)
|
||||||
|
|
||||||
def get_relevant(self):
|
def get_relevant(self):
|
||||||
"""Returns the list of relevant text edges (all share the same
|
"""Returns the list of relevant text edges (all share the same
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ class Lattice(BaseParser):
|
||||||
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
||||||
split_text=False, flag_size=False, line_close_tol=2,
|
split_text=False, flag_size=False, line_close_tol=2,
|
||||||
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
||||||
iterations=0, **kwargs):
|
iterations=0, resolution=300, **kwargs):
|
||||||
self.table_areas = table_areas
|
self.table_areas = table_areas
|
||||||
self.process_background = process_background
|
self.process_background = process_background
|
||||||
self.line_size_scaling = line_size_scaling
|
self.line_size_scaling = line_size_scaling
|
||||||
|
|
@ -89,6 +89,7 @@ class Lattice(BaseParser):
|
||||||
self.threshold_blocksize = threshold_blocksize
|
self.threshold_blocksize = threshold_blocksize
|
||||||
self.threshold_constant = threshold_constant
|
self.threshold_constant = threshold_constant
|
||||||
self.iterations = iterations
|
self.iterations = iterations
|
||||||
|
self.resolution = resolution
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _reduce_index(t, idx, shift_text):
|
def _reduce_index(t, idx, shift_text):
|
||||||
|
|
@ -209,7 +210,7 @@ class Lattice(BaseParser):
|
||||||
'-sDEVICE=png16m',
|
'-sDEVICE=png16m',
|
||||||
'-o',
|
'-o',
|
||||||
self.imagename,
|
self.imagename,
|
||||||
'-r600',
|
'-r{}'.format(self.resolution),
|
||||||
self.filename
|
self.filename
|
||||||
]
|
]
|
||||||
gs = get_executable()
|
gs = get_executable()
|
||||||
|
|
|
||||||
|
|
@ -255,9 +255,9 @@ class Stream(BaseParser):
|
||||||
# TODO: add support for arabic text #141
|
# TODO: add support for arabic text #141
|
||||||
# sort textlines in reading order
|
# sort textlines in reading order
|
||||||
textlines.sort(key=lambda x: (-x.y0, x.x0))
|
textlines.sort(key=lambda x: (-x.y0, x.x0))
|
||||||
textedges = TextEdges()
|
textedges = TextEdges(edge_close_tol=self.edge_close_tol)
|
||||||
# generate left, middle and right textedges
|
# generate left, middle and right textedges
|
||||||
textedges.generate(textlines, edge_close_tol=self.edge_close_tol)
|
textedges.generate(textlines)
|
||||||
# select relevant edges
|
# select relevant edges
|
||||||
relevant_textedges = textedges.get_relevant()
|
relevant_textedges = textedges.get_relevant()
|
||||||
self.textedges.extend(relevant_textedges)
|
self.textedges.extend(relevant_textedges)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue