Add resolution
parent
e0090fbb0a
commit
3f5af18738
|
|
@ -84,6 +84,8 @@ def cli(ctx, *args, **kwargs):
|
|||
' may be zero or negative as well.')
|
||||
@click.option('-I', '--iterations', default=0,
|
||||
help='Number of times for erosion/dilation will be applied.')
|
||||
@click.option('-res', '--resolution', default=300,
|
||||
help='Resolution used for PDF to PNG conversion.')
|
||||
@click.option('-plot', '--plot_type',
|
||||
type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']),
|
||||
help='Plot elements found on PDF page for visual debugging.')
|
||||
|
|
|
|||
|
|
@ -72,7 +72,8 @@ class TextEdges(object):
|
|||
the PDF page. The dict has three keys based on the alignments,
|
||||
and each key's value is a list of camelot.core.TextEdge objects.
|
||||
"""
|
||||
def __init__(self):
|
||||
def __init__(self, edge_close_tol=50):
|
||||
self.edge_close_tol = edge_close_tol
|
||||
self._textedges = {'left': [], 'right': [], 'middle': []}
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -104,7 +105,7 @@ class TextEdges(object):
|
|||
te = TextEdge(x, y0, y1, align=align)
|
||||
self._textedges[align].append(te)
|
||||
|
||||
def update(self, textline, edge_close_tol=50):
|
||||
def update(self, textline):
|
||||
"""Updates an existing text edge in the current dict.
|
||||
"""
|
||||
for align in ['left', 'right', 'middle']:
|
||||
|
|
@ -114,15 +115,15 @@ class TextEdges(object):
|
|||
self.add(textline, align)
|
||||
else:
|
||||
self._textedges[align][idx].update_coords(
|
||||
x_coord, textline.y0, edge_close_tol=edge_close_tol)
|
||||
x_coord, textline.y0, edge_close_tol=self.edge_close_tol)
|
||||
|
||||
def generate(self, textlines, edge_close_tol=50):
|
||||
def generate(self, textlines):
|
||||
"""Generates the text edges dict based on horizontal text
|
||||
rows.
|
||||
"""
|
||||
for tl in textlines:
|
||||
if len(tl.get_text().strip()) > 1: # TODO: hacky
|
||||
self.update(tl, edge_close_tol=edge_close_tol)
|
||||
self.update(tl)
|
||||
|
||||
def get_relevant(self):
|
||||
"""Returns the list of relevant text edges (all share the same
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class Lattice(BaseParser):
|
|||
line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
|
||||
split_text=False, flag_size=False, line_close_tol=2,
|
||||
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
|
||||
iterations=0, **kwargs):
|
||||
iterations=0, resolution=300, **kwargs):
|
||||
self.table_areas = table_areas
|
||||
self.process_background = process_background
|
||||
self.line_size_scaling = line_size_scaling
|
||||
|
|
@ -89,6 +89,7 @@ class Lattice(BaseParser):
|
|||
self.threshold_blocksize = threshold_blocksize
|
||||
self.threshold_constant = threshold_constant
|
||||
self.iterations = iterations
|
||||
self.resolution = resolution
|
||||
|
||||
@staticmethod
|
||||
def _reduce_index(t, idx, shift_text):
|
||||
|
|
@ -209,7 +210,7 @@ class Lattice(BaseParser):
|
|||
'-sDEVICE=png16m',
|
||||
'-o',
|
||||
self.imagename,
|
||||
'-r600',
|
||||
'-r{}'.format(self.resolution),
|
||||
self.filename
|
||||
]
|
||||
gs = get_executable()
|
||||
|
|
|
|||
|
|
@ -255,9 +255,9 @@ class Stream(BaseParser):
|
|||
# TODO: add support for arabic text #141
|
||||
# sort textlines in reading order
|
||||
textlines.sort(key=lambda x: (-x.y0, x.x0))
|
||||
textedges = TextEdges()
|
||||
textedges = TextEdges(edge_close_tol=self.edge_close_tol)
|
||||
# generate left, middle and right textedges
|
||||
textedges.generate(textlines, edge_close_tol=self.edge_close_tol)
|
||||
textedges.generate(textlines)
|
||||
# select relevant edges
|
||||
relevant_textedges = textedges.get_relevant()
|
||||
self.textedges.extend(relevant_textedges)
|
||||
|
|
|
|||
Loading…
Reference in New Issue