Add resolution

pull/2/head
Vinayak Mehta 2018-12-20 15:01:29 +05:30
parent e0090fbb0a
commit 3f5af18738
4 changed files with 13 additions and 9 deletions

View File

@@ -84,6 +84,8 @@ def cli(ctx, *args, **kwargs):
' may be zero or negative as well.') ' may be zero or negative as well.')
@click.option('-I', '--iterations', default=0, @click.option('-I', '--iterations', default=0,
help='Number of times for erosion/dilation will be applied.') help='Number of times for erosion/dilation will be applied.')
@click.option('-res', '--resolution', default=300,
help='Resolution used for PDF to PNG conversion.')
@click.option('-plot', '--plot_type', @click.option('-plot', '--plot_type',
type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']), type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']),
help='Plot elements found on PDF page for visual debugging.') help='Plot elements found on PDF page for visual debugging.')

View File

@@ -72,7 +72,8 @@ class TextEdges(object):
the PDF page. The dict has three keys based on the alignments, the PDF page. The dict has three keys based on the alignments,
and each key's value is a list of camelot.core.TextEdge objects. and each key's value is a list of camelot.core.TextEdge objects.
""" """
def __init__(self): def __init__(self, edge_close_tol=50):
self.edge_close_tol = edge_close_tol
self._textedges = {'left': [], 'right': [], 'middle': []} self._textedges = {'left': [], 'right': [], 'middle': []}
@staticmethod @staticmethod
@@ -104,7 +105,7 @@ class TextEdges(object):
te = TextEdge(x, y0, y1, align=align) te = TextEdge(x, y0, y1, align=align)
self._textedges[align].append(te) self._textedges[align].append(te)
def update(self, textline, edge_close_tol=50): def update(self, textline):
"""Updates an existing text edge in the current dict. """Updates an existing text edge in the current dict.
""" """
for align in ['left', 'right', 'middle']: for align in ['left', 'right', 'middle']:
@@ -114,15 +115,15 @@ class TextEdges(object):
self.add(textline, align) self.add(textline, align)
else: else:
self._textedges[align][idx].update_coords( self._textedges[align][idx].update_coords(
x_coord, textline.y0, edge_close_tol=edge_close_tol) x_coord, textline.y0, edge_close_tol=self.edge_close_tol)
def generate(self, textlines, edge_close_tol=50): def generate(self, textlines):
"""Generates the text edges dict based on horizontal text """Generates the text edges dict based on horizontal text
rows. rows.
""" """
for tl in textlines: for tl in textlines:
if len(tl.get_text().strip()) > 1: # TODO: hacky if len(tl.get_text().strip()) > 1: # TODO: hacky
self.update(tl, edge_close_tol=edge_close_tol) self.update(tl)
def get_relevant(self): def get_relevant(self):
"""Returns the list of relevant text edges (all share the same """Returns the list of relevant text edges (all share the same

View File

@@ -76,7 +76,7 @@ class Lattice(BaseParser):
line_size_scaling=15, copy_text=None, shift_text=['l', 't'], line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
split_text=False, flag_size=False, line_close_tol=2, split_text=False, flag_size=False, line_close_tol=2,
joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2, joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
iterations=0, **kwargs): iterations=0, resolution=300, **kwargs):
self.table_areas = table_areas self.table_areas = table_areas
self.process_background = process_background self.process_background = process_background
self.line_size_scaling = line_size_scaling self.line_size_scaling = line_size_scaling
@@ -89,6 +89,7 @@ class Lattice(BaseParser):
self.threshold_blocksize = threshold_blocksize self.threshold_blocksize = threshold_blocksize
self.threshold_constant = threshold_constant self.threshold_constant = threshold_constant
self.iterations = iterations self.iterations = iterations
self.resolution = resolution
@staticmethod @staticmethod
def _reduce_index(t, idx, shift_text): def _reduce_index(t, idx, shift_text):
@@ -209,7 +210,7 @@ class Lattice(BaseParser):
'-sDEVICE=png16m', '-sDEVICE=png16m',
'-o', '-o',
self.imagename, self.imagename,
'-r600', '-r{}'.format(self.resolution),
self.filename self.filename
] ]
gs = get_executable() gs = get_executable()

View File

@@ -255,9 +255,9 @@ class Stream(BaseParser):
# TODO: add support for arabic text #141 # TODO: add support for arabic text #141
# sort textlines in reading order # sort textlines in reading order
textlines.sort(key=lambda x: (-x.y0, x.x0)) textlines.sort(key=lambda x: (-x.y0, x.x0))
textedges = TextEdges() textedges = TextEdges(edge_close_tol=self.edge_close_tol)
# generate left, middle and right textedges # generate left, middle and right textedges
textedges.generate(textlines, edge_close_tol=self.edge_close_tol) textedges.generate(textlines)
# select relevant edges # select relevant edges
relevant_textedges = textedges.get_relevant() relevant_textedges = textedges.get_relevant()
self.textedges.extend(relevant_textedges) self.textedges.extend(relevant_textedges)