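Add configurable resolution for PDF to PNG conversion (default 300, previously hard-coded 600); move edge_close_tol into the TextEdges constructor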

2018-12-20 15:01:29 +05:30 · 2018-12-20 15:01:29 +05:30 · 3f5af18738
parent e0090fbb0a
commit 3f5af18738
4 changed files with 13 additions and 9 deletions
--- a/camelot/cli.py
+++ b/camelot/cli.py
@ -84,6 +84,8 @@ def cli(ctx, *args, **kwargs):
              ' may be zero or negative as well.')
@click.option('-I', '--iterations', default=0,
              help='Number of times for erosion/dilation will be applied.')
@click.option('-res', '--resolution', default=300,
              help='Resolution used for PDF to PNG conversion.')
@click.option('-plot', '--plot_type',
              type=click.Choice(['text', 'grid', 'contour', 'joint', 'line']),
              help='Plot elements found on PDF page for visual debugging.')
--- a/camelot/core.py
+++ b/camelot/core.py
@ -72,7 +72,8 @@ class TextEdges(object):
    the PDF page. The dict has three keys based on the alignments,
    and each key's value is a list of camelot.core.TextEdge objects.
    """
-    def __init__(self):
+    def __init__(self, edge_close_tol=50):
        self.edge_close_tol = edge_close_tol
        self._textedges = {'left': [], 'right': [], 'middle': []}
    @staticmethod
@ -104,7 +105,7 @@ class TextEdges(object):
        te = TextEdge(x, y0, y1, align=align)
        self._textedges[align].append(te)
-    def update(self, textline, edge_close_tol=50):
+    def update(self, textline):
        """Updates an existing text edge in the current dict.
        """
        for align in ['left', 'right', 'middle']:
@ -114,15 +115,15 @@ class TextEdges(object):
                self.add(textline, align)
            else:
                self._textedges[align][idx].update_coords(
-                    x_coord, textline.y0, edge_close_tol=edge_close_tol)
+                    x_coord, textline.y0, edge_close_tol=self.edge_close_tol)
-    def generate(self, textlines, edge_close_tol=50):
+    def generate(self, textlines):
        """Generates the text edges dict based on horizontal text
        rows.
        """
        for tl in textlines:
            if len(tl.get_text().strip()) > 1: # TODO: hacky
-                self.update(tl, edge_close_tol=edge_close_tol)
+                self.update(tl)
    def get_relevant(self):
        """Returns the list of relevant text edges (all share the same
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -76,7 +76,7 @@ class Lattice(BaseParser):
                 line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
                 split_text=False, flag_size=False, line_close_tol=2,
                 joint_close_tol=2, threshold_blocksize=15, threshold_constant=-2,
-                 iterations=0, **kwargs):
+                 iterations=0, resolution=300, **kwargs):
        self.table_areas = table_areas
        self.process_background = process_background
        self.line_size_scaling = line_size_scaling
@ -89,6 +89,7 @@ class Lattice(BaseParser):
        self.threshold_blocksize = threshold_blocksize
        self.threshold_constant = threshold_constant
        self.iterations = iterations
        self.resolution = resolution
    @staticmethod
    def _reduce_index(t, idx, shift_text):
@ -209,7 +210,7 @@ class Lattice(BaseParser):
            '-sDEVICE=png16m',
            '-o',
            self.imagename,
-            '-r600',
+            '-r{}'.format(self.resolution),
            self.filename
        ]
        gs = get_executable()
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -255,9 +255,9 @@ class Stream(BaseParser):
        # TODO: add support for arabic text #141
        # sort textlines in reading order
        textlines.sort(key=lambda x: (-x.y0, x.x0))
-        textedges = TextEdges()
+        textedges = TextEdges(edge_close_tol=self.edge_close_tol)
        # generate left, middle and right textedges
-        textedges.generate(textlines, edge_close_tol=self.edge_close_tol)
+        textedges.generate(textlines)
        # select relevant edges
        relevant_textedges = textedges.get_relevant()
        self.textedges.extend(relevant_textedges)