From 04383920b41bbdf461a4868df8054ebb477ab667 Mon Sep 17 00:00:00 2001
From: Vinayak Mehta <vmehta94@gmail.com>
Date: Sat, 8 Sep 2018 05:38:43 +0530
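Subject: [PATCH] Rename parser keyword arguments

Replace the terse tolerance/scale keyword names with descriptive ones
and rename the helpers that take them:

  jtol                        -> joint_close_tol
  mtol (Lattice, utils)       -> line_close_tol
  mtol (Stream)               -> col_close_tol
  ytol                        -> row_close_tol
  scale, line_scale           -> line_size_scaling
  char_scale                  -> char_size_scaling
  invert                      -> process_background
  fill                        -> copy_text
  Lattice._fill_spanning      -> Lattice._copy_spanning_text
  utils.merge_close_values    -> utils.merge_close_lines
  utils.remove_close_values   -> utils.remove_close_lines

Lattice.__init__ and Stream.__init__ also reorder their parameters, so
callers that pass these arguments positionally must be updated.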
---
 camelot/core.py             | 16 +++++-----
 camelot/image_processing.py | 26 ++++++++--------
 camelot/parsers/lattice.py  | 59 ++++++++++++++++++++-----------------
 camelot/parsers/stream.py   | 36 +++++++++++-----------
 camelot/utils.py            | 12 ++++----
 5 files changed, 77 insertions(+), 72 deletions(-)

diff --git a/camelot/core.py b/camelot/core.py
index 015c533..e3f9bb3 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -139,14 +139,14 @@ class Table(object):
                 cell.left = cell.right = cell.top = cell.bottom = True
         return self
 
-    def set_edges(self, vertical, horizontal, jtol=2):
+    def set_edges(self, vertical, horizontal, joint_close_tol=2):
         """
 
         Parameters
         ----------
         vertical
         horizontal
-        jtol
+        joint_close_tol
 
         Returns
         -------
@@ -156,11 +156,11 @@ class Table(object):
             # find closest x coord
             # iterate over y coords and find closest start and end points
             i = [i for i, t in enumerate(self.cols)
-                 if np.isclose(v[0], t[0], atol=jtol)]
+                 if np.isclose(v[0], t[0], atol=joint_close_tol)]
             j = [j for j, t in enumerate(self.rows)
-                 if np.isclose(v[3], t[0], atol=jtol)]
+                 if np.isclose(v[3], t[0], atol=joint_close_tol)]
             k = [k for k, t in enumerate(self.rows)
-                 if np.isclose(v[1], t[0], atol=jtol)]
+                 if np.isclose(v[1], t[0], atol=joint_close_tol)]
             if not j:
                 continue
             J = j[0]
@@ -207,11 +207,11 @@ class Table(object):
             # find closest y coord
             # iterate over x coords and find closest start and end points
             i = [i for i, t in enumerate(self.rows)
-                 if np.isclose(h[1], t[0], atol=jtol)]
+                 if np.isclose(h[1], t[0], atol=joint_close_tol)]
             j = [j for j, t in enumerate(self.cols)
-                 if np.isclose(h[0], t[0], atol=jtol)]
+                 if np.isclose(h[0], t[0], atol=joint_close_tol)]
             k = [k for k, t in enumerate(self.cols)
-                 if np.isclose(h[2], t[0], atol=jtol)]
+                 if np.isclose(h[2], t[0], atol=joint_close_tol)]
             if not j:
                 continue
             J = j[0]
diff --git a/camelot/image_processing.py b/camelot/image_processing.py
index a1526ef..bdd82fb 100644
--- a/camelot/image_processing.py
+++ b/camelot/image_processing.py
@@ -7,13 +7,13 @@ import numpy as np
 from .utils import merge_tuples
 
 
-def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
+def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2):
     """
 
     Parameters
     ----------
     imagename
-    invert
+    process_background
     blocksize
     c
 
@@ -24,7 +24,7 @@ def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
     img = cv2.imread(imagename)
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
-    if invert:
+    if process_background:
         threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
             cv2.THRESH_BINARY, blocksize, c)
     else:
@@ -33,14 +33,14 @@ def adaptive_threshold(imagename, invert=False, blocksize=15, c=-2):
     return img, threshold
 
 
-def find_lines(threshold, direction='horizontal', scale=15, iterations=0):
+def find_lines(threshold, direction='horizontal', line_size_scaling=15, iterations=0):
     """
 
     Parameters
     ----------
     threshold
     direction
-    scale
+    line_size_scaling
     iterations
 
     Returns
@@ -50,10 +50,10 @@ def find_lines(threshold, direction='horizontal', scale=15, iterations=0):
     lines = []
 
     if direction == 'vertical':
-        size = threshold.shape[0] // scale
+        size = threshold.shape[0] // line_size_scaling
         el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
     elif direction == 'horizontal':
-        size = threshold.shape[1] // scale
+        size = threshold.shape[1] // line_size_scaling
         el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
     elif direction is None:
         raise ValueError("Specify direction as either 'vertical' or"
@@ -148,19 +148,19 @@ def find_table_joints(contours, vertical, horizontal):
     return tables
 
 
-def remove_lines(threshold, line_scale=15):
+def remove_lines(threshold, line_size_scaling=15):
     """
 
     Parameters
     ----------
     threshold
-    line_scale
+    line_size_scaling
 
     Returns
     -------
 
     """
-    size = threshold.shape[0] // line_scale
+    size = threshold.shape[0] // line_size_scaling
     vertical_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
     horizontal_erode_el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1))
     dilate_el = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
@@ -176,19 +176,19 @@ def remove_lines(threshold, line_scale=15):
     return threshold
 
 
-def find_cuts(threshold, char_scale=200):
+def find_cuts(threshold, char_size_scaling=200):
     """
 
     Parameters
     ----------
     threshold
-    char_scale
+    char_size_scaling
 
     Returns
     -------
 
     """
-    size = threshold.shape[0] // char_scale
+    size = threshold.shape[0] // char_size_scaling
     char_el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size))
 
     threshold = cv2.erode(threshold, char_el)
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index bf4fca3..ba79230 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -10,7 +10,7 @@ import pandas as pd
 from .base import BaseParser
 from ..core import Table
 from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
-                     merge_close_values, get_table_index, compute_accuracy,
+                     merge_close_lines, get_table_index, compute_accuracy,
                      count_empty_strings, encode_, setup_logging)
 from ..image_processing import (adaptive_threshold, find_lines,
                                 find_table_contours, find_table_joints)
@@ -23,23 +23,24 @@ class Lattice(BaseParser):
     """
 
     """
-    def __init__(self, table_area=None, fill=None, mtol=2, jtol=2,
-                 blocksize=15, threshold_constant=-2, scale=15, iterations=0,
-                 invert=False, margins=(1.0, 0.5, 0.1), split_text=False,
-                 flag_size=True, shift_text=['l', 't'], debug=None):
+    def __init__(self, table_area=None, process_background=False,
+                 line_size_scaling=15, copy_text=None, shift_text=['l', 't'],
+                 split_text=False, flag_size=True, line_close_tol=2,
+                 joint_close_tol=2, blocksize=15, threshold_constant=-2,
+                 iterations=0, margins=(1.0, 0.5, 0.1), debug=None):
         self.table_area = table_area
-        self.fill = fill
-        self.mtol = mtol
-        self.jtol = jtol
-        self.blocksize = blocksize
-        self.threshold_constant = threshold_constant
-        self.scale = scale
-        self.iterations = iterations
-        self.invert = invert
-        self.char_margin, self.line_margin, self.word_margin = margins
+        self.process_background = process_background
+        self.line_size_scaling = line_size_scaling
+        self.copy_text = copy_text
+        self.shift_text = shift_text
         self.split_text = split_text
         self.flag_size = flag_size
-        self.shift_text = shift_text
+        self.line_close_tol = line_close_tol
+        self.joint_close_tol = joint_close_tol
+        self.blocksize = blocksize
+        self.threshold_constant = threshold_constant
+        self.iterations = iterations
+        self.char_margin, self.line_margin, self.word_margin = margins
         self.debug = debug
 
     @staticmethod
@@ -67,8 +68,8 @@ class Lattice(BaseParser):
         return indices
 
     @staticmethod
-    def _fill_spanning(t, fill=None):
-        for f in fill:
+    def _copy_spanning_text(t, copy_text=None):
+        for f in copy_text:
             if f == "h":
                 for i in range(len(t.cells)):
                     for j in range(len(t.cells[i])):
@@ -96,7 +97,7 @@ class Lattice(BaseParser):
             stderr=subprocess.STDOUT)
 
     def _generate_table_bbox(self):
-        self.image, self.threshold = adaptive_threshold(self.imagename, invert=self.invert,
+        self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
             blocksize=self.blocksize, c=self.threshold_constant)
         image_width = self.image.shape[1]
         image_height = self.image.shape[0]
@@ -107,10 +108,12 @@ class Lattice(BaseParser):
         image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
         pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
 
-        vertical_mask, vertical_segments = find_lines(self.threshold,
-            direction='vertical', scale=self.scale, iterations=self.iterations)
-        horizontal_mask, horizontal_segments = find_lines(self.threshold,
-            direction='horizontal', scale=self.scale, iterations=self.iterations)
+        vertical_mask, vertical_segments = find_lines(
+            self.threshold, direction='vertical',
+            line_size_scaling=self.line_size_scaling, iterations=self.iterations)
+        horizontal_mask, horizontal_segments = find_lines(
+            self.threshold, direction='horizontal',
+            line_size_scaling=self.line_size_scaling, iterations=self.iterations)
 
         if self.table_area is not None:
             areas = []
@@ -149,8 +152,10 @@ class Lattice(BaseParser):
         cols.extend([tk[0], tk[2]])
         rows.extend([tk[1], tk[3]])
         # sort horizontal and vertical segments
-        cols = merge_close_values(sorted(cols), mtol=self.mtol)
-        rows = merge_close_values(sorted(rows, reverse=True), mtol=self.mtol)
+        cols = merge_close_lines(
+            sorted(cols), line_close_tol=self.line_close_tol)
+        rows = merge_close_lines(
+            sorted(rows, reverse=True), line_close_tol=self.line_close_tol)
         # make grid using x and y coord of shortlisted rows and cols
         cols = [(cols[i], cols[i + 1])
                 for i in range(0, len(cols) - 1)]
@@ -167,7 +172,7 @@ class Lattice(BaseParser):
 
         table = Table(cols, rows)
         # set table edges to True using ver+hor lines
-        table = table.set_edges(v_s, h_s, jtol=self.jtol)
+        table = table.set_edges(v_s, h_s, joint_close_tol=self.joint_close_tol)
         # set spanning cells to True
         table = table.set_span()
         # set table border edges to True
@@ -186,8 +191,8 @@ class Lattice(BaseParser):
                         table.cells[r_idx][c_idx].text = text
         accuracy = compute_accuracy([[100, pos_errors]])
 
-        if self.fill is not None:
-            table = Lattice._fill_spanning(table, fill=self.fill)
+        if self.copy_text is not None:
+            table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
 
         data = table.data
         data = encode_(data)
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 1976505..12f4b6b 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -18,17 +18,17 @@ class Stream(BaseParser):
     """
 
     """
-    def __init__(self, table_area=None, columns=None, ytol=2, mtol=0,
-                 margins=(1.0, 0.5, 0.1), split_text=False, flag_size=True,
-                 debug=None):
+    def __init__(self, table_area=None, columns=None, split_text=False,
+                 flag_size=True, row_close_tol=2, col_close_tol=0,
+                 margins=(1.0, 0.5, 0.1), debug=None):
         self.table_area = table_area
         self.columns = columns
         self._validate_columns()
-        self.ytol = ytol
-        self.mtol = mtol
-        self.char_margin, self.line_margin, self.word_margin = margins
         self.split_text = split_text
         self.flag_size = flag_size
+        self.row_close_tol = row_close_tol
+        self.col_close_tol = col_close_tol
+        self.char_margin, self.line_margin, self.word_margin = margins
         self.debug = debug
 
     @staticmethod
@@ -41,7 +41,7 @@ class Stream(BaseParser):
         return text_bbox
 
     @staticmethod
-    def _group_rows(text, ytol=2):
+    def _group_rows(text, row_close_tol=2):
         row_y = 0
         rows = []
         temp = []
@@ -50,7 +50,7 @@ class Stream(BaseParser):
             # if t.get_text().strip() and all([obj.upright for obj in t._objs if
             # type(obj) is LTChar]):
             if t.get_text().strip():
-                if not np.isclose(row_y, t.y0, atol=ytol):
+                if not np.isclose(row_y, t.y0, atol=row_close_tol):
                     rows.append(sorted(temp, key=lambda t: t.x0))
                     temp = []
                     row_y = t.y0
@@ -60,24 +60,24 @@ class Stream(BaseParser):
         return rows
 
     @staticmethod
-    def _merge_columns(l, mtol=0):
+    def _merge_columns(l, col_close_tol=0):
         merged = []
         for higher in l:
             if not merged:
                 merged.append(higher)
             else:
                 lower = merged[-1]
-                if mtol >= 0:
+                if col_close_tol >= 0:
                     if (higher[0] <= lower[1] or
-                            np.isclose(higher[0], lower[1], atol=mtol)):
+                            np.isclose(higher[0], lower[1], atol=col_close_tol)):
                         upper_bound = max(lower[1], higher[1])
                         lower_bound = min(lower[0], higher[0])
                         merged[-1] = (lower_bound, upper_bound)
                     else:
                         merged.append(higher)
-                elif mtol < 0:
+                elif col_close_tol < 0:
                     if higher[0] <= lower[1]:
-                        if np.isclose(higher[0], lower[1], atol=abs(mtol)):
+                        if np.isclose(higher[0], lower[1], atol=abs(col_close_tol)):
                             merged.append(higher)
                         else:
                             upper_bound = max(lower[1], higher[1])
@@ -99,9 +99,9 @@ class Stream(BaseParser):
         return rows
 
     @staticmethod
-    def _add_columns(cols, text, ytol):
+    def _add_columns(cols, text, row_close_tol):
         if text:
-            text = Stream._group_rows(text, ytol=ytol)
+            text = Stream._group_rows(text, row_close_tol=row_close_tol)
             elements = [len(r) for r in text]
             new_cols = [(t.x0, t.x1)
                 for r in text if len(r) == max(elements) for t in r]
@@ -149,7 +149,7 @@ class Stream(BaseParser):
             self.t_bbox[direction].sort(key=lambda x: (-x.y0, x.x0))
 
         text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
-        rows_grouped = self._group_rows(self.t_bbox['horizontal'], ytol=self.ytol)
+        rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol)
         rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
         elements = [len(r) for r in rows_grouped]
 
@@ -170,7 +170,7 @@ class Stream(BaseParser):
                     os.path.basename(self.rootname)))
             cols = [(t.x0, t.x1)
                 for r in rows_grouped if len(r) == ncols for t in r]
-            cols = self._merge_columns(sorted(cols), mtol=self.mtol)
+            cols = self._merge_columns(sorted(cols), col_close_tol=self.col_close_tol)
             inner_text = []
             for i in range(1, len(cols)):
                 left = cols[i - 1][1]
@@ -182,7 +182,7 @@ class Stream(BaseParser):
                             for t in self.t_bbox[direction]
                             if t.x0 > cols[-1][1] or t.x1 < cols[0][0]]
             inner_text.extend(outer_text)
-            cols = self._add_columns(cols, inner_text, self.ytol)
+            cols = self._add_columns(cols, inner_text, self.row_close_tol)
             cols = self._join_columns(cols, text_x_min, text_x_max)
 
         return cols, rows
diff --git a/camelot/utils.py b/camelot/utils.py
index c957a4e..d132b5a 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -236,13 +236,13 @@ def text_in_bbox(bbox, text):
     return t_bbox
 
 
-def remove_close_values(ar, mtol=2):
+def remove_close_lines(ar, line_close_tol=2):
     """
 
     Parameters
     ----------
     ar
-    mtol
+    line_close_tol
 
     Returns
     -------
@@ -254,20 +254,20 @@ def remove_close_values(ar, mtol=2):
             ret.append(a)
         else:
             temp = ret[-1]
-            if np.isclose(temp, a, atol=mtol):
+            if np.isclose(temp, a, atol=line_close_tol):
                 pass
             else:
                 ret.append(a)
     return ret
 
 
-def merge_close_values(ar, mtol=2):
+def merge_close_lines(ar, line_close_tol=2):
     """
 
     Parameters
     ----------
     ar
-    mtol
+    line_close_tol
 
     Returns
     -------
@@ -279,7 +279,7 @@ def merge_close_values(ar, mtol=2):
             ret.append(a)
         else:
             temp = ret[-1]
-            if np.isclose(temp, a, atol=mtol):
+            if np.isclose(temp, a, atol=line_close_tol):
                 temp = (temp + a) / 2.0
                 ret[-1] = temp
             else: