From 49d3f0f3aa1547d66eccb33ef0e4e0ebaad78d21 Mon Sep 17 00:00:00 2001
From: Frh <francois.huet+github@gmail.com>
Date: Fri, 10 Apr 2020 16:34:30 -0700
Subject: [PATCH] Rename table_bbox (singular) to table_areas

The object is an index of bounding boxes, in some cases given by users.
It is already called "areas" in one section of the code; this change
makes that naming systematic.
---
 camelot/parsers/lattice.py | 22 +++++++++++-----------
 camelot/parsers/stream.py  | 28 ++++++++++++++--------------
 camelot/plotting.py        | 10 +++++-----
 tests/test_common.py       |  1 +
 4 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index a96f8df..eefd443 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -207,7 +207,7 @@ class Lattice(BaseParser):
                                 t.cells[i][j].text = t.cells[i - 1][j].text
         return t
 
-    def _generate_table_bbox(self):
+    def _generate_table_areas(self):
         def scale_areas(areas):
             scaled_areas = []
             for area in areas:
@@ -258,7 +258,7 @@ class Lattice(BaseParser):
             )
 
             contours = find_contours(vertical_mask, horizontal_mask)
-            table_bbox = find_joints(contours, vertical_mask, horizontal_mask)
+            table_areas = find_joints(contours, vertical_mask, horizontal_mask)
         else:
             vertical_mask, vertical_segments = find_lines(
                 self.threshold,
@@ -274,20 +274,20 @@ class Lattice(BaseParser):
             )
 
             areas = scale_areas(self.table_areas)
-            table_bbox = find_joints(areas, vertical_mask, horizontal_mask)
+            table_areas = find_joints(areas, vertical_mask, horizontal_mask)
 
-        self.table_bbox_unscaled = copy.deepcopy(table_bbox)
+        self.table_areas_unscaled = copy.deepcopy(table_areas)
 
         [
-            self.table_bbox,
+            self.table_areas,
             self.vertical_segments,
             self.horizontal_segments
         ] = scale_image(
-            table_bbox, vertical_segments, horizontal_segments, pdf_scalers
+            table_areas, vertical_segments, horizontal_segments, pdf_scalers
         )
 
     def _generate_columns_and_rows(self, table_idx, tk):
-        # select elements which lie within table_bbox
+        # select elements which lie within table_areas
         t_bbox = {}
         v_s, h_s = segments_in_bbox(
             tk, self.vertical_segments, self.horizontal_segments
@@ -300,7 +300,7 @@ class Lattice(BaseParser):
 
         self.t_bbox = t_bbox
 
-        cols, rows = zip(*self.table_bbox[tk])
+        cols, rows = zip(*self.table_areas[tk])
         cols, rows = list(cols), list(rows)
         cols.extend([tk[0], tk[2]])
         rows.extend([tk[1], tk[3]])
@@ -366,7 +366,7 @@ class Lattice(BaseParser):
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
         table._text = _text
-        table._image = (self.pdf_image, self.table_bbox_unscaled)
+        table._image = (self.pdf_image, self.table_areas_unscaled)
         table._segments = (self.vertical_segments, self.horizontal_segments)
         table._textedges = None
 
@@ -391,12 +391,12 @@ class Lattice(BaseParser):
             return []
 
         self._generate_image_file()
-        self._generate_table_bbox()
+        self._generate_table_areas()
 
         _tables = []
         # sort tables based on y-coord
         for table_idx, tk in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
+            sorted(self.table_areas.keys(), key=lambda x: x[1], reverse=True)
         ):
             cols, rows, v_s, h_s = self._generate_columns_and_rows(
                 table_idx, tk)
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 0d393f3..ae236a1 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -299,14 +299,14 @@ class Stream(BaseParser):
         relevant_textedges = textedges.get_relevant()
         self.textedges.extend(relevant_textedges)
         # guess table areas using textlines and relevant edges
-        table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
+        table_areas = textedges.get_table_areas(textlines, relevant_textedges)
         # treat whole page as table area if no table areas found
-        if not table_bbox:
-            table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
+        if not table_areas:
+            table_areas = {(0, 0, self.pdf_width, self.pdf_height): None}
 
-        return table_bbox
+        return table_areas
 
-    def _generate_table_bbox(self):
+    def _generate_table_areas(self):
         self.textedges = []
         if self.table_areas is None:
             all_text_segments = self.horizontal_text + self.vertical_text
@@ -325,20 +325,20 @@ class Stream(BaseParser):
                         (x1, y2, x2, y1), all_text_segments)
                     text_segments.extend(region_text)
             # find tables based on nurminen's detection algorithm
-            table_bbox = self._nurminen_table_detection(text_segments)
+            table_areas = self._nurminen_table_detection(text_segments)
         else:
-            table_bbox = {}
+            table_areas = {}
             for area in self.table_areas:
                 x1, y1, x2, y2 = area.split(",")
                 x1 = float(x1)
                 y1 = float(y1)
                 x2 = float(x2)
                 y2 = float(y2)
-                table_bbox[(x1, y2, x2, y1)] = None
-        self.table_bbox = table_bbox
+                table_areas[(x1, y2, x2, y1)] = None
+        self.table_areas = table_areas
 
     def _generate_columns_and_rows(self, table_idx, tk):
-        # select elements which lie within table_bbox
+        # select elements which lie within table_areas
         t_bbox = {}
         t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text)
         t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text)
@@ -464,7 +464,7 @@ class Stream(BaseParser):
         _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
         table._text = _text
         self.generate_image()
-        table._image = (self.pdf_image, self.table_bbox)
+        table._image = (self.pdf_image, self.table_areas)
         table._segments = None
         table._textedges = self.textedges
 
@@ -492,13 +492,13 @@ class Stream(BaseParser):
             return []
 
         # Identify plausible areas within the doc where tables lie,
-        # populate table_bbox keys with these areas.
-        self._generate_table_bbox()
+        # populate table_areas keys with these areas.
+        self._generate_table_areas()
 
         _tables = []
         # sort tables based on y-coord
         for table_idx, tk in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
+            sorted(self.table_areas.keys(), key=lambda x: x[1], reverse=True)
         ):
             cols, rows = self._generate_columns_and_rows(table_idx, tk)
             table = self._generate_table(table_idx, cols, rows)
diff --git a/camelot/plotting.py b/camelot/plotting.py
index 75d4449..07ba0a6 100644
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@@ -121,7 +121,7 @@ class PlotMethods(object):
         fig : matplotlib.fig.Figure
 
         """
-        img, table_bbox = table._image
+        img, table_areas = table._image
         _FOR_LATTICE = table.flavor == "lattice"
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
@@ -137,7 +137,7 @@ class PlotMethods(object):
                     )
                 )
 
-        for t in table_bbox.keys():
+        for t in table_areas.keys():
             ax.add_patch(
                 patches.Rectangle(
                     (t[0], t[1]), t[2] - t[0], t[3] - t[1],
@@ -204,13 +204,13 @@ class PlotMethods(object):
         fig : matplotlib.fig.Figure
 
         """
-        img, table_bbox = table._image
+        img, table_areas = table._image
         fig = plt.figure()
         ax = fig.add_subplot(111, aspect="equal")
         x_coord = []
         y_coord = []
-        for k in table_bbox.keys():
-            for coord in table_bbox[k]:
+        for k in table_areas.keys():
+            for coord in table_areas[k]:
                 x_coord.append(coord[0])
                 y_coord.append(coord[1])
         ax.plot(x_coord, y_coord, "ro")
diff --git a/tests/test_common.py b/tests/test_common.py
index 20941e8..ac20d0b 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -192,6 +192,7 @@ def test_stream_vertical_header():
 
     filename = os.path.join(testdir, "vertical_header.pdf")
     tables = camelot.read_pdf(filename, flavor="stream")
+    assert len(tables) == 1
     assert_frame_equal(df, tables[0].df)