Rename table_bbox (singular) to table_areas

The object is an index of bounding boxes, in some cases supplied by users. One section of the code already calls it "areas"; this rename makes the naming systematic.
2020-04-10 16:34:30 -07:00 · 2020-04-10 16:34:30 -07:00 · 49d3f0f3aa
parent 270c76a3e7
commit 49d3f0f3aa
4 changed files with 31 additions and 30 deletions
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -207,7 +207,7 @@ class Lattice(BaseParser):
                                t.cells[i][j].text = t.cells[i - 1][j].text
        return t

-    def _generate_table_bbox(self):
+    def _generate_table_areas(self):
        def scale_areas(areas):
            scaled_areas = []
            for area in areas:
@ -258,7 +258,7 @@ class Lattice(BaseParser):
            )

            contours = find_contours(vertical_mask, horizontal_mask)
-            table_bbox = find_joints(contours, vertical_mask, horizontal_mask)
+            table_areas = find_joints(contours, vertical_mask, horizontal_mask)
        else:
            vertical_mask, vertical_segments = find_lines(
                self.threshold,
@ -274,20 +274,20 @@ class Lattice(BaseParser):
            )

            areas = scale_areas(self.table_areas)
-            table_bbox = find_joints(areas, vertical_mask, horizontal_mask)
+            table_areas = find_joints(areas, vertical_mask, horizontal_mask)

-        self.table_bbox_unscaled = copy.deepcopy(table_bbox)
+        self.table_areas_unscaled = copy.deepcopy(table_areas)

        [
-            self.table_bbox,
+            self.table_areas,
            self.vertical_segments,
            self.horizontal_segments
        ] = scale_image(
-            table_bbox, vertical_segments, horizontal_segments, pdf_scalers
+            table_areas, vertical_segments, horizontal_segments, pdf_scalers
        )

    def _generate_columns_and_rows(self, table_idx, tk):
-        # select elements which lie within table_bbox
+        # select elements which lie within table_areas
        t_bbox = {}
        v_s, h_s = segments_in_bbox(
            tk, self.vertical_segments, self.horizontal_segments
@ -300,7 +300,7 @@ class Lattice(BaseParser):

        self.t_bbox = t_bbox

-        cols, rows = zip(*self.table_bbox[tk])
+        cols, rows = zip(*self.table_areas[tk])
        cols, rows = list(cols), list(rows)
        cols.extend([tk[0], tk[2]])
        rows.extend([tk[1], tk[3]])
@ -366,7 +366,7 @@ class Lattice(BaseParser):
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
-        table._image = (self.pdf_image, self.table_bbox_unscaled)
+        table._image = (self.pdf_image, self.table_areas_unscaled)
        table._segments = (self.vertical_segments, self.horizontal_segments)
        table._textedges = None

@ -391,12 +391,12 @@ class Lattice(BaseParser):
            return []

        self._generate_image_file()
-        self._generate_table_bbox()
+        self._generate_table_areas()

        _tables = []
        # sort tables based on y-coord
        for table_idx, tk in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
+            sorted(self.table_areas.keys(), key=lambda x: x[1], reverse=True)
        ):
            cols, rows, v_s, h_s = self._generate_columns_and_rows(
                table_idx, tk)
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -299,14 +299,14 @@ class Stream(BaseParser):
        relevant_textedges = textedges.get_relevant()
        self.textedges.extend(relevant_textedges)
        # guess table areas using textlines and relevant edges
-        table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
+        table_areas = textedges.get_table_areas(textlines, relevant_textedges)
        # treat whole page as table area if no table areas found
-        if not table_bbox:
-            table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
+        if not table_areas:
+            table_areas = {(0, 0, self.pdf_width, self.pdf_height): None}

-        return table_bbox
+        return table_areas

-    def _generate_table_bbox(self):
+    def _generate_table_areas(self):
        self.textedges = []
        if self.table_areas is None:
            all_text_segments = self.horizontal_text + self.vertical_text
@ -325,20 +325,20 @@ class Stream(BaseParser):
                        (x1, y2, x2, y1), all_text_segments)
                    text_segments.extend(region_text)
            # find tables based on nurminen's detection algorithm
-            table_bbox = self._nurminen_table_detection(text_segments)
+            table_areas = self._nurminen_table_detection(text_segments)
        else:
-            table_bbox = {}
+            table_areas = {}
            for area in self.table_areas:
                x1, y1, x2, y2 = area.split(",")
                x1 = float(x1)
                y1 = float(y1)
                x2 = float(x2)
                y2 = float(y2)
-                table_bbox[(x1, y2, x2, y1)] = None
-        self.table_bbox = table_bbox
+                table_areas[(x1, y2, x2, y1)] = None
+        self.table_areas = table_areas

    def _generate_columns_and_rows(self, table_idx, tk):
-        # select elements which lie within table_bbox
+        # select elements which lie within table_areas
        t_bbox = {}
        t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text)
        t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text)
@ -464,7 +464,7 @@ class Stream(BaseParser):
        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
        table._text = _text
        self.generate_image()
-        table._image = (self.pdf_image, self.table_bbox)
+        table._image = (self.pdf_image, self.table_areas)
        table._segments = None
        table._textedges = self.textedges

@ -492,13 +492,13 @@ class Stream(BaseParser):
            return []

        # Identify plausible areas within the doc where tables lie,
-        # populate table_bbox keys with these areas.
-        self._generate_table_bbox()
+        # populate table_areas keys with these areas.
+        self._generate_table_areas()

        _tables = []
        # sort tables based on y-coord
        for table_idx, tk in enumerate(
-            sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
+            sorted(self.table_areas.keys(), key=lambda x: x[1], reverse=True)
        ):
            cols, rows = self._generate_columns_and_rows(table_idx, tk)
            table = self._generate_table(table_idx, cols, rows)
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -121,7 +121,7 @@ class PlotMethods(object):
        fig : matplotlib.fig.Figure

        """
-        img, table_bbox = table._image
+        img, table_areas = table._image
        _FOR_LATTICE = table.flavor == "lattice"
        fig = plt.figure()
        ax = fig.add_subplot(111, aspect="equal")
@ -137,7 +137,7 @@ class PlotMethods(object):
                    )
                )

-        for t in table_bbox.keys():
+        for t in table_areas.keys():
            ax.add_patch(
                patches.Rectangle(
                    (t[0], t[1]), t[2] - t[0], t[3] - t[1],
@ -204,13 +204,13 @@ class PlotMethods(object):
        fig : matplotlib.fig.Figure

        """
-        img, table_bbox = table._image
+        img, table_areas = table._image
        fig = plt.figure()
        ax = fig.add_subplot(111, aspect="equal")
        x_coord = []
        y_coord = []
-        for k in table_bbox.keys():
-            for coord in table_bbox[k]:
+        for k in table_areas.keys():
+            for coord in table_areas[k]:
                x_coord.append(coord[0])
                y_coord.append(coord[1])
        ax.plot(x_coord, y_coord, "ro")
--- a/tests/test_common.py
+++ b/tests/test_common.py
@ -192,6 +192,7 @@ def test_stream_vertical_header():

    filename = os.path.join(testdir, "vertical_header.pdf")
    tables = camelot.read_pdf(filename, flavor="stream")
+    assert len(tables) == 1
    assert_frame_equal(df, tables[0].df)