Further refactoring

2020-04-24 21:11:31 -07:00 · 2020-04-24 21:11:31 -07:00 · bb842f21b9
parent f42557ab8b
commit bb842f21b9
8 changed files with 430 additions and 699 deletions
--- a/camelot/core.py
+++ b/camelot/core.py
@ -15,8 +15,6 @@ from .utils import (
    get_index_closest_point,
    get_textline_coords,
    build_file_path_in_temp_dir,
-    compute_accuracy,
-    compute_whitespace,
    export_pdf_as_png
 )

@ -141,9 +139,9 @@ class TextAlignments(object):

    def __init__(self, alignment_names):
        # For each possible alignment, list of tuples coordinate/textlines
-        self._textedges = {}
+        self._text_alignments = {}
        for alignment_name in alignment_names:
-            self._textedges[alignment_name] = []
+            self._text_alignments[alignment_name] = []

    @staticmethod
    def _create_new_text_alignment(coord, textline, align):
@ -156,12 +154,12 @@ class TextAlignments(object):
        """Updates an existing text edge in the current dict.
        """
        coords = get_textline_coords(textline)
-        for alignment, edge_array in self._textedges.items():
-            coord = coords[alignment]
+        for alignment_id, alignment_array in self._text_alignments.items():
+            coord = coords[alignment_id]

            # Find the index of the closest existing element (or 0 if none)
            idx_closest = get_index_closest_point(
-                coord, edge_array, fn=lambda x: x.coord
+                coord, alignment_array, fn=lambda x: x.coord
            )

            # Check if the edges before/after are close enough
@ -169,17 +167,25 @@ class TextAlignments(object):
            idx_insert = None
            if idx_closest is None:
                idx_insert = 0
-            elif np.isclose(edge_array[idx_closest].coord, coord, atol=0.5):
-                self._update_edge(edge_array[idx_closest], coord, textline)
-            elif edge_array[idx_closest].coord < coord:
+            elif np.isclose(
+                alignment_array[idx_closest].coord,
+                coord,
+                atol=0.5
+            ):
+                self._update_edge(
+                    alignment_array[idx_closest],
+                    coord,
+                    textline
+                )
+            elif alignment_array[idx_closest].coord < coord:
                idx_insert = idx_closest + 1
            else:
                idx_insert = idx_closest
            if idx_insert is not None:
-                new_edge = self._create_new_text_alignment(
-                    coord, textline, alignment
+                new_alignment = self._create_new_text_alignment(
+                    coord, textline, alignment_id
                )
-                edge_array.insert(idx_insert, new_edge)
+                alignment_array.insert(idx_insert, new_alignment)


 class TextEdges(TextAlignments):
@ -201,7 +207,7 @@ class TextEdges(TextAlignments):
        """Adds a new text edge to the current dict.
        """
        te = self._create_new_text_alignment(coord, textline, align)
-        self._textedges[align].append(te)
+        self._text_alignments[align].append(te)

    def _update_edge(self, edge, coord, textline):
        edge.update_coords(coord, textline, self.edge_tol)
@ -221,15 +227,15 @@ class TextEdges(TextAlignments):
        """
        intersections_sum = {
            "left": sum(
-                len(te.textlines) for te in self._textedges["left"]
+                len(te.textlines) for te in self._text_alignments["left"]
                if te.is_valid
            ),
            "right": sum(
-                len(te.textlines) for te in self._textedges["right"]
+                len(te.textlines) for te in self._text_alignments["right"]
                if te.is_valid
            ),
            "middle": sum(
-                len(te.textlines) for te in self._textedges["middle"]
+                len(te.textlines) for te in self._text_alignments["middle"]
                if te.is_valid
            ),
        }
@ -240,7 +246,7 @@ class TextEdges(TextAlignments):
        relevant_align = max(intersections_sum.items(), key=itemgetter(1))[0]
        return list(filter(
            lambda te: te.is_valid,
-            self._textedges[relevant_align])
+            self._text_alignments[relevant_align])
        )

    def get_table_areas(self, textlines, relevant_textedges):
@ -443,9 +449,9 @@ class Table(object):
        self.filename = None
        self.order = None
        self.page = None
-        self.flavor = None      # Flavor of the parser that generated the table
-        self.pdf_size = None    # Dimensions of the original PDF page
-        self.debug_info = None  # Field holding debug data
+        self.flavor = None         # Flavor of the parser used
+        self.pdf_size = None       # Dimensions of the original PDF page
+        self.parse_details = None  # Field holding debug data

        self._image = None
        self._image_path = None  # Temporary file to hold an image of the pdf
@ -485,31 +491,6 @@ class Table(object):
        }
        return report

-    def record_parse_metadata(self, parser):
-        """Record data about the origin of the table
-        """
-        self.flavor = parser.id
-        self.filename = parser.filename
-        self.debug_info = parser.debug_info
-        pos_errors = parser.compute_parse_errors(self)
-        self.accuracy = compute_accuracy([[100, pos_errors]])
-
-        if parser.copy_text is not None:
-            self.copy_spanning_text(parser.copy_text)
-
-        data = self.data
-        self.df = pd.DataFrame(data)
-        self.shape = self.df.shape
-
-        self.whitespace = compute_whitespace(data)
-        self.pdf_size = (parser.pdf_width, parser.pdf_height)
-
-        _text = []
-        _text.extend(
-            [(t.x0, t.y0, t.x1, t.y1) for t in parser.horizontal_text])
-        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in parser.vertical_text])
-        self._text = _text
-
    def get_pdf_image(self):
        """Compute pdf image and cache it
        """
--- a/camelot/parsers/base.py
+++ b/camelot/parsers/base.py
@ -3,11 +3,18 @@
 import os
 import warnings

+import numpy as np
+import pandas as pd
+
 from ..utils import (
+    bbox_from_str,
+    bbox_from_textlines,
+    compute_accuracy,
+    compute_whitespace,
    get_text_objects,
    get_table_index,
    text_in_bbox,
-    bbox_from_str,
+    text_in_bbox_per_axis,
 )
 from ..core import Table

@ -42,7 +49,7 @@ class BaseParser(object):
        self.t_bbox = None

        # For plotting details of parsing algorithms
-        self.debug_info = {} if debug else None
+        self.parse_details = {} if debug else None

    def prepare_page_parse(self, filename, layout, dimensions,
                           page_idx, layout_kwargs):
@ -63,9 +70,9 @@ class BaseParser(object):
        self.pdf_width, self.pdf_height = self.dimensions
        self.rootname, __ = os.path.splitext(self.filename)

-        if self.debug_info is not None:
-            self.debug_info["table_regions"] = self.table_regions
-            self.debug_info["table_areas"] = self.table_areas
+        if self.parse_details is not None:
+            self.parse_details["table_regions"] = self.table_regions
+            self.parse_details["table_areas"] = self.table_areas

    def _apply_regions_filter(self, textlines):
        """If regions have been specified, filter textlines to these regions.
@ -194,6 +201,31 @@ class BaseParser(object):

        return _tables

+    def record_parse_metadata(self, table):
+        """Record data about the origin of the table
+        """
+        table.flavor = self.id
+        table.filename = self.filename
+        table.parse_details = self.parse_details
+        pos_errors = self.compute_parse_errors(table)
+        table.accuracy = compute_accuracy([[100, pos_errors]])
+
+        if self.copy_text is not None:
+            table.copy_spanning_text(self.copy_text)
+
+        data = table.data
+        table.df = pd.DataFrame(data)
+        table.shape = table.df.shape
+
+        table.whitespace = compute_whitespace(data)
+        table.pdf_size = (self.pdf_width, self.pdf_height)
+
+        _text = []
+        _text.extend(
+            [(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
+        _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
+        table._text = _text
+

 class TextBaseParser(BaseParser):
    """Base class for all text parsers.
@ -211,15 +243,17 @@ class TextBaseParser(BaseParser):
        edge_tol=50,
        row_tol=2,
        column_tol=0,
+        debug=False,
        **kwargs
    ):
        super().__init__(
-            "stream",
+            parser_id,
            table_regions=table_regions,
            table_areas=table_areas,
            split_text=split_text,
            strip_text=strip_text,
            flag_size=flag_size,
+            debug=debug,
        )
        self.columns = columns
        self._validate_columns()
@ -227,4 +261,271 @@ class TextBaseParser(BaseParser):
        self.row_tol = row_tol
        self.column_tol = column_tol

-        self.textedges = None
+    @staticmethod
+    def _group_rows(text, row_tol=2):
+        """Groups PDFMiner text objects into rows vertically
+        within a tolerance.
+
+        Parameters
+        ----------
+        text : list
+            List of PDFMiner text objects.
+        row_tol : int, optional (default: 2)
+
+        Returns
+        -------
+        rows : list
+            Two-dimensional list of text objects grouped into rows.
+
+        """
+        row_y = None
+        rows = []
+        temp = []
+        non_empty_text = [t for t in text if t.get_text().strip()]
+        for t in non_empty_text:
+            # is checking for upright necessary?
+            # if t.get_text().strip() and all([obj.upright \
+            #   for obj in t._objs
+            # if type(obj) is LTChar]):
+            if row_y is None:
+                row_y = t.y0
+            elif not np.isclose(row_y, t.y0, atol=row_tol):
+                rows.append(sorted(temp, key=lambda t: t.x0))
+                temp = []
+                # We update the row's bottom as we go, to be forgiving if there
+                # is a gradual change across multiple columns.
+                row_y = t.y0
+            temp.append(t)
+        rows.append(sorted(temp, key=lambda t: t.x0))
+        return rows
+
+    @staticmethod
+    def _merge_columns(l, column_tol=0):
+        """Merges column boundaries horizontally if they overlap
+        or lie within a tolerance.
+
+        Parameters
+        ----------
+        l : list
+            List of column x-coordinate tuples.
+        column_tol : int, optional (default: 0)
+
+        Returns
+        -------
+        merged : list
+            List of merged column x-coordinate tuples.
+
+        """
+        merged = []
+        for higher in l:
+            if not merged:
+                merged.append(higher)
+            else:
+                lower = merged[-1]
+                if column_tol >= 0:
+                    if higher[0] <= lower[1] or np.isclose(
+                        higher[0], lower[1], atol=column_tol
+                    ):
+                        upper_bound = max(lower[1], higher[1])
+                        lower_bound = min(lower[0], higher[0])
+                        merged[-1] = (lower_bound, upper_bound)
+                    else:
+                        merged.append(higher)
+                elif column_tol < 0:
+                    if higher[0] <= lower[1]:
+                        if np.isclose(higher[0], lower[1],
+                                      atol=abs(column_tol)):
+                            merged.append(higher)
+                        else:
+                            upper_bound = max(lower[1], higher[1])
+                            lower_bound = min(lower[0], higher[0])
+                            merged[-1] = (lower_bound, upper_bound)
+                    else:
+                        merged.append(higher)
+        return merged
+
+    @staticmethod
+    def _join_rows(rows_grouped, text_y_max, text_y_min):
+        """Makes row coordinates continuous. For the row to "touch"
+        we split the existing gap between them in half.
+
+        Parameters
+        ----------
+        rows_grouped : list
+            Two-dimensional list of text objects grouped into rows.
+        text_y_max : int
+        text_y_min : int
+
+        Returns
+        -------
+        rows : list
+            List of continuous row y-coordinate tuples.
+
+        """
+        row_boundaries = [
+            [
+                max(t.y1 for t in r),
+                min(t.y0 for t in r)
+            ]
+            for r in rows_grouped
+        ]
+        for i in range(0, len(row_boundaries)-1):
+            top_row = row_boundaries[i]
+            bottom_row = row_boundaries[i+1]
+            top_row[1] = bottom_row[0] = (top_row[1] + bottom_row[0]) / 2
+        row_boundaries[0][0] = text_y_max
+        row_boundaries[-1][1] = text_y_min
+        return row_boundaries
+
+    @staticmethod
+    def _add_columns(cols, text, row_tol):
+        """Adds columns to existing list by taking into account
+        the text that lies outside the current column x-coordinates.
+
+        Parameters
+        ----------
+        cols : list
+            List of column x-coordinate tuples.
+        text : list
+            List of PDFMiner text objects.
+        row_tol : int
+
+        Returns
+        -------
+        cols : list
+            Updated list of column x-coordinate tuples.
+
+        """
+        if text:
+            text = TextBaseParser._group_rows(text, row_tol=row_tol)
+            elements = [len(r) for r in text]
+            new_cols = [
+                (t.x0, t.x1)
+                for r in text if len(r) == max(elements)
+                for t in r
+            ]
+            cols.extend(TextBaseParser._merge_columns(sorted(new_cols)))
+        return cols
+
+    @staticmethod
+    def _join_columns(cols, text_x_min, text_x_max):
+        """Makes column coordinates continuous.
+
+        Parameters
+        ----------
+        cols : list
+            List of column x-coordinate tuples.
+        text_x_min : int
+        text_x_max : int
+
+        Returns
+        -------
+        cols : list
+            Updated list of column x-coordinate tuples.
+
+        """
+        cols = sorted(cols)
+        cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))]
+        cols.insert(0, text_x_min)
+        cols.append(text_x_max)
+        cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
+        return cols
+
+    def _validate_columns(self):
+        if self.table_areas is not None and self.columns is not None:
+            if len(self.table_areas) != len(self.columns):
+                raise ValueError("Length of table_areas and columns"
+                                 " should be equal")
+
+    def _generate_columns_and_rows(self, bbox, table_idx):
+        # select elements which lie within table_bbox
+        self.t_bbox = text_in_bbox_per_axis(
+            bbox,
+            self.horizontal_text,
+            self.vertical_text
+        )
+
+        text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(
+            self.t_bbox["horizontal"] + self.t_bbox["vertical"]
+        )
+        rows_grouped = self._group_rows(
+            self.t_bbox["horizontal"], row_tol=self.row_tol)
+        rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
+        elements = [len(r) for r in rows_grouped]
+
+        if self.columns is not None and self.columns[table_idx] != "":
+            # user has to input boundary columns too
+            # take (0, pdf_width) by default
+            # similar to else condition
+            # len can't be 1
+            cols = self.columns[table_idx].split(",")
+            cols = [float(c) for c in cols]
+            cols.insert(0, text_x_min)
+            cols.append(text_x_max)
+            cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
+        else:
+            # calculate mode of the list of number of elements in
+            # each row to guess the number of columns
+            ncols = max(set(elements), key=elements.count)
+            if ncols == 1:
+                # if mode is 1, the page usually contains no tables
+                # but there can be cases where the list can be skewed,
+                # try to remove all 1s from list in this case and
+                # see if the list contains elements, if yes, then use
+                # the mode after removing 1s
+                elements = list(filter(lambda x: x != 1, elements))
+                if elements:
+                    ncols = max(set(elements), key=elements.count)
+                else:
+                    warnings.warn(
+                        "No tables found in table area {}"
+                        .format(table_idx + 1)
+                    )
+            cols = [
+                (t.x0, t.x1)
+                for r in rows_grouped
+                if len(r) == ncols
+                for t in r
+            ]
+            cols = self._merge_columns(
+                sorted(cols),
+                column_tol=self.column_tol
+            )
+            inner_text = []
+            for i in range(1, len(cols)):
+                left = cols[i - 1][1]
+                right = cols[i][0]
+                inner_text.extend(
+                    [
+                        t
+                        for direction in self.t_bbox
+                        for t in self.t_bbox[direction]
+                        if t.x0 > left and t.x1 < right
+                    ]
+                )
+            outer_text = [
+                t
+                for direction in self.t_bbox
+                for t in self.t_bbox[direction]
+                if t.x0 > cols[-1][1] or t.x1 < cols[0][0]
+            ]
+            inner_text.extend(outer_text)
+            cols = self._add_columns(cols, inner_text, self.row_tol)
+            cols = self._join_columns(cols, text_x_min, text_x_max)
+
+        return cols, rows, None, None
+
+    def record_parse_metadata(self, table):
+        """Record data about the origin of the table
+        """
+        super().record_parse_metadata(table)
+        # for plotting
+        table._bbox = self.table_bbox
+        table._segments = None
+
+    def _generate_table(self, table_idx, cols, rows, **kwargs):
+        table = self._initialize_new_table(table_idx, cols, rows)
+        table = table.set_all_edges()
+        self.record_parse_metadata(table)
+
+        return table
--- a/camelot/parsers/hybrid.py
+++ b/camelot/parsers/hybrid.py
@ -5,7 +5,6 @@ from __future__ import division

 import numpy as np
 import copy
-import warnings

 from .base import TextBaseParser
 from ..core import (
@ -17,7 +16,6 @@ from ..core import (
 from ..utils import (
    bbox_from_str,
    text_in_bbox,
-    text_in_bbox_per_axis,
    bbox_from_textlines,
    distance_tl_to_bbox,
    find_columns_coordinates
@ -142,11 +140,11 @@ def search_header_from_body_bbox(body_bbox, textlines, col_anchors, max_v_gap):

 class AlignmentCounter(object):
    """
-    Represents all textlines aligned with a textline for each alignment.
+    For a given textline, represent all other textlines aligned with it.

-    A textline can be vertically aligned with others by having matching left,
-    right, or middle edge, and horizontally aligned by having matching top,
-    bottom, or center edge.
+    A textline can be vertically aligned with others if their bbox match on
+    left, right, or middle coord, and horizontally aligned if they match top,
+    bottom, or center coord.

    """

@ -210,15 +208,15 @@ class AlignmentCounter(object):


 class TextNetworks(TextAlignments):
-    """Text elements connected via both vertical (top, bottom, middle) and
-    horizontal (left, right, and middle) alignments found on the PDF page.
+    """Text elements connected by vertical AND horizontal alignments.
+
    The alignment dict has six keys based on the hor/vert alignments,
    and each key's value is a list of camelot.core.TextAlignment objects.
    """

    def __init__(self):
        super().__init__(ALL_ALIGNMENTS)
-        # For each textline, dictionary "edge type" to
+        # For each textline, dictionary "alignment type" to
        # "number of textlines aligned"
        self._textlines_alignments = {}

@ -226,10 +224,10 @@ class TextNetworks(TextAlignments):
        edge.register_aligned_textline(textline, coord)

    def _register_all_text_lines(self, textlines):
-        """Add all textlines to our edge repository to
+        """Add all textlines to our network repository to
        identify alignments.
        """
-        # Identify all the edge alignments
+        # Identify all the alignments
        for tl in textlines:
            if len(tl.get_text().strip()) > 0:
                self._register_textline(tl)
@ -237,7 +235,7 @@ class TextNetworks(TextAlignments):
    def _compute_alignment_counts(self):
        """Build a dictionary textline -> alignment object.
        """
-        for align_id, textedges in self._textedges.items():
+        for align_id, textedges in self._text_alignments.items():
            for textedge in textedges:
                for textline in textedge.textlines:
                    alignments = self._textlines_alignments.get(
@ -254,8 +252,8 @@ class TextNetworks(TextAlignments):
        the core table.
        """
        h_gaps, v_gaps = [], []
-        for align_id in self._textedges:
-            edge_array = self._textedges[align_id]
+        for align_id in self._text_alignments:
+            edge_array = self._text_alignments[align_id]
            gaps = []
            vertical = align_id in HORIZONTAL_ALIGNMENTS
            sort_function = (lambda tl: tl.y0) \
@ -299,7 +297,7 @@ class TextNetworks(TextAlignments):
        removed_singletons = True
        while removed_singletons:
            removed_singletons = False
-            for alignment_id, textalignments in self._textedges.items():
+            for alignment_id, textalignments in self._text_alignments.items():
                # For each alignment edge, remove items if they are singletons
                # either horizontally or vertically
                for ta in textalignments:
@ -313,7 +311,7 @@ class TextNetworks(TextAlignments):
            self._textlines_alignments = {}
            self._compute_alignment_counts()

-    def _most_connected_textline(self):
+    def most_connected_textline(self):
        """ Retrieve the textline that is most connected across vertical and
        horizontal axis.

@ -340,7 +338,7 @@ class TextNetworks(TextAlignments):
        # alignments across horizontal and vertical axis.
        # It will serve as a reference axis along which to collect the average
        # spacing between rows/cols.
-        most_aligned_tl = self._most_connected_textline()
+        most_aligned_tl = self.most_connected_textline()
        if most_aligned_tl is None:
            return None

@ -378,7 +376,7 @@ class TextNetworks(TextAlignments):
        )
        return gaps_hv

-    def _build_bbox_candidate(self, gaps_hv, debug_info=None):
+    def _build_bbox_candidate(self, gaps_hv, parse_details=None):
        """ Seed the process with the textline with the highest alignment
        score, then expand the bbox with textlines within threshold.

@ -387,7 +385,7 @@ class TextNetworks(TextAlignments):
        gaps_hv : tuple
             The maximum distance allowed to consider surrounding lines/columns
             as part of the same table.
-        debug_info : array (optional)
+        parse_details : array (optional)
            Optional parameter array, in which to store extra information
            to help later visualization of the table creation.
        """
@ -396,23 +394,23 @@ class TextNetworks(TextAlignments):
        # It will serve both as a starting point for the table boundary
        # search, and as a way to estimate the average spacing between
        # rows/cols.
-        most_aligned_tl = self._most_connected_textline()
+        most_aligned_tl = self.most_connected_textline()

        # Calculate the 75th percentile of the horizontal/vertical
        # gaps between textlines.  Use this as a reference for a threshold
        # to not exceed while looking for table boundaries.
        max_h_gap, max_v_gap = gaps_hv[0], gaps_hv[1]

-        if debug_info is not None:
+        if parse_details is not None:
            # Store debug info
-            debug_info_search = {
+            parse_details_search = {
                "max_h_gap": max_h_gap,
                "max_v_gap": max_v_gap,
                "iterations": []
            }
-            debug_info.append(debug_info_search)
+            parse_details.append(parse_details_search)
        else:
-            debug_info_search = None
+            parse_details_search = None

        MINIMUM_TEXTLINES_IN_TABLE = 6
        bbox = (most_aligned_tl.x0, most_aligned_tl.y0,
@ -426,9 +424,9 @@ class TextNetworks(TextAlignments):
        tls_in_bbox = [most_aligned_tl]
        last_bbox = None
        while last_bbox != bbox:
-            if debug_info_search is not None:
+            if parse_details_search is not None:
                # Store debug info
-                debug_info_search["iterations"].append(bbox)
+                parse_details_search["iterations"].append(bbox)

            last_bbox = bbox
            # Go through all remaining textlines, expand our bbox
@ -461,35 +459,6 @@ class TextNetworks(TextAlignments):
        self._register_all_text_lines(textlines)
        self._compute_alignment_counts()

-    def plot_alignments(self, ax):
-        """Displays a visualization of the alignments as currently computed.
-        """
-        # FRHTODO: This is too busy and doesn't plot lines
-        most_aligned_tl = sorted(
-            self._textlines_alignments.keys(),
-            key=lambda textline:
-            self._textlines_alignments[textline].alignment_score(),
-            reverse=True
-        )[0]
-
-        ax.add_patch(
-            patches.Rectangle(
-                (most_aligned_tl.x0, most_aligned_tl.y0),
-                most_aligned_tl.x1 - most_aligned_tl.x0,
-                most_aligned_tl.y1 - most_aligned_tl.y0,
-                color="red",
-                alpha=0.5
-            )
-        )
-        for tl, alignments in self._textlines_alignments.items():
-            ax.text(
-                tl.x0 - 5,
-                tl.y0 - 5,
-                f"{alignments.max_h_count()}x{alignments.max_v_count()}",
-                fontsize=5,
-                color="black"
-            )
-

 class Hybrid(TextBaseParser):
    """Hybrid method of parsing looks for spaces between text
@ -555,190 +524,9 @@ class Hybrid(TextBaseParser):
            edge_tol=edge_tol,
            row_tol=row_tol,
            column_tol=column_tol,
+            debug=debug,
        )

-    # FRHTODO: Check if needed, refactor with Stream
-    @staticmethod
-    def _group_rows(text, row_tol=2):
-        """Groups PDFMiner text objects into rows vertically
-        within a tolerance.
-
-        Parameters
-        ----------
-        text : list
-            List of PDFMiner text objects.
-        row_tol : int, optional (default: 2)
-
-        Returns
-        -------
-        rows : list
-            Two-dimensional list of text objects grouped into rows.
-
-        """
-        row_y = None
-        rows = []
-        temp = []
-        non_empty_text = [t for t in text if t.get_text().strip()]
-        for t in non_empty_text:
-            # is checking for upright necessary?
-            # if t.get_text().strip() and all([obj.upright \
-            #   for obj in t._objs
-            # if type(obj) is LTChar]):
-            if row_y is None:
-                row_y = t.y0
-            elif not np.isclose(row_y, t.y0, atol=row_tol):
-                rows.append(sorted(temp, key=lambda t: t.x0))
-                temp = []
-                # We update the row's bottom as we go, to be forgiving if there
-                # is a gradual change across multiple columns.
-                row_y = t.y0
-            temp.append(t)
-        rows.append(sorted(temp, key=lambda t: t.x0))
-        return rows
-
-    # FRHTODO: Check if needed, refactor with Stream
-    @staticmethod
-    def _merge_columns(l, column_tol=0):
-        """Merges column boundaries horizontally if they overlap
-        or lie within a tolerance.
-
-        Parameters
-        ----------
-        l : list
-            List of column x-coordinate tuples.
-        column_tol : int, optional (default: 0)
-
-        Returns
-        -------
-        merged : list
-            List of merged column x-coordinate tuples.
-
-        """
-        merged = []
-        for higher in l:
-            if not merged:
-                merged.append(higher)
-            else:
-                lower = merged[-1]
-                if column_tol >= 0:
-                    if higher[0] <= lower[1] or np.isclose(
-                        higher[0], lower[1], atol=column_tol
-                    ):
-                        upper_bound = max(lower[1], higher[1])
-                        lower_bound = min(lower[0], higher[0])
-                        merged[-1] = (lower_bound, upper_bound)
-                    else:
-                        merged.append(higher)
-                elif column_tol < 0:
-                    if higher[0] <= lower[1]:
-                        if np.isclose(higher[0], lower[1],
-                                      atol=abs(column_tol)):
-                            merged.append(higher)
-                        else:
-                            upper_bound = max(lower[1], higher[1])
-                            lower_bound = min(lower[0], higher[0])
-                            merged[-1] = (lower_bound, upper_bound)
-                    else:
-                        merged.append(higher)
-        return merged
-
-    # FRHTODO: Check if needed, refactor with Stream
-    @staticmethod
-    def _join_rows(rows_grouped, text_y_max, text_y_min):
-        """Makes row coordinates continuous. For the row to "touch"
-        we split the existing gap between them in half.
-
-        Parameters
-        ----------
-        rows_grouped : list
-            Two-dimensional list of text objects grouped into rows.
-        text_y_max : int
-        text_y_min : int
-
-        Returns
-        -------
-        rows : list
-            List of continuous row y-coordinate tuples.
-
-        """
-        row_boundaries = [
-            [
-                max(t.y1 for t in r),
-                min(t.y0 for t in r)
-            ]
-            for r in rows_grouped
-        ]
-        for i in range(0, len(row_boundaries)-1):
-            top_row = row_boundaries[i]
-            bottom_row = row_boundaries[i+1]
-            top_row[1] = bottom_row[0] = (top_row[1] + bottom_row[0]) / 2
-        row_boundaries[0][0] = text_y_max
-        row_boundaries[-1][1] = text_y_min
-        return row_boundaries
-
-    # FRHTODO: Check if needed, refactor with Stream
-    @staticmethod
-    def _add_columns(cols, text, row_tol):
-        """Add columns to existing list by taking into account
-        the text that lies outside the current column x-coordinates.
-
-        Parameters
-        ----------
-        cols : list
-            List of column x-coordinate tuples.
-        text : list
-            List of PDFMiner text objects.
-        ytol : int
-
-        Returns
-        -------
-        cols : list
-            Updated list of column x-coordinate tuples.
-
-        """
-        if text:
-            text = Hybrid._group_rows(text, row_tol=row_tol)
-            elements = [len(r) for r in text]
-            new_cols = [
-                (t.x0, t.x1)
-                for r in text if len(r) == max(elements)
-                for t in r
-            ]
-            cols.extend(Hybrid._merge_columns(sorted(new_cols)))
-        return cols
-
-    # FRHTODO: Check if needed, refactor with Stream
-    @staticmethod
-    def _join_columns(cols, text_x_min, text_x_max):
-        """Makes column coordinates continuous.
-
-        Parameters
-        ----------
-        cols : list
-            List of column x-coordinate tuples.
-        text_x_min : int
-        text_y_max : int
-
-        Returns
-        -------
-        cols : list
-            Updated list of column x-coordinate tuples.
-
-        """
-        cols = sorted(cols)
-        cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))]
-        cols.insert(0, text_x_min)
-        cols.append(text_x_max)
-        cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
-        return cols
-
-    # FRHTODO: Check is needed, refactor with Stream
-    def _validate_columns(self):
-        if self.table_areas is not None and self.columns is not None:
-            if len(self.table_areas) != len(self.columns):
-                raise ValueError("Length of table_areas and columns"
-                                 " should be equal")
-
    def _generate_table_bbox(self):
        if self.table_areas is not None:
            table_bbox = {}
@ -756,25 +544,21 @@ class Hybrid(TextBaseParser):

        textlines_processed = {}
        self.table_bbox = {}
-        if self.debug_info is not None:
-            debug_info_edges_searches = []
-            self.debug_info["edges_searches"] = debug_info_edges_searches
-            debug_info_bboxes_searches = []
-            self.debug_info["bboxes_searches"] = debug_info_bboxes_searches
+        if self.parse_details is not None:
+            parse_details_network_searches = []
+            self.parse_details["network_searches"] = \
+                parse_details_network_searches
+            parse_details_bbox_searches = []
+            self.parse_details["bbox_searches"] = parse_details_bbox_searches
        else:
-            debug_info_edges_searches = None
-            debug_info_bboxes_searches = None
+            parse_details_network_searches = None
+            parse_details_bbox_searches = None

        while True:
-            self.textedges = TextNetworks()
-            self.textedges.generate(textlines)
-            self.textedges._remove_unconnected_edges()
-            if debug_info_edges_searches is not None:
-                # Preserve the current edge calculation for display debugging
-                debug_info_edges_searches.append(
-                    copy.deepcopy(self.textedges)
-                )
-            gaps_hv = self.textedges._compute_plausible_gaps()
+            text_network = TextNetworks()
+            text_network.generate(textlines)
+            text_network._remove_unconnected_edges()
+            gaps_hv = text_network._compute_plausible_gaps()
            if gaps_hv is None:
                return None
            # edge_tol instructions override the calculated vertical gap
@ -782,13 +566,19 @@ class Hybrid(TextBaseParser):
                gaps_hv[0],
                gaps_hv[1] if self.edge_tol is None else self.edge_tol
            )
-            bbox = self.textedges._build_bbox_candidate(
+            bbox = text_network._build_bbox_candidate(
                edge_tol_hv,
-                debug_info=debug_info_bboxes_searches
+                parse_details=parse_details_bbox_searches
            )
            if bbox is None:
                break

+            if parse_details_network_searches is not None:
+                # Preserve the current edge calculation for display debugging
+                parse_details_network_searches.append(
+                    copy.deepcopy(text_network)
+                )
+
            # Get all the textlines that are at least 50% in the box
            tls_in_bbox = text_in_bbox(bbox, textlines)

@ -808,10 +598,10 @@ class Hybrid(TextBaseParser):
                gaps_hv[1]
            )

-            if self.debug_info is not None:
-                if "col_searches" not in self.debug_info:
-                    self.debug_info["col_searches"] = []
-                self.debug_info["col_searches"].append({
+            if self.parse_details is not None:
+                if "col_searches" not in self.parse_details:
+                    self.parse_details["col_searches"] = []
+                self.parse_details["col_searches"].append({
                    "core_bbox": bbox,
                    "cols_anchors": cols_anchors,
                    "expanded_bbox": expanded_bbox
@ -826,95 +616,3 @@ class Hybrid(TextBaseParser):
                lambda tl: tl not in textlines_processed,
                textlines
            ))
-
-    # FRHTODO: Check is needed, refactor with Stream
-    def _generate_columns_and_rows(self, bbox, table_idx):
-        # select elements which lie within table_bbox
-        self.t_bbox = text_in_bbox_per_axis(
-            bbox,
-            self.horizontal_text,
-            self.vertical_text
-        )
-
-        text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(
-            self.t_bbox["horizontal"] + self.t_bbox["vertical"]
-        )
-        rows_grouped = self._group_rows(
-            self.t_bbox["horizontal"], row_tol=self.row_tol)
-        rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
-        elements = [len(r) for r in rows_grouped]
-
-        if self.columns is not None and self.columns[table_idx] != "":
-            # user has to input boundary columns too
-            # take (0, pdf_width) by default
-            # similar to else condition
-            # len can't be 1
-            cols = self.columns[table_idx].split(",")
-            cols = [float(c) for c in cols]
-            cols.insert(0, text_x_min)
-            cols.append(text_x_max)
-            cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
-        else:
-            # calculate mode of the list of number of elements in
-            # each row to guess the number of columns
-            ncols = max(set(elements), key=elements.count)
-            if ncols == 1:
-                # if mode is 1, the page usually contains not tables
-                # but there can be cases where the list can be skewed,
-                # try to remove all 1s from list in this case and
-                # see if the list contains elements, if yes, then use
-                # the mode after removing 1s
-                elements = list(filter(lambda x: x != 1, elements))
-                if elements:
-                    ncols = max(set(elements), key=elements.count)
-                else:
-                    warnings.warn(
-                        "No tables found in table area {}"
-                        .format(table_idx + 1)
-                    )
-            cols = [
-                (t.x0, t.x1)
-                for r in rows_grouped
-                if len(r) == ncols
-                for t in r
-            ]
-            cols = self._merge_columns(
-                sorted(cols),
-                column_tol=self.column_tol
-            )
-            inner_text = []
-            for i in range(1, len(cols)):
-                left = cols[i - 1][1]
-                right = cols[i][0]
-                inner_text.extend(
-                    [
-                        t
-                        for direction in self.t_bbox
-                        for t in self.t_bbox[direction]
-                        if t.x0 > left and t.x1 < right
-                    ]
-                )
-            outer_text = [
-                t
-                for direction in self.t_bbox
-                for t in self.t_bbox[direction]
-                if t.x0 > cols[-1][1] or t.x1 < cols[0][0]
-            ]
-            inner_text.extend(outer_text)
-            cols = self._add_columns(cols, inner_text, self.row_tol)
-            cols = self._join_columns(cols, text_x_min, text_x_max)
-
-        return cols, rows, None, None
-
-    # FRHTODO: Check is needed, refactor with Stream
-    def _generate_table(self, table_idx, cols, rows, **kwargs):
-        table = self._initialize_new_table(table_idx, cols, rows)
-        table = table.set_all_edges()
-        table.record_parse_metadata(self)
-
-        # for plotting
-        table._bbox = self.table_bbox
-        table._segments = None
-        table._textedges = self.textedges
-
-        return table
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -168,6 +168,15 @@ class Lattice(BaseParser):
            indices.append((r_idx, c_idx, text))
        return indices

+    def record_parse_metadata(self, table):
+        """Record data about the origin of the table, then attach the
+        PDF image, unscaled table bboxes and line segments for plotting."""
+        super().record_parse_metadata(table)
+        # for plotting
+        table._image = self.pdf_image  # Reuse the image used for calc
+        table._bbox_unscaled = self.table_bbox_unscaled
+        table._segments = (self.vertical_segments, self.horizontal_segments)
+
    def _generate_table_bbox(self):
        def scale_areas(areas):
            scaled_areas = []
@ -293,12 +302,5 @@ class Lattice(BaseParser):
        # set spanning cells to True
        table = table.set_span()

-        table.record_parse_metadata(self)
-
-        # for plotting
-        table._image = self.pdf_image  # Reuse the image used for calc
-        table._bbox_unscaled = self.table_bbox_unscaled
-        table._segments = (self.vertical_segments, self.horizontal_segments)
-        table._textedges = None
-
+        self.record_parse_metadata(table)
        return table
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@ -1,17 +1,12 @@
 # -*- coding: utf-8 -*-

 from __future__ import division
-import warnings
-
-import numpy as np

 from .base import TextBaseParser
 from ..core import TextEdges
 from ..utils import (
    bbox_from_str,
-    bbox_from_textlines,
-    text_in_bbox,
-    text_in_bbox_per_axis
+    text_in_bbox
 )


@ -79,182 +74,7 @@ class Stream(TextBaseParser):
            row_tol=row_tol,
            column_tol=column_tol,
        )
-
-    @staticmethod
-    def _group_rows(text, row_tol=2):
-        """Groups PDFMiner text objects into rows vertically
-        within a tolerance.
-
-        Parameters
-        ----------
-        text : list
-            List of PDFMiner text objects.
-        row_tol : int, optional (default: 2)
-
-        Returns
-        -------
-        rows : list
-            Two-dimensional list of text objects grouped into rows.
-
-        """
-        row_y = None
-        rows = []
-        temp = []
-        non_empty_text = [t for t in text if t.get_text().strip()]
-        for t in non_empty_text:
-            # is checking for upright necessary?
-            # if t.get_text().strip() and all([obj.upright \
-            #   for obj in t._objs
-            # if type(obj) is LTChar]):
-            if row_y is None:
-                row_y = t.y0
-            elif not np.isclose(row_y, t.y0, atol=row_tol):
-                rows.append(sorted(temp, key=lambda t: t.x0))
-                temp = []
-                # We update the row's bottom as we go, to be forgiving if there
-                # is a gradual change across multiple columns.
-                row_y = t.y0
-            temp.append(t)
-        rows.append(sorted(temp, key=lambda t: t.x0))
-        return rows
-
-    @staticmethod
-    def _merge_columns(l, column_tol=0):
-        """Merges column boundaries horizontally if they overlap
-        or lie within a tolerance.
-
-        Parameters
-        ----------
-        l : list
-            List of column x-coordinate tuples.
-        column_tol : int, optional (default: 0)
-
-        Returns
-        -------
-        merged : list
-            List of merged column x-coordinate tuples.
-
-        """
-        merged = []
-        for higher in l:
-            if not merged:
-                merged.append(higher)
-            else:
-                lower = merged[-1]
-                if column_tol >= 0:
-                    if higher[0] <= lower[1] or np.isclose(
-                        higher[0], lower[1], atol=column_tol
-                    ):
-                        upper_bound = max(lower[1], higher[1])
-                        lower_bound = min(lower[0], higher[0])
-                        merged[-1] = (lower_bound, upper_bound)
-                    else:
-                        merged.append(higher)
-                elif column_tol < 0:
-                    if higher[0] <= lower[1]:
-                        if np.isclose(higher[0], lower[1],
-                                      atol=abs(column_tol)):
-                            merged.append(higher)
-                        else:
-                            upper_bound = max(lower[1], higher[1])
-                            lower_bound = min(lower[0], higher[0])
-                            merged[-1] = (lower_bound, upper_bound)
-                    else:
-                        merged.append(higher)
-        return merged
-
-    @staticmethod
-    def _join_rows(rows_grouped, text_y_max, text_y_min):
-        """Makes row coordinates continuous. For the row to "touch"
-        we split the existing gap between them in half.
-
-        Parameters
-        ----------
-        rows_grouped : list
-            Two-dimensional list of text objects grouped into rows.
-        text_y_max : int
-        text_y_min : int
-
-        Returns
-        -------
-        rows : list
-            List of continuous row y-coordinate tuples.
-
-        """
-        row_boundaries = [
-            [
-                max(t.y1 for t in r),
-                min(t.y0 for t in r)
-            ]
-            for r in rows_grouped
-        ]
-        for i in range(0, len(row_boundaries)-1):
-            top_row = row_boundaries[i]
-            bottom_row = row_boundaries[i+1]
-            top_row[1] = bottom_row[0] = (top_row[1] + bottom_row[0]) / 2
-        row_boundaries[0][0] = text_y_max
-        row_boundaries[-1][1] = text_y_min
-        return row_boundaries
-
-    @staticmethod
-    def _add_columns(cols, text, row_tol):
-        """Adds columns to existing list by taking into account
-        the text that lies outside the current column x-coordinates.
-
-        Parameters
-        ----------
-        cols : list
-            List of column x-coordinate tuples.
-        text : list
-            List of PDFMiner text objects.
-        ytol : int
-
-        Returns
-        -------
-        cols : list
-            Updated list of column x-coordinate tuples.
-
-        """
-        if text:
-            text = Stream._group_rows(text, row_tol=row_tol)
-            elements = [len(r) for r in text]
-            new_cols = [
-                (t.x0, t.x1)
-                for r in text if len(r) == max(elements)
-                for t in r
-            ]
-            cols.extend(Stream._merge_columns(sorted(new_cols)))
-        return cols
-
-    @staticmethod
-    def _join_columns(cols, text_x_min, text_x_max):
-        """Makes column coordinates continuous.
-
-        Parameters
-        ----------
-        cols : list
-            List of column x-coordinate tuples.
-        text_x_min : int
-        text_y_max : int
-
-        Returns
-        -------
-        cols : list
-            Updated list of column x-coordinate tuples.
-
-        """
-        cols = sorted(cols)
-        cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))]
-        cols.insert(0, text_x_min)
-        cols.append(text_x_max)
-        cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
-        return cols
-
-    def _validate_columns(self):
-        if self.table_areas is not None and self.columns is not None:
-            if len(self.table_areas) != len(self.columns):
-                raise ValueError("Length of table_areas and columns"
-                                 " should be equal")
+        self.textedges = []

    def _nurminen_table_detection(self, textlines):
        """A general implementation of the table detection algorithm
@ -281,8 +101,13 @@ class Stream(TextBaseParser):

        return table_bbox

+    def record_parse_metadata(self, table):
+        """Record data about the origin of the table, then attach this
+        parser's text edges to it as ``table._textedges``."""
+        super().record_parse_metadata(table)
+        table._textedges = self.textedges
+
    def _generate_table_bbox(self):
-        self.textedges = []
        if self.table_areas is None:
            hor_text = self.horizontal_text
            if self.table_regions is not None:
@ -300,93 +125,3 @@ class Stream(TextBaseParser):
            for area_str in self.table_areas:
                table_bbox[bbox_from_str(area_str)] = None
        self.table_bbox = table_bbox
-
-    def _generate_columns_and_rows(self, bbox, table_idx):
-        # select elements which lie within table_bbox
-        self.t_bbox = text_in_bbox_per_axis(
-            bbox,
-            self.horizontal_text,
-            self.vertical_text
-        )
-
-        text_x_min, text_y_min, text_x_max, text_y_max = bbox_from_textlines(
-            self.t_bbox["horizontal"] + self.t_bbox["vertical"]
-        )
-        rows_grouped = self._group_rows(
-            self.t_bbox["horizontal"], row_tol=self.row_tol)
-        rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
-        elements = [len(r) for r in rows_grouped]
-
-        if self.columns is not None and self.columns[table_idx] != "":
-            # user has to input boundary columns too
-            # take (0, pdf_width) by default
-            # similar to else condition
-            # len can't be 1
-            cols = self.columns[table_idx].split(",")
-            cols = [float(c) for c in cols]
-            cols.insert(0, text_x_min)
-            cols.append(text_x_max)
-            cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
-        else:
-            # calculate mode of the list of number of elements in
-            # each row to guess the number of columns
-            ncols = max(set(elements), key=elements.count)
-            if ncols == 1:
-                # if mode is 1, the page usually contains not tables
-                # but there can be cases where the list can be skewed,
-                # try to remove all 1s from list in this case and
-                # see if the list contains elements, if yes, then use
-                # the mode after removing 1s
-                elements = list(filter(lambda x: x != 1, elements))
-                if elements:
-                    ncols = max(set(elements), key=elements.count)
-                else:
-                    warnings.warn(
-                        "No tables found in table area {}"
-                        .format(table_idx + 1)
-                    )
-            cols = [
-                (t.x0, t.x1)
-                for r in rows_grouped
-                if len(r) == ncols
-                for t in r
-            ]
-            cols = self._merge_columns(
-                sorted(cols),
-                column_tol=self.column_tol
-            )
-            inner_text = []
-            for i in range(1, len(cols)):
-                left = cols[i - 1][1]
-                right = cols[i][0]
-                inner_text.extend(
-                    [
-                        t
-                        for direction in self.t_bbox
-                        for t in self.t_bbox[direction]
-                        if t.x0 > left and t.x1 < right
-                    ]
-                )
-            outer_text = [
-                t
-                for direction in self.t_bbox
-                for t in self.t_bbox[direction]
-                if t.x0 > cols[-1][1] or t.x1 < cols[0][0]
-            ]
-            inner_text.extend(outer_text)
-            cols = self._add_columns(cols, inner_text, self.row_tol)
-            cols = self._join_columns(cols, text_x_min, text_x_max)
-
-        return cols, rows, None, None
-
-    def _generate_table(self, table_idx, cols, rows, **kwargs):
-        table = self._initialize_new_table(table_idx, cols, rows)
-        table = table.set_all_edges()
-        table.record_parse_metadata(self)
-
-        # for plotting
-        table._bbox = self.table_bbox
-        table._segments = None
-        table._textedges = self.textedges
-
-        return table
--- a/camelot/plotting.py
+++ b/camelot/plotting.py
@ -87,9 +87,9 @@ def draw_parse_constraints(table, ax):
    ax : matplotlib.axes.Axes

    """
-    if table.debug_info:
+    if table.parse_details:
        # Display a bbox per region
-        for region_str in table.debug_info["table_regions"] or []:
+        for region_str in table.parse_details["table_regions"] or []:
            draw_labeled_bbox(
                ax, bbox_from_str(region_str),
                "region: ({region_str})".format(region_str=region_str),
@ -99,7 +99,7 @@ def draw_parse_constraints(table, ax):
                label_pos="bottom,right"
            )
        # Display a bbox per area
-        for area_str in table.debug_info["table_areas"] or []:
+        for area_str in table.parse_details["table_areas"] or []:
            draw_labeled_bbox(
                ax, bbox_from_str(area_str),
                "area: ({area_str})".format(area_str=area_str),
@ -294,8 +294,27 @@ class PlotMethods(object):
        ax.set_ylim(min(ys) - 10, max(ys) + 10)

        if table.flavor == "hybrid":
-            # FRHTODO: Clean this up
-            table.debug_info["edges_searches"][0].plot_alignments(ax)
+            for text_network in table.parse_details["network_searches"]:
+                # FRHTODO: This is too busy and doesn't plot lines
+                most_connected_tl = text_network.most_connected_textline()
+
+                ax.add_patch(
+                    patches.Rectangle(
+                        (most_connected_tl.x0, most_connected_tl.y0),
+                        most_connected_tl.x1 - most_connected_tl.x0,
+                        most_connected_tl.y1 - most_connected_tl.y0,
+                        color="red",
+                        alpha=0.5
+                    )
+                )
+                for tl, alignments in text_network._textlines_alignments.items():
+                    ax.text(
+                        tl.x0 - 5,
+                        tl.y0 - 5,
+                        f"{alignments.max_h_count()}x{alignments.max_v_count()}",
+                        fontsize=5,
+                        color="black"
+                    )
        else:
            for te in table._textedges:
                ax.plot([te.coord, te.coord], [te.y0, te.y1])
@ -372,10 +391,10 @@ class PlotMethods(object):
        draw_pdf(table, ax)
        draw_parse_constraints(table, ax)

-        if table.debug_info is None:
+        if table.parse_details is None:
            return fig
-        debug_info = table.debug_info
-        for box_id, bbox_search in enumerate(debug_info["bboxes_searches"]):
+        parse_details = table.parse_details
+        for box_id, bbox_search in enumerate(parse_details["bbox_searches"]):
            max_h_gap = bbox_search["max_h_gap"]
            max_v_gap = bbox_search["max_v_gap"]
            iterations = bbox_search["iterations"]
@ -403,7 +422,7 @@ class PlotMethods(object):
                    )
                )

-        for box_id, col_search in enumerate(debug_info["col_searches"]):
+        for box_id, col_search in enumerate(parse_details["col_searches"]):
            draw_labeled_bbox(
                ax, col_search["expanded_bbox"],
                "box body + header #{box_id}".format(
@ -422,10 +441,5 @@ class PlotMethods(object):
                linewidth=2,
                label_pos="bottom,left"
            )
-            # self.debug_info["col_searches"].append({
-            #     "core_bbox": bbox,
-            #     "cols_anchors": cols_anchors,
-            #     "expanded_bbox": expanded_bbox
-            # })

        return fig
--- a/tests/files/baseline_plots/test_hybrid_contour_plot.png
+++ b/tests/files/baseline_plots/test_hybrid_contour_plot.png
--- a/tests/files/baseline_plots/test_hybrid_textedge_plot.png
+++ b/tests/files/baseline_plots/test_hybrid_textedge_plot.png