diff --git a/camelot/core.py b/camelot/core.py index f729e46..83ffea5 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -147,7 +147,7 @@ class TextAlignments(object): def _create_new_text_alignment(coord, textline, align): return TextAlignment(coord, textline, align) - def _update_edge(self, edge, coord, textline): + def _update_alignment(self, alignment, coord, textline): return NotImplemented def _register_textline(self, textline): @@ -172,7 +172,7 @@ class TextAlignments(object): coord, atol=0.5 ): - self._update_edge( + self._update_alignment( alignment_array[idx_closest], coord, textline @@ -209,7 +209,7 @@ class TextEdges(TextAlignments): te = self._create_new_text_alignment(coord, textline, align) self._text_alignments[align].append(te) - def _update_edge(self, edge, coord, textline): + def _update_alignment(self, edge, coord, textline): edge.update_coords(coord, textline, self.edge_tol) def generate(self, textlines): diff --git a/camelot/parsers/hybrid.py b/camelot/parsers/hybrid.py index bd98aed..cdea619 100644 --- a/camelot/parsers/hybrid.py +++ b/camelot/parsers/hybrid.py @@ -21,8 +21,6 @@ from ..utils import ( find_columns_coordinates ) -from matplotlib import patches as patches - # maximum number of columns over which a header can spread MAX_COL_SPREAD_IN_HEADER = 3 @@ -220,8 +218,8 @@ class TextNetworks(TextAlignments): # "number of textlines aligned" self._textlines_alignments = {} - def _update_edge(self, edge, coord, textline): - edge.register_aligned_textline(textline, coord) + def _update_alignment(self, alignment, coord, textline): + alignment.register_aligned_textline(textline, coord) def _register_all_text_lines(self, textlines): """Add all textlines to our network repository to diff --git a/camelot/plotting.py b/camelot/plotting.py index a94c1bb..f973498 100644 --- a/camelot/plotting.py +++ b/camelot/plotting.py @@ -8,7 +8,7 @@ except ImportError: else: _HAS_MPL = True -from .utils import bbox_from_str +from .utils import (bbox_from_str, get_textline_coords) def draw_labeled_bbox( @@ -287,16 +287,15 @@ class PlotMethods(object): patches.Rectangle( (t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue", - alpha=0.5 + alpha=0.2 ) ) ax.set_xlim(min(xs) - 10, max(xs) + 10) ax.set_ylim(min(ys) - 10, max(ys) + 10) if table.flavor == "hybrid": - for text_network in table.parse_details["network_searches"]: - # FRHTODO: This is too busy and doesn't plot lines - most_connected_tl = text_network.most_connected_textline() + for network in table.parse_details["network_searches"]: + most_connected_tl = network.most_connected_textline() ax.add_patch( patches.Rectangle( @@ -307,13 +306,48 @@ class PlotMethods(object): alpha=0.5 ) ) - for tl, alignments in text_network._textlines_alignments.items(): + for tl, alignments in network._textlines_alignments.items(): + coords = get_textline_coords(tl) + alignment_id_h, tls_h = alignments.max_v() + alignment_id_v, tls_v = alignments.max_h() + xs = list(map(lambda tl: tl.x0, tls_v)) + ys = list(map(lambda tl: tl.y1, tls_h)) + top_h = max(ys) ax.text( - tl.x0 - 5, - tl.y0 - 5, - f"{alignments.max_h_count()}x{alignments.max_v_count()}", - fontsize=5, - color="black" + coords[alignment_id_h], + top_h + 5, + "{max_h_count}".format(max_h_count=len(tls_h)), + verticalalignment="bottom", + horizontalalignment="center", + fontsize=8, + color="green" + ) + ax.plot( + [coords[alignment_id_h]] * len(ys), ys, + color="green", + linestyle="dashed", + linewidth=1, + marker="o", + markersize=3 + ) + + left_v = min(map(lambda tl: tl.x0, tls_v)) + ax.text( + left_v - 5, + coords[alignment_id_v], + "{max_v_count}".format(max_v_count=len(tls_v)), + verticalalignment="center", + horizontalalignment="right", + fontsize=8, + color="blue" + ) + ax.plot( + xs, [coords[alignment_id_v]] * len(xs), + color="blue", + linestyle="dotted", + linewidth=1, + marker="o", + markersize=3 ) else: for te in table._textedges: diff --git a/tests/files/baseline_plots/test_hybrid_contour_plot.png b/tests/files/baseline_plots/test_hybrid_contour_plot.png index 524d5e0..26d2b57 100644 Binary files a/tests/files/baseline_plots/test_hybrid_contour_plot.png and b/tests/files/baseline_plots/test_hybrid_contour_plot.png differ diff --git a/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png b/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png index fffd520..bddfd42 100644 Binary files a/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png and b/tests/files/baseline_plots/test_hybrid_table_regions_textedge_plot.png differ diff --git a/tests/files/baseline_plots/test_hybrid_textedge_plot.png b/tests/files/baseline_plots/test_hybrid_textedge_plot.png index 6b44d48..e49bb08 100644 Binary files a/tests/files/baseline_plots/test_hybrid_textedge_plot.png and b/tests/files/baseline_plots/test_hybrid_textedge_plot.png differ diff --git a/tests/files/baseline_plots/test_stream_textedge_plot.png b/tests/files/baseline_plots/test_stream_textedge_plot.png index b9ecf7d..8e6a10b 100644 Binary files a/tests/files/baseline_plots/test_stream_textedge_plot.png and b/tests/files/baseline_plots/test_stream_textedge_plot.png differ