More refactoring across stream and hybrid.
Stream is now much faster: the whole test takes 72s instead of 92s.
pull/153/head
parent
adb14d3522
commit
5db49d4fde
|
|
@ -12,6 +12,7 @@ import pandas as pd
|
||||||
from cv2 import cv2
|
from cv2 import cv2
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
get_index_closest_point,
|
||||||
get_textline_coords,
|
get_textline_coords,
|
||||||
build_file_path_in_temp_dir,
|
build_file_path_in_temp_dir,
|
||||||
compute_accuracy,
|
compute_accuracy,
|
||||||
|
|
@ -98,6 +99,42 @@ class BaseTextEdges(object):
|
||||||
for alignment_name in alignment_names:
|
for alignment_name in alignment_names:
|
||||||
self._textedges[alignment_name] = []
|
self._textedges[alignment_name] = []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _create_new_text_edge(coord, textline, align=None):
|
||||||
|
return NotImplemented
|
||||||
|
|
||||||
|
def _update_edge(self, edge, coord, textline):
|
||||||
|
return NotImplemented
|
||||||
|
|
||||||
|
def _register_textline(self, textline):
|
||||||
|
"""Updates an existing text edge in the current dict.
|
||||||
|
"""
|
||||||
|
coords = get_textline_coords(textline)
|
||||||
|
for alignment, edge_array in self._textedges.items():
|
||||||
|
coord = coords[alignment]
|
||||||
|
|
||||||
|
# Find the index of the closest existing element (or 0 if none)
|
||||||
|
idx_closest = get_index_closest_point(
|
||||||
|
coord, edge_array, fn=lambda x: x.coord
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if the edges before/after are close enough
|
||||||
|
# that it can be considered aligned
|
||||||
|
idx_insert = None
|
||||||
|
if idx_closest is None:
|
||||||
|
idx_insert = 0
|
||||||
|
elif np.isclose(edge_array[idx_closest].coord, coord, atol=0.5):
|
||||||
|
self._update_edge(edge_array[idx_closest], coord, textline)
|
||||||
|
elif edge_array[idx_closest].coord < coord:
|
||||||
|
idx_insert = idx_closest + 1
|
||||||
|
else:
|
||||||
|
idx_insert = idx_closest
|
||||||
|
if idx_insert is not None:
|
||||||
|
new_edge = self._create_new_text_edge(
|
||||||
|
coord, textline, align=alignment
|
||||||
|
)
|
||||||
|
edge_array.insert(idx_insert, new_edge)
|
||||||
|
|
||||||
|
|
||||||
class TextEdges(BaseTextEdges):
|
class TextEdges(BaseTextEdges):
|
||||||
"""Defines a dict of left, right and middle text edges found on
|
"""Defines a dict of left, right and middle text edges found on
|
||||||
|
|
@ -109,36 +146,20 @@ class TextEdges(BaseTextEdges):
|
||||||
super().__init__(HORIZONTAL_ALIGNMENTS)
|
super().__init__(HORIZONTAL_ALIGNMENTS)
|
||||||
self.edge_tol = edge_tol
|
self.edge_tol = edge_tol
|
||||||
|
|
||||||
def find(self, x_coord, align):
|
@staticmethod
|
||||||
"""Returns the index of an existing text edge using
|
def _create_new_text_edge(coord, textline, align=None):
|
||||||
the specified x coordinate and alignment.
|
y0 = textline.y0
|
||||||
"""
|
y1 = textline.y1
|
||||||
for i, te in enumerate(self._textedges[align]):
|
return TextEdge(coord, y0, y1, align=align)
|
||||||
if np.isclose(te.coord, x_coord, atol=0.5):
|
|
||||||
return i
|
|
||||||
return None
|
|
||||||
|
|
||||||
def add(self, coord, textline, align):
|
def add(self, coord, textline, align):
|
||||||
"""Adds a new text edge to the current dict.
|
"""Adds a new text edge to the current dict.
|
||||||
"""
|
"""
|
||||||
y0 = textline.y0
|
te = self._create_new_text_edge(coord, textline, align=align)
|
||||||
y1 = textline.y1
|
|
||||||
te = TextEdge(coord, y0, y1, align=align)
|
|
||||||
self._textedges[align].append(te)
|
self._textedges[align].append(te)
|
||||||
|
|
||||||
def update(self, textline):
|
def _update_edge(self, edge, coord, textline):
|
||||||
"""Updates an existing text edge in the current dict.
|
edge.update_coords(coord, textline.y0, self.edge_tol)
|
||||||
"""
|
|
||||||
coords = get_textline_coords(textline)
|
|
||||||
for alignment, edge_array in self._textedges.items():
|
|
||||||
x_coord = coords[alignment]
|
|
||||||
idx = self.find(x_coord, alignment)
|
|
||||||
if idx is None:
|
|
||||||
self.add(x_coord, textline, alignment)
|
|
||||||
else:
|
|
||||||
edge_array[idx].update_coords(
|
|
||||||
x_coord, textline.y0, edge_tol=self.edge_tol
|
|
||||||
)
|
|
||||||
|
|
||||||
def generate(self, textlines):
|
def generate(self, textlines):
|
||||||
"""Generates the text edges dict based on horizontal text
|
"""Generates the text edges dict based on horizontal text
|
||||||
|
|
@ -146,7 +167,7 @@ class TextEdges(BaseTextEdges):
|
||||||
"""
|
"""
|
||||||
for tl in textlines:
|
for tl in textlines:
|
||||||
if len(tl.get_text().strip()) > 1: # TODO: hacky
|
if len(tl.get_text().strip()) > 1: # TODO: hacky
|
||||||
self.update(tl)
|
self._register_textline(tl)
|
||||||
|
|
||||||
def get_relevant(self):
|
def get_relevant(self):
|
||||||
"""Returns the list of relevant text edges (all share the same
|
"""Returns the list of relevant text edges (all share the same
|
||||||
|
|
|
||||||
|
|
@ -267,33 +267,12 @@ class TextEdges2(BaseTextEdges):
|
||||||
self.max_rows = None
|
self.max_rows = None
|
||||||
self.max_cols = None
|
self.max_cols = None
|
||||||
|
|
||||||
def _register_textline(self, textline):
|
@staticmethod
|
||||||
"""Updates an existing text edge in the current dict.
|
def _create_new_text_edge(coord, textline, align=None):
|
||||||
"""
|
return TextEdge2(coord, textline)
|
||||||
coords = get_textline_coords(textline)
|
|
||||||
for alignment, edge_array in self._textedges.items():
|
|
||||||
coord = coords[alignment]
|
|
||||||
|
|
||||||
# Find the index of the closest existing element (or 0 if none)
|
def _update_edge(self, edge, coord, textline):
|
||||||
idx_closest = get_index_closest_point(
|
edge.register_aligned_textline(textline, coord)
|
||||||
coord, edge_array, fn=lambda x: x.coord
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check if the edges before/after are close enough
|
|
||||||
# that it can be considered aligned
|
|
||||||
idx_insert = None
|
|
||||||
if idx_closest is None:
|
|
||||||
idx_insert = 0
|
|
||||||
elif np.isclose(edge_array[idx_closest].coord, coord, atol=0.5):
|
|
||||||
closest_edge = edge_array[idx_closest]
|
|
||||||
closest_edge.register_aligned_textline(textline, coord)
|
|
||||||
elif edge_array[idx_closest].coord < coord:
|
|
||||||
idx_insert = idx_closest + 1
|
|
||||||
else:
|
|
||||||
idx_insert = idx_closest
|
|
||||||
if idx_insert is not None:
|
|
||||||
new_edge = TextEdge2(coord, textline)
|
|
||||||
edge_array.insert(idx_insert, new_edge)
|
|
||||||
|
|
||||||
def _register_all_text_lines(self, textlines):
|
def _register_all_text_lines(self, textlines):
|
||||||
"""Add all textlines to our edge repository to
|
"""Add all textlines to our edge repository to
|
||||||
|
|
|
||||||
|
|
@ -298,7 +298,7 @@ class PlotMethods(object):
|
||||||
table.debug_info["edges_searches"][0].plot_alignments(ax)
|
table.debug_info["edges_searches"][0].plot_alignments(ax)
|
||||||
else:
|
else:
|
||||||
for te in table._textedges:
|
for te in table._textedges:
|
||||||
ax.plot([te.x, te.x], [te.y0, te.y1])
|
ax.plot([te.coord, te.coord], [te.y0, te.y1])
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue