Fix plot_text

pull/2/head
Vinayak Mehta 2018-09-23 11:45:20 +05:30
parent 3170a9689f
commit 71d91fbebd
4 changed files with 19 additions and 10 deletions

View File

@ -333,7 +333,8 @@ class Table(object):
""" """
if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']: if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
raise NotImplementedError("{} cannot be plotted with flavor='stream'") raise NotImplementedError("{} cannot be plotted with flavor='stream'".format(
geometry_type))
if geometry_type == 'text': if geometry_type == 'text':
plot_text(self._text) plot_text(self._text)
@ -444,13 +445,25 @@ class TableList(object):
def __getitem__(self, idx): def __getitem__(self, idx):
return self._tables[idx] return self._tables[idx]
def __iter__(self):
    """Return a fresh iterator over the stored tables.

    Each call hands back an independent iterator over ``self._tables``, so
    nested or repeated loops over the same object do not disturb one
    another, and iteration also works on Python 3, where the object
    returned from ``__iter__`` must implement ``__next__``.

    ``self._n`` is still reset to 0 so that code calling ``next()`` on this
    object directly after ``iter()`` keeps starting from the first table.
    """
    self._n = 0
    return iter(self._tables)
def next(self):
    """Return the table at the current cursor and advance the cursor.

    Uses ``self._n`` as the index into ``self._tables``.

    Raises
    ------
    StopIteration
        Once the cursor has reached ``len(self)``.
    """
    if self._n >= len(self):
        raise StopIteration
    table = self._tables[self._n]
    self._n += 1
    return table


# Python 3 names the iterator-protocol hook ``__next__``; alias it so the
# same method serves both Python 2 and Python 3.
__next__ = next
@staticmethod @staticmethod
def _format_func(table, f): def _format_func(table, f):
return getattr(table, 'to_{}'.format(f)) return getattr(table, 'to_{}'.format(f))
@property @property
def n(self): def n(self):
return len(self._tables) return len(self)
def _write_file(self, f=None, **kwargs): def _write_file(self, f=None, **kwargs):
dirname = kwargs.get('dirname') dirname = kwargs.get('dirname')

View File

@ -88,5 +88,5 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
validate_input(kwargs, flavor=flavor) validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages) p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor) kwargs = remove_extra(kwargs, flavor=flavor)
tables, __ = p.parse(flavor=flavor, **kwargs) tables = p.parse(flavor=flavor, **kwargs)
return tables return tables

View File

@ -1,6 +1,5 @@
import os import os
from ..core import Geometry
from ..utils import get_page_layout, get_text_objects from ..utils import get_page_layout, get_text_objects
@ -17,5 +16,4 @@ class BaseParser(object):
self.horizontal_text = get_text_objects(self.layout, ltype="lh") self.horizontal_text = get_text_objects(self.layout, ltype="lh")
self.vertical_text = get_text_objects(self.layout, ltype="lv") self.vertical_text = get_text_objects(self.layout, ltype="lv")
self.pdf_width, self.pdf_height = self.dimensions self.pdf_width, self.pdf_height = self.dimensions
self.rootname, __ = os.path.splitext(self.filename) self.rootname, __ = os.path.splitext(self.filename)
self.g = Geometry()

View File

@ -8,8 +8,8 @@ def plot_text(text):
ax = fig.add_subplot(111, aspect='equal') ax = fig.add_subplot(111, aspect='equal')
xs, ys = [], [] xs, ys = [], []
for t in text: for t in text:
xs.extend([t[0], t[1]]) xs.extend([t[0], t[2]])
ys.extend([t[2], t[3]]) ys.extend([t[1], t[3]])
ax.add_patch( ax.add_patch(
patches.Rectangle( patches.Rectangle(
(t[0], t[1]), (t[0], t[1]),
@ -57,9 +57,7 @@ def plot_joint(image):
for coord in table_bbox[k]: for coord in table_bbox[k]:
x_coord.append(coord[0]) x_coord.append(coord[0])
y_coord.append(coord[1]) y_coord.append(coord[1])
max_x, max_y = max(x_coord), max(y_coord)
plt.plot(x_coord, y_coord, 'ro') plt.plot(x_coord, y_coord, 'ro')
plt.axis([0, max_x + 100, max_y + 100, 0])
plt.imshow(img) plt.imshow(img)
plt.show() plt.show()