Add fallbacks to image conversion

pull/198/head
Vinayak Mehta 2021-06-28 03:16:54 +05:30
parent a96702987f
commit 3e4e848a09
No known key found for this signature in database
GPG Key ID: 2DE013537A15A9A4
2 changed files with 32 additions and 8 deletions

View File

@ -1,15 +1,41 @@
# -*- coding: utf-8 -*-
import logging
from .poppler_backend import PopplerBackend
from .ghostscript_backend import GhostscriptBackend
logger = logging.getLogger("camelot")
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
class ImageConversionBackend:
    """Convert a PDF to a PNG with a named backend, optionally falling back.

    The available backends are the keys of the module-level ``backends``
    registry. When the selected backend raises, every other registered
    backend is tried in registry order (unless fallbacks are disabled).
    """

    def __init__(self, backend="poppler", use_fallback=True):
        """Select the primary backend and work out the fallback order.

        Parameters
        ----------
        backend : str
            Key into the ``backends`` registry naming the primary converter.
        use_fallback : bool
            If True, the remaining registered backends are tried when the
            primary one fails.

        Raises
        ------
        ValueError
            If ``backend`` is not a key of the ``backends`` registry.
        """
        if backend not in backends:
            raise ValueError(f"Image conversion backend '{backend}' not supported")

        self.backend = backend
        self.use_fallback = use_fallback
        # Every registered backend except the primary one, in registry order.
        self.fallbacks = [name for name in backends if name != backend]

    def convert(self, pdf_path, png_path):
        """Convert ``pdf_path`` to ``png_path``.

        The primary backend is tried first. If it raises and
        ``use_fallback`` is set, each fallback backend is tried in turn
        until one succeeds.

        Parameters
        ----------
        pdf_path : str
            Passed through unchanged to the backend's ``convert``.
        png_path : str
            Passed through unchanged to the backend's ``convert``.

        Raises
        ------
        Exception
            The error raised by the primary backend, re-raised when no
            backend managed to convert the file (fallbacks disabled, or all
            of them failed as well).
        """
        try:
            backends[self.backend]().convert(pdf_path, png_path)
        except Exception as e:
            logger.info(
                "Image conversion backend '%s' failed with %s", self.backend, e
            )
            # ``e`` is unbound when the except clause ends; keep a reference
            # so it can be re-raised if nothing else succeeds.
            primary_error = e
        else:
            return

        if self.use_fallback:
            for fallback in self.fallbacks:
                logger.info("Falling back on '%s'", fallback)
                try:
                    # Use the fallback itself, not the primary backend that
                    # has just failed.
                    backends[fallback]().convert(pdf_path, png_path)
                except Exception as e:
                    logger.info(
                        "Image conversion backend '%s' failed with %s", fallback, e
                    )
                else:
                    logger.info(
                        "Image conversion backend '%s' succeeded", fallback
                    )
                    return

        # No backend produced the image: surface the failure instead of
        # returning as if the conversion had worked.
        raise primary_error

View File

@ -129,6 +129,7 @@ class Lattice(BaseParser):
self.threshold_constant = threshold_constant
self.iterations = iterations
self.resolution = resolution
self.backend = ImageConversionBackend()
@staticmethod
def _reduce_index(t, idx, shift_text):
@ -208,10 +209,6 @@ class Lattice(BaseParser):
t.cells[i][j].text = t.cells[i - 1][j].text
return t
def _generate_image(self):
    """Render ``self.filename`` to ``self.imagename``.

    Builds a default ``ImageConversionBackend`` and delegates the
    conversion to its ``convert`` method.
    """
    ImageConversionBackend().convert(self.filename, self.imagename)
def _generate_table_bbox(self):
def scale_areas(areas):
scaled_areas = []
@ -391,7 +388,8 @@ class Lattice(BaseParser):
)
return []
self._generate_image()
self.backend.convert(self.filename, self.imagename)
self._generate_table_bbox()
_tables = []