Add fallbacks to image conversion
parent
a96702987f
commit
3e4e848a09
|
|
@ -1,15 +1,41 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
|
||||
from .poppler_backend import PopplerBackend
|
||||
from .ghostscript_backend import GhostscriptBackend
|
||||
|
||||
logger = logging.getLogger("camelot")
|
||||
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
|
||||
|
||||
|
||||
class ImageConversionBackend(object):
    """Convert a PDF to a PNG using a named backend, with optional fallbacks.

    Parameters
    ----------
    backend : str, optional (default: "poppler")
        Key into the module-level ``backends`` registry naming the
        converter to try first.
    use_fallback : bool, optional (default: True)
        When True, every other registered backend is tried, in registry
        order, if the primary backend raises.

    Raises
    ------
    ValueError
        If ``backend`` is not a key of the ``backends`` registry.

    """

    def __init__(self, backend="poppler", use_fallback=True):
        if backend not in backends:
            raise ValueError(f"Image conversion backend '{backend}' not supported")

        self.backend = backend
        self.use_fallback = use_fallback
        # Every registered backend except the primary one, in registry order.
        self.fallbacks = [name for name in backends if name != backend]

    def convert(self, pdf_path, png_path):
        """Convert ``pdf_path`` to ``png_path``.

        The primary backend is tried first. If it raises and fallbacks are
        enabled, each fallback backend is tried in turn until one succeeds.

        Parameters
        ----------
        pdf_path : str
            Path passed to the backend as the input PDF.
        png_path : str
            Path passed to the backend as the output PNG.

        Raises
        ------
        Exception
            The primary backend's error is re-raised when fallbacks are
            disabled or when every fallback also fails, so callers never
            continue without a converted image.

        """
        try:
            converter = backends[self.backend]()
            converter.convert(pdf_path, png_path)
        except Exception as primary_error:
            logger.info(
                f"Image conversion backend '{self.backend}' failed with {str(primary_error)}"
            )

            if not self.use_fallback:
                raise

            for fallback in self.fallbacks:
                logger.info(f"Falling back on '{fallback}'")

                try:
                    # Instantiate the *fallback* backend; re-running the
                    # primary one here would just fail again.
                    converter = backends[fallback]()
                    converter.convert(pdf_path, png_path)
                except Exception as fallback_error:
                    logger.info(
                        f"Image conversion backend '{fallback}' failed with {str(fallback_error)}"
                    )
                    continue
                else:
                    logger.info(f"Image conversion backend '{fallback}' succeeded")
                    break
            else:
                # No fallback succeeded (or none exist): surface the primary
                # failure instead of returning as if conversion had worked.
                raise
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@ class Lattice(BaseParser):
|
|||
self.threshold_constant = threshold_constant
|
||||
self.iterations = iterations
|
||||
self.resolution = resolution
|
||||
self.backend = ImageConversionBackend()
|
||||
|
||||
@staticmethod
|
||||
def _reduce_index(t, idx, shift_text):
|
||||
|
|
@ -208,10 +209,6 @@ class Lattice(BaseParser):
|
|||
t.cells[i][j].text = t.cells[i - 1][j].text
|
||||
return t
|
||||
|
||||
def _generate_image(self):
|
||||
converter = ImageConversionBackend()
|
||||
converter.convert(self.filename, self.imagename)
|
||||
|
||||
def _generate_table_bbox(self):
|
||||
def scale_areas(areas):
|
||||
scaled_areas = []
|
||||
|
|
@ -391,7 +388,8 @@ class Lattice(BaseParser):
|
|||
)
|
||||
return []
|
||||
|
||||
self._generate_image()
|
||||
self.backend.convert(self.filename, self.imagename)
|
||||
|
||||
self._generate_table_bbox()
|
||||
|
||||
_tables = []
|
||||
|
|
|
|||
Loading…
Reference in New Issue