Add fallbacks to image conversion
parent
a96702987f
commit
3e4e848a09
|
|
@ -1,15 +1,41 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from .poppler_backend import PopplerBackend
|
from .poppler_backend import PopplerBackend
|
||||||
from .ghostscript_backend import GhostscriptBackend
|
from .ghostscript_backend import GhostscriptBackend
|
||||||
|
|
||||||
|
logger = logging.getLogger("camelot")
|
||||||
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
|
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
|
||||||
|
|
||||||
|
|
||||||
class ImageConversionBackend(object):
    """Convert a PDF to a PNG using a named backend, with optional fallbacks.

    The backend names are the keys of the module-level ``backends`` mapping.
    Each mapped class is instantiated with no arguments and must expose a
    ``convert(pdf_path, png_path)`` method.
    """

    def __init__(self, backend="poppler", use_fallback=True):
        """Select the primary backend and work out the fallback order.

        Parameters
        ----------
        backend : str
            Key of the primary backend in ``backends``.
        use_fallback : bool
            When True, the remaining backends are tried in turn if the
            primary backend raises.

        Raises
        ------
        ValueError
            If ``backend`` is not a key of ``backends``.
        """
        if backend not in backends:
            raise ValueError(f"Image conversion backend '{backend}' not supported")

        self.backend = backend
        self.use_fallback = use_fallback
        # Every registered backend except the primary one, in registration order.
        self.fallbacks = [name for name in backends if name != backend]

    def convert(self, pdf_path, png_path):
        """Convert ``pdf_path`` to ``png_path``, falling back on failure.

        The primary backend is tried first. If it raises and ``use_fallback``
        is set, each fallback backend is tried until one succeeds. Failures
        are logged at INFO level and are not re-raised.

        NOTE(review): when every attempted backend fails this method still
        returns normally, so callers cannot detect the failure from here --
        consider raising once the callers' expectations are confirmed.
        """
        try:
            backends[self.backend]().convert(pdf_path, png_path)
        except Exception as e:
            logger.info(f"Image conversion backend '{self.backend}' failed with {str(e)}")
        else:
            # Primary backend succeeded; nothing else to do.
            return

        if not self.use_fallback:
            return

        for fallback in self.fallbacks:
            logger.info(f"Falling back on '{fallback}'")

            try:
                # Use the fallback backend itself; instantiating
                # backends[self.backend] here would only re-run the backend
                # that has already failed.
                backends[fallback]().convert(pdf_path, png_path)
            except Exception as e:
                logger.info(f"Image conversion backend '{fallback}' failed with {str(e)}")
                continue
            else:
                logger.info(f"Image conversion backend '{fallback}' succeeded")
                break
|
||||||
|
|
|
||||||
|
|
@ -129,6 +129,7 @@ class Lattice(BaseParser):
|
||||||
self.threshold_constant = threshold_constant
|
self.threshold_constant = threshold_constant
|
||||||
self.iterations = iterations
|
self.iterations = iterations
|
||||||
self.resolution = resolution
|
self.resolution = resolution
|
||||||
|
self.backend = ImageConversionBackend()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _reduce_index(t, idx, shift_text):
|
def _reduce_index(t, idx, shift_text):
|
||||||
|
|
@ -208,10 +209,6 @@ class Lattice(BaseParser):
|
||||||
t.cells[i][j].text = t.cells[i - 1][j].text
|
t.cells[i][j].text = t.cells[i - 1][j].text
|
||||||
return t
|
return t
|
||||||
|
|
||||||
def _generate_image(self):
|
|
||||||
converter = ImageConversionBackend()
|
|
||||||
converter.convert(self.filename, self.imagename)
|
|
||||||
|
|
||||||
def _generate_table_bbox(self):
|
def _generate_table_bbox(self):
|
||||||
def scale_areas(areas):
|
def scale_areas(areas):
|
||||||
scaled_areas = []
|
scaled_areas = []
|
||||||
|
|
@ -391,7 +388,8 @@ class Lattice(BaseParser):
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
self._generate_image()
|
self.backend.convert(self.filename, self.imagename)
|
||||||
|
|
||||||
self._generate_table_bbox()
|
self._generate_table_bbox()
|
||||||
|
|
||||||
_tables = []
|
_tables = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue