From 3e4e848a09c1404b30d0f15432213ba499290f5f Mon Sep 17 00:00:00 2001
From: Vinayak Mehta
Date: Mon, 28 Jun 2021 03:16:54 +0530
Subject: [PATCH] Add fallbacks to image conversion

---
Notes (ignored by git-am):
  * ImageConversionBackend now rejects unknown backend names with a
    ValueError and records every other registered backend as a fallback.
  * convert() tries the configured backend first; if it raises, the failure
    is logged and, when use_fallback is true, each fallback backend is tried
    in turn until one succeeds.
  * Each fallback attempt instantiates backends[fallback], not
    backends[self.backend], so it does not re-run the backend that failed.
  * Lattice keeps one ImageConversionBackend instance (self.backend) and
    calls it directly; the _generate_image() helper is removed.

 camelot/backends/image_conversion.py | 32 +++++++++++++++++++++++++---
 camelot/parsers/lattice.py           |  8 +++----
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/camelot/backends/image_conversion.py b/camelot/backends/image_conversion.py
index 6652414..9b07fa0 100644
--- a/camelot/backends/image_conversion.py
+++ b/camelot/backends/image_conversion.py
@@ -1,15 +1,41 @@
 # -*- coding: utf-8 -*-
 
+import logging
+
 from .poppler_backend import PopplerBackend
 from .ghostscript_backend import GhostscriptBackend
 
+logger = logging.getLogger("camelot")
 backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
 
 
 class ImageConversionBackend(object):
-    def __init__(self, backend="poppler"):
+    def __init__(self, backend="poppler", use_fallback=True):
+        if backend not in backends.keys():
+            raise ValueError(f"Image conversion backend '{backend}' not supported")
+
         self.backend = backend
+        self.use_fallback = use_fallback
+        self.fallbacks = list(filter(lambda x: x != backend, backends.keys()))
 
     def convert(self, pdf_path, png_path):
-        converter = backends[self.backend]()
-        converter.convert(pdf_path, png_path)
+        try:
+            converter = backends[self.backend]()
+            converter.convert(pdf_path, png_path)
+        except Exception as e:
+            logger.info(f"Image conversion backend '{self.backend}' failed with {str(e)}")
+
+            if self.use_fallback:
+                for fallback in self.fallbacks:
+                    logger.info(f"Falling back on '{fallback}'")
+
+                    try:
+                        converter = backends[fallback]()
+                        converter.convert(pdf_path, png_path)
+                    except Exception as e:
+                        logger.info(f"Image conversion backend '{fallback}' failed with {str(e)}")
+
+                        continue
+                    else:
+                        logger.info(f"Image conversion backend '{fallback}' succeeded")
+                        break
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index ff47bfc..50530cc 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -129,6 +129,7 @@ class Lattice(BaseParser):
         self.threshold_constant = threshold_constant
         self.iterations = iterations
         self.resolution = resolution
+        self.backend = ImageConversionBackend()
 
     @staticmethod
     def _reduce_index(t, idx, shift_text):
@@ -208,10 +209,6 @@ class Lattice(BaseParser):
                                 t.cells[i][j].text = t.cells[i - 1][j].text
         return t
 
-    def _generate_image(self):
-        converter = ImageConversionBackend()
-        converter.convert(self.filename, self.imagename)
-
     def _generate_table_bbox(self):
         def scale_areas(areas):
             scaled_areas = []
@@ -391,7 +388,8 @@ class Lattice(BaseParser):
                 )
             return []
 
-        self._generate_image()
+        self.backend.convert(self.filename, self.imagename)
+
         self._generate_table_bbox()
 
         _tables = []