diff --git a/camelot/backends/__init__.py b/camelot/backends/__init__.py
new file mode 100644
index 0000000..8d0b91e
--- /dev/null
+++ b/camelot/backends/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+from .image_conversion import ImageConversionBackend
diff --git a/camelot/backends/ghostscript_backend.py b/camelot/backends/ghostscript_backend.py
new file mode 100644
--- /dev/null
+++ b/camelot/backends/ghostscript_backend.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+
+class GhostscriptBackend(object):
+    """Render a PDF file to a PNG image with Ghostscript."""
+
+    def convert(self, pdf_path, png_path, resolution=300):
+        """Render ``pdf_path`` to ``png_path`` at ``resolution`` dpi."""
+        # Imported lazily: setup.py lists ghostscript only under the optional
+        # "base" extra, and lattice.py imports this package at module level.
+        import ghostscript
+
+        gs_args = [
+            "gs",
+            "-q",
+            "-sDEVICE=png16m",
+            "-o",
+            png_path,
+            f"-r{resolution}",
+            pdf_path,
+        ]
+        ghostscript.Ghostscript(*gs_args)
diff --git a/camelot/backends/image_conversion.py b/camelot/backends/image_conversion.py
new file mode 100644
--- /dev/null
+++ b/camelot/backends/image_conversion.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+from .poppler_backend import PopplerBackend
+from .ghostscript_backend import GhostscriptBackend
+
+backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
+
+
+class ImageConversionBackend(object):
+    """Dispatch PDF-to-PNG conversion to the backend named ``backend``."""
+
+    def __init__(self, backend="poppler"):
+        self.backend = backend
+
+    def convert(self, pdf_path, png_path):
+        """Convert ``pdf_path`` to a PNG written at ``png_path``.
+
+        Raises ``KeyError`` if ``self.backend`` is not a key of ``backends``.
+        """
+        converter = backends[self.backend]()
+        converter.convert(pdf_path, png_path)
diff --git a/camelot/backends/poppler_backend.py b/camelot/backends/poppler_backend.py
new file mode 100644
--- /dev/null
+++ b/camelot/backends/poppler_backend.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+
+
+class PopplerBackend(object):
+    """Render a PDF file to a PNG image with pdftopng."""
+
+    def convert(self, pdf_path, png_path):
+        """Render ``pdf_path`` to ``png_path``."""
+        # Imported lazily, as the Lattice._generate_image code this replaces
+        # did: setup.py lists pdftopng only under the optional "base" extra.
+        from pdftopng import pdftopng
+
+        pdftopng.convert(pdf_path, png_path)
diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index dac072a..ff47bfc 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -29,6 +29,7 @@ from ..image_processing import (
     find_contours,
     find_joints,
 )
+from ..backends import ImageConversionBackend
 
 logger = logging.getLogger("camelot")
 
@@ -111,7 +112,7 @@ class Lattice(BaseParser):
         threshold_constant=-2,
         iterations=0,
         resolution=300,
-        **kwargs
+        **kwargs,
     ):
         self.table_regions = table_regions
         self.table_areas = table_areas
@@ -208,9 +209,8 @@ class Lattice(BaseParser):
         return t
 
     def _generate_image(self):
-        from pdftopng import pdftopng
-
-        pdftopng.convert(pdf_path=self.filename, png_path=self.imagename)
+        converter = ImageConversionBackend()
+        converter.convert(self.filename, self.imagename)
 
     def _generate_table_bbox(self):
         def scale_areas(areas):
diff --git a/setup.py b/setup.py
index 1307fbe..5e99ea8 100644
--- a/setup.py
+++ b/setup.py
@@ -24,10 +24,7 @@ requires = [
     "tabulate>=0.8.9",
 ]
 
-base_requires = [
-    'opencv-python>=3.4.2.17',
-    'pdftopng>=0.1.1'
-]
+base_requires = ["ghostscript>=0.7", "opencv-python>=3.4.2.17", "pdftopng>=0.1.1"]
 
 plot_requires = [
     "matplotlib>=2.2.3",
@@ -48,36 +45,38 @@ dev_requires = dev_requires + all_requires
 
 
 def setup_package():
-    metadata = dict(name=about['__title__'],
-                    version=about['__version__'],
-                    description=about['__description__'],
-                    long_description=readme,
-                    long_description_content_type="text/markdown",
-                    url=about['__url__'],
-                    author=about['__author__'],
-                    author_email=about['__author_email__'],
-                    license=about['__license__'],
-                    packages=find_packages(exclude=('tests',)),
-                    install_requires=requires,
-                    extras_require={
-                        'all': all_requires,
-                        'base': base_requires,
-                        'dev': dev_requires,
-                        'plot': plot_requires
-                    },
-                    entry_points={
-                        'console_scripts': [
-                            'camelot = camelot.cli:cli',
-                        ],
-                    },
-                    classifiers=[
-                        # Trove classifiers
-                        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
-                        'License :: OSI Approved :: MIT License',
-                        'Programming Language :: Python :: 3.6',
-                        'Programming Language :: Python :: 3.7',
-                        'Programming Language :: Python :: 3.8'
-                    ])
+    metadata = dict(
+        name=about["__title__"],
+        version=about["__version__"],
+        description=about["__description__"],
+        long_description=readme,
+        long_description_content_type="text/markdown",
+        url=about["__url__"],
+        author=about["__author__"],
+        author_email=about["__author_email__"],
+        license=about["__license__"],
+        packages=find_packages(exclude=("tests",)),
+        install_requires=requires,
+        extras_require={
+            "all": all_requires,
+            "base": base_requires,
+            "dev": dev_requires,
+            "plot": plot_requires,
+        },
+        entry_points={
+            "console_scripts": [
+                "camelot = camelot.cli:cli",
+            ],
+        },
+        classifiers=[
+            # Trove classifiers
+            # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
+            "License :: OSI Approved :: MIT License",
+            "Programming Language :: Python :: 3.6",
+            "Programming Language :: Python :: 3.7",
+            "Programming Language :: Python :: 3.8",
+        ],
+    )
 
     try:
         from setuptools import setup