Add image conversion backends

2021-06-28 01:58:45 +05:30 · 2021-06-28 01:58:45 +05:30 · 8563a09544
parent fdade4502e
commit 8563a09544
6 changed files with 80 additions and 38 deletions
--- a/camelot/backends/__init__.py
+++ b/camelot/backends/__init__.py
@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+from .image_conversion import ImageConversionBackend
--- a/camelot/backends/ghostscript_backend.py
+++ b/camelot/backends/ghostscript_backend.py
@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+import ghostscript
+
+
+class GhostscriptBackend(object):
+    """Image conversion backend that goes through the ``ghostscript`` package."""
+
+    def convert(self, pdf_path, png_path, resolution=300):
+        """Render ``pdf_path`` to a PNG written at ``png_path``.
+
+        Parameters
+        ----------
+        pdf_path
+            Input document; passed to Ghostscript as the final argument.
+        png_path
+            Output file; passed as the value of the ``-o`` flag.
+        resolution : optional (default: 300)
+            Interpolated into the ``-r`` flag.
+
+        """
+        # Laid out like a command line: program name, quiet flag, output
+        # device, output file, resolution, and finally the input document.
+        output_flags = ("-o", png_path)
+        command = (
+            "gs",
+            "-q",
+            "-sDEVICE=png16m",
+            *output_flags,
+            f"-r{resolution}",
+            pdf_path,
+        )
+        # The constructed object is discarded; building it with the argument
+        # vector is the entire call (presumably this is what runs the
+        # conversion -- confirm against the python-ghostscript docs).
+        ghostscript.Ghostscript(*command)
--- a/camelot/backends/image_conversion.py
+++ b/camelot/backends/image_conversion.py
@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+from .poppler_backend import PopplerBackend
+from .ghostscript_backend import GhostscriptBackend
+
+backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
+
+
+class ImageConversionBackend(object):
+    """Facade that converts a PDF to PNG through a backend selected by name.
+
+    Backend names are resolved against the module-level ``backends`` registry.
+    """
+
+    def __init__(self, backend="poppler"):
+        """Select the conversion backend.
+
+        Parameters
+        ----------
+        backend : optional (default: "poppler")
+            Key into the module-level ``backends`` registry.
+
+        Raises
+        ------
+        ValueError
+            If ``backend`` is not a key of the ``backends`` registry.
+
+        """
+        # Fail fast with a readable message rather than letting a bare
+        # KeyError escape from convert() long after construction.
+        if backend not in backends:
+            supported = ", ".join(sorted(backends))
+            raise ValueError(
+                f"Unknown image conversion backend '{backend}'. "
+                f"Supported backends: {supported}"
+            )
+        self.backend = backend
+
+    def convert(self, pdf_path, png_path):
+        """Convert ``pdf_path`` to ``png_path`` with the selected backend.
+
+        Parameters
+        ----------
+        pdf_path
+            Input path, forwarded unchanged to the backend.
+        png_path
+            Output path, forwarded unchanged to the backend.
+
+        """
+        # A new backend object is instantiated on every call. Only the two
+        # paths are forwarded, so any extra backend option (for example the
+        # ``resolution`` parameter of GhostscriptBackend.convert) keeps that
+        # backend's own default.
+        converter = backends[self.backend]()
+        converter.convert(pdf_path, png_path)
--- a/camelot/backends/poppler_backend.py
+++ b/camelot/backends/poppler_backend.py
@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+
+from pdftopng import pdftopng
+
+
+class PopplerBackend(object):
+    """Image conversion backend that delegates to the ``pdftopng`` package."""
+
+    def convert(self, pdf_path, png_path):
+        """Convert ``pdf_path`` to a PNG at ``png_path`` via ``pdftopng.convert``.
+
+        Parameters
+        ----------
+        pdf_path
+            Input path, handed to ``pdftopng.convert`` unchanged.
+        png_path
+            Output path, handed to ``pdftopng.convert`` unchanged.
+
+        """
+        pdftopng.convert(pdf_path=pdf_path, png_path=png_path)
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@ -29,6 +29,7 @@ from ..image_processing import (
    find_contours,
    find_joints,
 )
+from ..backends import ImageConversionBackend


 logger = logging.getLogger("camelot")
@ -111,7 +112,7 @@ class Lattice(BaseParser):
        threshold_constant=-2,
        iterations=0,
        resolution=300,
-        **kwargs
+        **kwargs,
    ):
        self.table_regions = table_regions
        self.table_areas = table_areas
@ -208,9 +209,8 @@ class Lattice(BaseParser):
        return t

    def _generate_image(self):
-        from pdftopng import pdftopng
-
-        pdftopng.convert(pdf_path=self.filename, png_path=self.imagename)
+        converter = ImageConversionBackend()
+        converter.convert(self.filename, self.imagename)

    def _generate_table_bbox(self):
        def scale_areas(areas):
--- a/setup.py
+++ b/setup.py
@ -24,10 +24,7 @@ requires = [
    "tabulate>=0.8.9",
 ]

-base_requires = [
-    'opencv-python>=3.4.2.17',
-    'pdftopng>=0.1.1'
-]
+base_requires = ["ghostscript>=0.7", "opencv-python>=3.4.2.17", "pdftopng>=0.1.1"]

 plot_requires = [
    "matplotlib>=2.2.3",
@ -48,36 +45,38 @@ dev_requires = dev_requires + all_requires


 def setup_package():
-    metadata = dict(name=about['__title__'],
-                    version=about['__version__'],
-                    description=about['__description__'],
+    metadata = dict(
+        name=about["__title__"],
+        version=about["__version__"],
+        description=about["__description__"],
        long_description=readme,
        long_description_content_type="text/markdown",
-                    url=about['__url__'],
-                    author=about['__author__'],
-                    author_email=about['__author_email__'],
-                    license=about['__license__'],
-                    packages=find_packages(exclude=('tests',)),
+        url=about["__url__"],
+        author=about["__author__"],
+        author_email=about["__author_email__"],
+        license=about["__license__"],
+        packages=find_packages(exclude=("tests",)),
        install_requires=requires,
        extras_require={
-                        'all': all_requires,
-                        'base': base_requires,
-                        'dev': dev_requires,
-                        'plot': plot_requires
+            "all": all_requires,
+            "base": base_requires,
+            "dev": dev_requires,
+            "plot": plot_requires,
        },
        entry_points={
-                        'console_scripts': [
-                            'camelot = camelot.cli:cli',
+            "console_scripts": [
+                "camelot = camelot.cli:cli",
            ],
        },
        classifiers=[
            # Trove classifiers
            # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
-                        'License :: OSI Approved :: MIT License',
-                        'Programming Language :: Python :: 3.6',
-                        'Programming Language :: Python :: 3.7',
-                        'Programming Language :: Python :: 3.8'
-                    ])
+            "License :: OSI Approved :: MIT License",
+            "Programming Language :: Python :: 3.6",
+            "Programming Language :: Python :: 3.7",
+            "Programming Language :: Python :: 3.8",
+        ],
+    )

    try:
        from setuptools import setup