Add image conversion backends

pull/198/head
Vinayak Mehta 2021-06-28 01:58:45 +05:30
parent fdade4502e
commit 8563a09544
No known key found for this signature in database
GPG Key ID: 2DE013537A15A9A4
6 changed files with 80 additions and 38 deletions

View File

@ -0,0 +1,3 @@
# -*- coding: utf-8 -*-
from .image_conversion import ImageConversionBackend

View File

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
import ghostscript
class GhostscriptBackend(object):
    """Image conversion backend that renders a PDF to PNG with Ghostscript."""

    def convert(self, pdf_path, png_path, resolution=300):
        """Render ``pdf_path`` into a PNG written to ``png_path``.

        Parameters
        ----------
        pdf_path : str
            Path of the PDF passed to Ghostscript as its input file.
        png_path : str
            Path passed to Ghostscript's ``-o`` (output file) option.
        resolution : int, optional (default: 300)
            Value interpolated into Ghostscript's ``-r`` option.

        """
        resolution_flag = f"-r{resolution}"
        # The leading "gs" sits in the argv[0] position of the argument list.
        output_options = ["gs", "-q", "-sDEVICE=png16m", "-o", png_path]
        command = output_options + [resolution_flag, pdf_path]
        # Instantiating Ghostscript with the arguments performs the conversion.
        ghostscript.Ghostscript(*command)

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
from .poppler_backend import PopplerBackend
from .ghostscript_backend import GhostscriptBackend
# Registry mapping each supported backend name to the class implementing it.
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}


class ImageConversionBackend(object):
    """Dispatch PDF-to-PNG conversion to a backend selected by name.

    Parameters
    ----------
    backend : str, optional (default: "poppler")
        Key into the module-level ``backends`` registry.

    """

    def __init__(self, backend="poppler"):
        # Stored as given; the name is resolved on every convert() call.
        self.backend = backend

    def convert(self, pdf_path, png_path):
        """Convert ``pdf_path`` to ``png_path`` using the selected backend.

        A new backend instance is created for each call.

        Parameters
        ----------
        pdf_path : str
            Path of the source PDF, forwarded to the backend.
        png_path : str
            Path of the PNG to produce, forwarded to the backend.

        Raises
        ------
        KeyError
            If ``self.backend`` is not a key of ``backends``. The message
            lists the names that are registered.

        """
        try:
            backend_cls = backends[self.backend]
        except KeyError:
            supported = ", ".join(repr(name) for name in backends)
            # Same exception type as a plain dict lookup, but with a message
            # that tells the caller which names are actually valid.
            raise KeyError(
                f"Unknown backend {self.backend!r} specified. "
                f"Supported backends: {supported}."
            ) from None
        backend_cls().convert(pdf_path, png_path)

View File

@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
from pdftopng import pdftopng
class PopplerBackend(object):
    """Image conversion backend that delegates to ``pdftopng``."""

    def convert(self, pdf_path, png_path):
        """Convert ``pdf_path`` to a PNG at ``png_path`` via ``pdftopng``.

        Parameters
        ----------
        pdf_path : str
            Path of the source PDF.
        png_path : str
            Path of the PNG to produce.

        """
        pdftopng.convert(pdf_path=pdf_path, png_path=png_path)

View File

@ -29,6 +29,7 @@ from ..image_processing import (
find_contours, find_contours,
find_joints, find_joints,
) )
from ..backends import ImageConversionBackend
logger = logging.getLogger("camelot") logger = logging.getLogger("camelot")
@ -111,7 +112,7 @@ class Lattice(BaseParser):
threshold_constant=-2, threshold_constant=-2,
iterations=0, iterations=0,
resolution=300, resolution=300,
**kwargs **kwargs,
): ):
self.table_regions = table_regions self.table_regions = table_regions
self.table_areas = table_areas self.table_areas = table_areas
@ -208,9 +209,8 @@ class Lattice(BaseParser):
return t return t
def _generate_image(self): def _generate_image(self):
from pdftopng import pdftopng converter = ImageConversionBackend()
converter.convert(self.filename, self.imagename)
pdftopng.convert(pdf_path=self.filename, png_path=self.imagename)
def _generate_table_bbox(self): def _generate_table_bbox(self):
def scale_areas(areas): def scale_areas(areas):

View File

@ -24,10 +24,7 @@ requires = [
"tabulate>=0.8.9", "tabulate>=0.8.9",
] ]
base_requires = [ base_requires = ["ghostscript>=0.7", "opencv-python>=3.4.2.17", "pdftopng>=0.1.1"]
'opencv-python>=3.4.2.17',
'pdftopng>=0.1.1'
]
plot_requires = [ plot_requires = [
"matplotlib>=2.2.3", "matplotlib>=2.2.3",
@ -48,36 +45,38 @@ dev_requires = dev_requires + all_requires
def setup_package(): def setup_package():
metadata = dict(name=about['__title__'], metadata = dict(
version=about['__version__'], name=about["__title__"],
description=about['__description__'], version=about["__version__"],
long_description=readme, description=about["__description__"],
long_description_content_type="text/markdown", long_description=readme,
url=about['__url__'], long_description_content_type="text/markdown",
author=about['__author__'], url=about["__url__"],
author_email=about['__author_email__'], author=about["__author__"],
license=about['__license__'], author_email=about["__author_email__"],
packages=find_packages(exclude=('tests',)), license=about["__license__"],
install_requires=requires, packages=find_packages(exclude=("tests",)),
extras_require={ install_requires=requires,
'all': all_requires, extras_require={
'base': base_requires, "all": all_requires,
'dev': dev_requires, "base": base_requires,
'plot': plot_requires "dev": dev_requires,
}, "plot": plot_requires,
entry_points={ },
'console_scripts': [ entry_points={
'camelot = camelot.cli:cli', "console_scripts": [
], "camelot = camelot.cli:cli",
}, ],
classifiers=[ },
# Trove classifiers classifiers=[
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # Trove classifiers
'License :: OSI Approved :: MIT License', # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'Programming Language :: Python :: 3.6', "License :: OSI Approved :: MIT License",
'Programming Language :: Python :: 3.7', "Programming Language :: Python :: 3.6",
'Programming Language :: Python :: 3.8' "Programming Language :: Python :: 3.7",
]) "Programming Language :: Python :: 3.8",
],
)
try: try:
from setuptools import setup from setuptools import setup