From d34f8645f7da9324745426bee6e6f7083811a4d6 Mon Sep 17 00:00:00 2001 From: Fakabbir Amin Date: Mon, 15 Jul 2019 15:58:58 +0530 Subject: [PATCH] Replaces Ghostscript with python-pdfbox --- camelot/parsers/lattice.py | 15 +++------------ setup.py | 3 ++- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 197ff9f..74327ac 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -33,7 +33,7 @@ from ..image_processing import ( logger = logging.getLogger("camelot") - +import pdfbox class Lattice(BaseParser): """Lattice method of parsing looks for lines between text @@ -209,17 +209,8 @@ class Lattice(BaseParser): return t def _generate_image(self): - from ..ext.ghostscript import Ghostscript - - self.imagename = "".join([self.rootname, ".png"]) - gs_call = "-q -sDEVICE=png16m -o {} -r300 {}".format( - self.imagename, self.filename - ) - gs_call = gs_call.encode().split() - null = open(os.devnull, "wb") - with Ghostscript(*gs_call, stdout=null) as gs: - pass - null.close() + pdfbox.PDFBox().pdf_to_images(self.filename, outputPrefix=self.rootname) + self.imagename = str(self.rootname) + '1.jpg' def _generate_table_bbox(self): def scale_areas(areas): diff --git a/setup.py b/setup.py index b83f566..f7f97e4 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,8 @@ requires = [ 'openpyxl>=2.5.8', 'pandas>=0.23.4', 'pdfminer.six>=20170720', - 'PyPDF2>=1.26.0' + 'PyPDF2>=1.26.0', + 'python-pdfbox>=0.1.5' ] cv_requires = [