Replaces Ghostscript with python-pdfbox

pull/30/head
Fakabbir Amin 2019-07-15 15:58:58 +05:30
parent f7b94b3e57
commit d34f8645f7
2 changed files with 5 additions and 13 deletions

View File

@@ -33,7 +33,7 @@ from ..image_processing import (
logger = logging.getLogger("camelot")
import pdfbox
class Lattice(BaseParser):
"""Lattice method of parsing looks for lines between text
@@ -209,17 +209,8 @@ class Lattice(BaseParser):
return t
# Render the PDF at self.filename to an image file and record that file's
# path in self.imagename.
#
# NOTE(review): this listing looks like a diff hunk whose +/- markers and
# indentation were lost. The hunk header (17 old lines -> 8 new lines) and
# the commit title suggest the Ghostscript statements are the removed side
# and the two pdfbox statements are the added side -- confirm against the
# repository. Read literally, top to bottom, both renderers would run and
# the final assignment to self.imagename would win.
def _generate_image(self):
# --- Ghostscript rendering (presumably the removed side of the hunk) ---
# Imported at call time rather than at module level.
from ..ext.ghostscript import Ghostscript
# Target image path: rootname with a ".png" suffix appended.
self.imagename = "".join([self.rootname, ".png"])
# Argument string: -q, the png16m device, -o <imagename>, -r300, then the
# input filename.
gs_call = "-q -sDEVICE=png16m -o {} -r300 {}".format(
self.imagename, self.filename
)
# Encode to bytes and split on whitespace into an argument list.
# NOTE(review): a path containing whitespace would be split into several
# arguments here.
gs_call = gs_call.encode().split()
# Handle on os.devnull, passed below as the stdout argument.
null = open(os.devnull, "wb")
# The with-body is empty; any work happens while the Ghostscript object is
# constructed / entered / exited (presumably the rendering -- verify in
# ..ext.ghostscript).
with Ghostscript(*gs_call, stdout=null) as gs:
pass
# NOTE(review): not reached if the with-statement raises (no try/finally).
null.close()
# --- pdfbox rendering (presumably the added side of the hunk) ---
# Convert the PDF to images, passing rootname as the output prefix.
pdfbox.PDFBox().pdf_to_images(self.filename, outputPrefix=self.rootname)
# Point imagename at "<rootname>1.jpg".
# NOTE(review): assumes pdfbox names the first page's output
# "<outputPrefix>1.jpg" -- TODO confirm. Also note the extension changes
# from ".png" to ".jpg" relative to the Ghostscript lines above.
self.imagename = str(self.rootname) + '1.jpg'
def _generate_table_bbox(self):
def scale_areas(areas):

View File

@@ -20,7 +20,8 @@ requires = [
'openpyxl>=2.5.8',
'pandas>=0.23.4',
'pdfminer.six>=20170720',
'PyPDF2>=1.26.0'
'PyPDF2>=1.26.0',
'python-pdfbox>=0.1.5'
]
cv_requires = [