Remove pdfseparate subprocess call

pull/2/head
Vinayak Mehta 2016-07-20 16:26:42 +05:30
parent 85ffb00239
commit c612692c42
2 changed files with 14 additions and 7 deletions

View File

@ -8,8 +8,6 @@ The required dependencies include pdfminer, numpy, opencv.
For debugging, matplotlib is required. For running tests in the future, nose may be required. For debugging, matplotlib is required. For running tests in the future, nose may be required.
camelot also uses poppler-utils, more specifically `pdfseparate` to separate a pdf into pages, with ImageMagick's `convert` to convert each page into an image.
## Install ## Install
## Usage ## Usage

View File

@ -9,9 +9,9 @@ import shutil
import logging import logging
import zipfile import zipfile
import tempfile import tempfile
import subprocess
from docopt import docopt from docopt import docopt
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from PyPDF2 import PdfFileWriter, PdfFileReader
from lattice import lattice from lattice import lattice
from stream import stream from stream import stream
@ -135,12 +135,21 @@ if __name__ == '__main__':
print "separating pdf into pages" print "separating pdf into pages"
print print
if p == ['all']: if p == ['all']:
subprocess.call(['pdfseparate', os.path.join(tmpdir, fname), os.path.join(tmpdir, infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
'pg-%d.pdf')]) for i in range(infile.getNumPages()):
p = infile.getPage(i)
outfile = PdfFileWriter()
outfile.addPage(p)
with open(os.path.join(tmpdir, 'pg-%d.pdf' % (i + 1)), 'wb') as f:
outfile.write(f)
else: else:
for page in p: for page in p:
subprocess.call(['pdfseparate', '-f', page, '-l', page, os.path.join(tmpdir, fname), infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
os.path.join(tmpdir, 'pg-%s.pdf' % page)]) p = infile.getPage(int(page) - 1)
outfile = PdfFileWriter()
outfile.addPage(p)
with open(os.path.join(tmpdir, 'pg-%s.pdf' % page), 'wb') as f:
outfile.write(f)
glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf'))) glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
if args['<method>'] == 'lattice': if args['<method>'] == 'lattice':