Remove pdfseparate subprocess call

pull/2/head
Vinayak Mehta 2016-07-20 16:26:42 +05:30
parent 85ffb00239
commit c612692c42
2 changed files with 14 additions and 7 deletions

View File

@ -8,8 +8,6 @@ The required dependencies include pdfminer, numpy, opencv.
For debugging, matplotlib is required. For running tests in the future, nose may be required.
camelot also uses poppler-utils, more specifically `pdfseparate` to separate a pdf into pages, with ImageMagick's `convert` to convert each page into an image.
## Install
## Usage

View File

@ -9,9 +9,9 @@ import shutil
import logging
import zipfile
import tempfile
import subprocess
from docopt import docopt
from werkzeug.utils import secure_filename
from PyPDF2 import PdfFileWriter, PdfFileReader
from lattice import lattice
from stream import stream
@ -135,12 +135,21 @@ if __name__ == '__main__':
print "separating pdf into pages"
print
if p == ['all']:
subprocess.call(['pdfseparate', os.path.join(tmpdir, fname), os.path.join(tmpdir,
'pg-%d.pdf')])
infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
for i in range(infile.getNumPages()):
p = infile.getPage(i)
outfile = PdfFileWriter()
outfile.addPage(p)
with open(os.path.join(tmpdir, 'pg-%d.pdf' % (i + 1)), 'wb') as f:
outfile.write(f)
else:
for page in p:
subprocess.call(['pdfseparate', '-f', page, '-l', page, os.path.join(tmpdir, fname),
os.path.join(tmpdir, 'pg-%s.pdf' % page)])
infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
p = infile.getPage(int(page) - 1)
outfile = PdfFileWriter()
outfile.addPage(p)
with open(os.path.join(tmpdir, 'pg-%s.pdf' % page), 'wb') as f:
outfile.write(f)
glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
if args['<method>'] == 'lattice':