diff --git a/README.md b/README.md
index e6712e7..3e11120 100644
--- a/README.md
+++ b/README.md
@@ -8,8 +8,6 @@ The required dependencies include pdfminer, numpy, opencv.
 
 For debugging, matplotlib is required. For runnings tests in the future, nose may be required.
 
-camelot also uses poppler-utils, more specifically `pdfseparate` to separate a pdf into pages, with ImageMagick's `convert` to convert each page into an image.
-
 ## Install
 
 ## Usage
diff --git a/camelot.py b/camelot.py
index 37b91f6..9d72a0f 100755
--- a/camelot.py
+++ b/camelot.py
@@ -9,9 +9,9 @@ import shutil
 import logging
 import zipfile
 import tempfile
-import subprocess
 from docopt import docopt
 from werkzeug.utils import secure_filename
+from PyPDF2 import PdfFileWriter, PdfFileReader
 
 from lattice import lattice
 from stream import stream
@@ -135,12 +135,22 @@ if __name__ == '__main__':
         print "separating pdf into pages"
         print
-        if p == ['all']:
-            subprocess.call(['pdfseparate', os.path.join(tmpdir, fname), os.path.join(tmpdir,
-                             'pg-%d.pdf')])
-        else:
-            for page in p:
-                subprocess.call(['pdfseparate', '-f', page, '-l', page, os.path.join(tmpdir, fname),
-                                 os.path.join(tmpdir, 'pg-%s.pdf' % page)])
+        # Split with PyPDF2 instead of shelling out to pdfseparate. The source
+        # is opened once and kept open until every page file is written
+        # (PyPDF2 can resolve page objects lazily from the source stream);
+        # the `with` block then closes the handle instead of leaking it.
+        with open(os.path.join(tmpdir, fname), 'rb') as src:
+            infile = PdfFileReader(src)
+            if p == ['all']:
+                # Page files are numbered from 1: pg-1.pdf, pg-2.pdf, ...
+                pagenos = range(1, infile.getNumPages() + 1)
+            else:
+                pagenos = p
+            for pageno in pagenos:
+                # `p` (the page selection) is deliberately not rebound here.
+                outfile = PdfFileWriter()
+                outfile.addPage(infile.getPage(int(pageno) - 1))
+                with open(os.path.join(tmpdir, 'pg-%s.pdf' % pageno), 'wb') as f:
+                    outfile.write(f)
 
         glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
         if args[''] == 'lattice':