Remove pdfseparate subprocess call

2016-07-20 16:26:42 +05:30 · 2016-07-20 16:26:42 +05:30 · c612692c42
parent 85ffb00239
commit c612692c42
2 changed files with 14 additions and 7 deletions
--- a/README.md
+++ b/README.md
@ -8,8 +8,6 @@ The required dependencies include pdfminer, numpy, opencv.

 For debugging, matplotlib is required. For running tests in the future, nose may be required.

-camelot also uses poppler-utils, more specifically `pdfseparate` to separate a pdf into pages, with ImageMagick's `convert` to convert each page into an image.
-
 ## Install

 ## Usage
--- a/camelot.py
+++ b/camelot.py
@ -9,9 +9,9 @@ import shutil
 import logging
 import zipfile
 import tempfile
-import subprocess
 from docopt import docopt
 from werkzeug.utils import secure_filename
+from PyPDF2 import PdfFileWriter, PdfFileReader

 from lattice import lattice
 from stream import stream
@ -135,12 +135,21 @@ if __name__ == '__main__':
    print "separating pdf into pages"
    print
    if p == ['all']:
-        subprocess.call(['pdfseparate', os.path.join(tmpdir, fname), os.path.join(tmpdir,
-                        'pg-%d.pdf')])
+        infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
+        for i in range(infile.getNumPages()):
+            p = infile.getPage(i)
+            outfile = PdfFileWriter()
+            outfile.addPage(p)
+            with open(os.path.join(tmpdir, 'pg-%d.pdf' % (i + 1)), 'wb') as f:
+                outfile.write(f)
    else:
        for page in p:
-            subprocess.call(['pdfseparate', '-f', page, '-l', page, os.path.join(tmpdir, fname),
-                            os.path.join(tmpdir, 'pg-%s.pdf' % page)])
+            infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
+            p = infile.getPage(int(page) - 1)
+            outfile = PdfFileWriter()
+            outfile.addPage(p)
+            with open(os.path.join(tmpdir, 'pg-%s.pdf' % page), 'wb') as f:
+                outfile.write(f)

    glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
    if args['<method>'] == 'lattice':