Remove pdfseparate subprocess call
parent
85ffb00239
commit
c612692c42
|
|
@ -8,8 +8,6 @@ The required dependencies include pdfminer, numpy, opencv.
|
||||||
|
|
||||||
For debugging, matplotlib is required. For running tests in the future, nose may be required.
|
For debugging, matplotlib is required. For running tests in the future, nose may be required.
|
||||||
|
|
||||||
camelot also uses poppler-utils, more specifically `pdfseparate`, to separate a PDF into pages, and ImageMagick's `convert` to convert each page into an image.
|
|
||||||
|
|
||||||
## Install
|
## Install
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
|
||||||
19
camelot.py
19
camelot.py
|
|
@ -9,9 +9,9 @@ import shutil
|
||||||
import logging
|
import logging
|
||||||
import zipfile
|
import zipfile
|
||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
|
||||||
from docopt import docopt
|
from docopt import docopt
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
from PyPDF2 import PdfFileWriter, PdfFileReader
|
||||||
|
|
||||||
from lattice import lattice
|
from lattice import lattice
|
||||||
from stream import stream
|
from stream import stream
|
||||||
|
|
@ -135,12 +135,21 @@ if __name__ == '__main__':
|
||||||
print "separating pdf into pages"
|
print "separating pdf into pages"
|
||||||
print
|
print
|
||||||
if p == ['all']:
|
if p == ['all']:
|
||||||
subprocess.call(['pdfseparate', os.path.join(tmpdir, fname), os.path.join(tmpdir,
|
infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
|
||||||
'pg-%d.pdf')])
|
for i in range(infile.getNumPages()):
|
||||||
|
p = infile.getPage(i)
|
||||||
|
outfile = PdfFileWriter()
|
||||||
|
outfile.addPage(p)
|
||||||
|
with open(os.path.join(tmpdir, 'pg-%d.pdf' % (i + 1)), 'wb') as f:
|
||||||
|
outfile.write(f)
|
||||||
else:
|
else:
|
||||||
for page in p:
|
for page in p:
|
||||||
subprocess.call(['pdfseparate', '-f', page, '-l', page, os.path.join(tmpdir, fname),
|
infile = PdfFileReader(open(os.path.join(tmpdir, fname), 'rb'))
|
||||||
os.path.join(tmpdir, 'pg-%s.pdf' % page)])
|
p = infile.getPage(int(page) - 1)
|
||||||
|
outfile = PdfFileWriter()
|
||||||
|
outfile.addPage(p)
|
||||||
|
with open(os.path.join(tmpdir, 'pg-%s.pdf' % page), 'wb') as f:
|
||||||
|
outfile.write(f)
|
||||||
|
|
||||||
glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
|
glob_pdf = sorted(glob.glob(os.path.join(tmpdir, 'pg-*.pdf')))
|
||||||
if args['<method>'] == 'lattice':
|
if args['<method>'] == 'lattice':
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue