Remove ImageMagick subprocess call

pull/2/head
Vinayak Mehta 2016-07-20 15:40:01 +05:30 committed by GitHub
parent 7aebcee7e3
commit 85ffb00239
3 changed files with 5 additions and 6 deletions

View File

@@ -147,10 +147,8 @@ if __name__ == '__main__':
print "using the lattice method"
for g in glob_pdf:
g_fname = os.path.basename(g)
print "working on", g_fname
g_froot, __ = os.path.splitext(g)
print "converting %s to image" % g_fname
os.system(' '.join(['convert', '-density', '300',
g, '-depth', '8', g_froot + '.png']))
try:
data = lattice(g, f=args['--fill'], s=int(args['--scale']),
jtol=int(args['--jtol']), mtol=int(args['--mtol']),
@@ -174,6 +172,7 @@ if __name__ == '__main__':
print "using the stream method"
for g in glob_pdf:
g_fname = os.path.basename(g)
print "working on", g_fname
g_froot, __ = os.path.splitext(g)
try:
data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'],

View File

@@ -2,6 +2,7 @@ import os
import cv2
import glob
import numpy as np
from wand.image import Image
from table import Table
from pdf import get_pdf_info
@@ -150,10 +151,10 @@ def lattice(filepath, f=None, s=15, jtol=2, mtol=2, invert=False, debug=None):
"""
if debug:
import matplotlib.pyplot as plt
filename = os.path.basename(filepath)
print "working on", filename
fileroot, __ = os.path.splitext(filepath)
imagename = fileroot + '.png'
with Image(filename=filepath, depth=8, resolution=300) as png:
png.save(filename=imagename)
img = cv2.imread(imagename)
img_x, img_y = img.shape[1], img.shape[0]
text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice')

View File

@@ -70,7 +70,6 @@ def stream(filepath, ncolumns=0, columns=None, char_margin=2.0,
output : list
"""
filename = os.path.basename(filepath)
print "working on", filename
text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin,
line_margin=line_margin, word_margin=word_margin)
text.sort(key=lambda x: (-x.y0, x.x0))