Remove ImageMagick subprocess call

pull/2/head
Vinayak Mehta 2016-07-20 15:40:01 +05:30 committed by GitHub
parent 7aebcee7e3
commit 85ffb00239
3 changed files with 5 additions and 6 deletions

View File

@@ -147,10 +147,8 @@ if __name__ == '__main__':
print "using the lattice method" print "using the lattice method"
for g in glob_pdf: for g in glob_pdf:
g_fname = os.path.basename(g) g_fname = os.path.basename(g)
print "working on", g_fname
g_froot, __ = os.path.splitext(g) g_froot, __ = os.path.splitext(g)
print "converting %s to image" % g_fname
os.system(' '.join(['convert', '-density', '300',
g, '-depth', '8', g_froot + '.png']))
try: try:
data = lattice(g, f=args['--fill'], s=int(args['--scale']), data = lattice(g, f=args['--fill'], s=int(args['--scale']),
jtol=int(args['--jtol']), mtol=int(args['--mtol']), jtol=int(args['--jtol']), mtol=int(args['--mtol']),
@@ -174,6 +172,7 @@ if __name__ == '__main__':
print "using the stream method" print "using the stream method"
for g in glob_pdf: for g in glob_pdf:
g_fname = os.path.basename(g) g_fname = os.path.basename(g)
print "working on", g_fname
g_froot, __ = os.path.splitext(g) g_froot, __ = os.path.splitext(g)
try: try:
data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'], data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'],

View File

@@ -2,6 +2,7 @@ import os
import cv2 import cv2
import glob import glob
import numpy as np import numpy as np
from wand.image import Image
from table import Table from table import Table
from pdf import get_pdf_info from pdf import get_pdf_info
@@ -150,10 +151,10 @@ def lattice(filepath, f=None, s=15, jtol=2, mtol=2, invert=False, debug=None):
""" """
if debug: if debug:
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
filename = os.path.basename(filepath)
print "working on", filename
fileroot, __ = os.path.splitext(filepath) fileroot, __ = os.path.splitext(filepath)
imagename = fileroot + '.png' imagename = fileroot + '.png'
with Image(filename=filepath, depth=8, resolution=300) as png:
png.save(filename=imagename)
img = cv2.imread(imagename) img = cv2.imread(imagename)
img_x, img_y = img.shape[1], img.shape[0] img_x, img_y = img.shape[1], img.shape[0]
text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice') text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice')

View File

@@ -70,7 +70,6 @@ def stream(filepath, ncolumns=0, columns=None, char_margin=2.0,
output : list output : list
""" """
filename = os.path.basename(filepath) filename = os.path.basename(filepath)
print "working on", filename
text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin, text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin,
line_margin=line_margin, word_margin=word_margin) line_margin=line_margin, word_margin=word_margin)
text.sort(key=lambda x: (-x.y0, x.x0)) text.sort(key=lambda x: (-x.y0, x.x0))