From 85ffb00239e24994a1f110a739b8f5395e3d5510 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Wed, 20 Jul 2016 15:40:01 +0530 Subject: [PATCH] Remove imagemagick subprocess call --- camelot.py | 5 ++--- lattice.py | 5 +++-- stream.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/camelot.py b/camelot.py index 3a2dd3d..37b91f6 100755 --- a/camelot.py +++ b/camelot.py @@ -147,10 +147,8 @@ if __name__ == '__main__': print "using the lattice method" for g in glob_pdf: g_fname = os.path.basename(g) + print "working on", g_fname g_froot, __ = os.path.splitext(g) - print "converting %s to image" % g_fname - os.system(' '.join(['convert', '-density', '300', - g, '-depth', '8', g_froot + '.png'])) try: data = lattice(g, f=args['--fill'], s=int(args['--scale']), jtol=int(args['--jtol']), mtol=int(args['--mtol']), @@ -174,6 +172,7 @@ if __name__ == '__main__': print "using the stream method" for g in glob_pdf: g_fname = os.path.basename(g) + print "working on", g_fname g_froot, __ = os.path.splitext(g) try: data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'], diff --git a/lattice.py b/lattice.py index aa4bc04..0966b63 100644 --- a/lattice.py +++ b/lattice.py @@ -2,6 +2,7 @@ import os import cv2 import glob import numpy as np +from wand.image import Image from table import Table from pdf import get_pdf_info @@ -150,10 +151,10 @@ def lattice(filepath, f=None, s=15, jtol=2, mtol=2, invert=False, debug=None): """ if debug: import matplotlib.pyplot as plt - filename = os.path.basename(filepath) - print "working on", filename fileroot, __ = os.path.splitext(filepath) imagename = fileroot + '.png' + with Image(filename=filepath, depth=8, resolution=300) as png: + png.save(filename=imagename) img = cv2.imread(imagename) img_x, img_y = img.shape[1], img.shape[0] text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice') diff --git a/stream.py b/stream.py index e09d9ed..16f0429 100644 --- a/stream.py +++ b/stream.py @@ -70,7 +70,6 @@ def stream(filepath, ncolumns=0, columns=None, char_margin=2.0, output : list """ filename = os.path.basename(filepath) - print "working on", filename text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin, line_margin=line_margin, word_margin=word_margin) text.sort(key=lambda x: (-x.y0, x.x0))