Remove imagemagick subprocess call
parent
7aebcee7e3
commit
85ffb00239
|
|
@ -147,10 +147,8 @@ if __name__ == '__main__':
|
||||||
print "using the lattice method"
|
print "using the lattice method"
|
||||||
for g in glob_pdf:
|
for g in glob_pdf:
|
||||||
g_fname = os.path.basename(g)
|
g_fname = os.path.basename(g)
|
||||||
|
print "working on", g_fname
|
||||||
g_froot, __ = os.path.splitext(g)
|
g_froot, __ = os.path.splitext(g)
|
||||||
print "converting %s to image" % g_fname
|
|
||||||
os.system(' '.join(['convert', '-density', '300',
|
|
||||||
g, '-depth', '8', g_froot + '.png']))
|
|
||||||
try:
|
try:
|
||||||
data = lattice(g, f=args['--fill'], s=int(args['--scale']),
|
data = lattice(g, f=args['--fill'], s=int(args['--scale']),
|
||||||
jtol=int(args['--jtol']), mtol=int(args['--mtol']),
|
jtol=int(args['--jtol']), mtol=int(args['--mtol']),
|
||||||
|
|
@ -174,6 +172,7 @@ if __name__ == '__main__':
|
||||||
print "using the stream method"
|
print "using the stream method"
|
||||||
for g in glob_pdf:
|
for g in glob_pdf:
|
||||||
g_fname = os.path.basename(g)
|
g_fname = os.path.basename(g)
|
||||||
|
print "working on", g_fname
|
||||||
g_froot, __ = os.path.splitext(g)
|
g_froot, __ = os.path.splitext(g)
|
||||||
try:
|
try:
|
||||||
data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'],
|
data = stream(g, ncolumns=int(args['--ncols']), columns=args['--columns'],
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import os
|
||||||
import cv2
|
import cv2
|
||||||
import glob
|
import glob
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from wand.image import Image
|
||||||
|
|
||||||
from table import Table
|
from table import Table
|
||||||
from pdf import get_pdf_info
|
from pdf import get_pdf_info
|
||||||
|
|
@ -150,10 +151,10 @@ def lattice(filepath, f=None, s=15, jtol=2, mtol=2, invert=False, debug=None):
|
||||||
"""
|
"""
|
||||||
if debug:
|
if debug:
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
filename = os.path.basename(filepath)
|
|
||||||
print "working on", filename
|
|
||||||
fileroot, __ = os.path.splitext(filepath)
|
fileroot, __ = os.path.splitext(filepath)
|
||||||
imagename = fileroot + '.png'
|
imagename = fileroot + '.png'
|
||||||
|
with Image(filename=filepath, depth=8, resolution=300) as png:
|
||||||
|
png.save(filename=imagename)
|
||||||
img = cv2.imread(imagename)
|
img = cv2.imread(imagename)
|
||||||
img_x, img_y = img.shape[1], img.shape[0]
|
img_x, img_y = img.shape[1], img.shape[0]
|
||||||
text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice')
|
text, pdf_x, pdf_y = get_pdf_info(filepath, method='lattice')
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,6 @@ def stream(filepath, ncolumns=0, columns=None, char_margin=2.0,
|
||||||
output : list
|
output : list
|
||||||
"""
|
"""
|
||||||
filename = os.path.basename(filepath)
|
filename = os.path.basename(filepath)
|
||||||
print "working on", filename
|
|
||||||
text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin,
|
text, __, __ = get_pdf_info(filepath, method='stream', char_margin=char_margin,
|
||||||
line_margin=line_margin, word_margin=word_margin)
|
line_margin=line_margin, word_margin=word_margin)
|
||||||
text.sort(key=lambda x: (-x.y0, x.x0))
|
text.sort(key=lambda x: (-x.y0, x.x0))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue