167 lines
4.9 KiB
Python
167 lines
4.9 KiB
Python
"""
|
|
usage: python plot_geo.py file.pdf
|
|
python plot_geo.py file.pdf file.png
|
|
|
|
prints lines and rectangles present in a pdf file.
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.patches as patches
|
|
from pdfminer.pdfpage import PDFPage
|
|
from pdfminer.pdfdevice import PDFDevice
|
|
from pdfminer.pdfparser import PDFParser
|
|
from pdfminer.pdfdocument import PDFDocument
|
|
from pdfminer.converter import PDFPageAggregator
|
|
from pdfminer.pdfinterp import PDFResourceManager
|
|
from pdfminer.pdfinterp import PDFPageInterpreter
|
|
from pdfminer.layout import LAParams, LTLine, LTRect
|
|
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
|
|
|
|
|
|
MIN_LENGTH = 1
|
|
pdf_x, pdf_y, image_x, image_y = [0] * 4
|
|
|
|
|
|
def timeit(func):
|
|
def timed(*args, **kw):
|
|
start = time.time()
|
|
result = func(*args, **kw)
|
|
end = time.time()
|
|
print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
|
|
return result
|
|
return timed
|
|
|
|
|
|
def remove_coords(coords):
|
|
merged = []
|
|
for coord in coords:
|
|
if not merged:
|
|
merged.append(coord)
|
|
else:
|
|
last = merged[-1]
|
|
if np.isclose(last, coord, atol=2):
|
|
pass
|
|
else:
|
|
merged.append(coord)
|
|
return merged
|
|
|
|
|
|
def parse_layout(pdfname):
|
|
global pdf_x, pdf_y
|
|
def is_horizontal(line):
|
|
if line[0] == line[2]:
|
|
return True
|
|
return False
|
|
|
|
def is_vertical(line):
|
|
if line[1] == line[3]:
|
|
return True
|
|
return False
|
|
|
|
vertical, horizontal = [], []
|
|
with open(pdfname, 'rb') as f:
|
|
parser = PDFParser(f)
|
|
document = PDFDocument(parser)
|
|
if not document.is_extractable:
|
|
raise PDFTextExtractionNotAllowed
|
|
laparams = LAParams()
|
|
rsrcmgr = PDFResourceManager()
|
|
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
|
|
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
|
for page in PDFPage.create_pages(document):
|
|
interpreter.process_page(page)
|
|
layout = device.get_result()
|
|
pdf_x, pdf_y = layout.bbox[2], layout.bbox[3]
|
|
for obj in layout._objs:
|
|
if isinstance(obj, LTLine):
|
|
line = (obj.x0, obj.y0, obj.x1, obj.y1)
|
|
if is_vertical(line):
|
|
vertical.append(line)
|
|
elif is_horizontal(line):
|
|
horizontal.append(line)
|
|
elif isinstance(obj, LTRect):
|
|
vertical.append((obj.x0, obj.y1, obj.x0, obj.y0))
|
|
vertical.append((obj.x1, obj.y1, obj.x1, obj.y0))
|
|
horizontal.append((obj.x0, obj.y1, obj.x1, obj.y1))
|
|
horizontal.append((obj.x0, obj.y0, obj.x1, obj.y0))
|
|
return vertical, horizontal
|
|
|
|
|
|
def hough_transform(imagename):
|
|
global pdf_x, pdf_y, image_x, image_y
|
|
img = cv2.imread(imagename)
|
|
image_x, image_y = img.shape[1], img.shape[0]
|
|
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
|
|
lines = cv2.HoughLines(edges, 1, np.pi/180, 1000)
|
|
x = []
|
|
for line in lines:
|
|
r, theta = line[0]
|
|
x0 = r * np.cos(theta)
|
|
x0 *= pdf_x / float(image_x)
|
|
x.append(x0)
|
|
y = []
|
|
for line in lines:
|
|
r, theta = line[0]
|
|
y0 = r * np.sin(theta)
|
|
y0 = abs(y0 - image_y)
|
|
y0 *= pdf_y / float(image_y)
|
|
y.append(y0)
|
|
x = remove_coords(sorted(set([x0 for x0 in x if x0 > 0])))
|
|
y = remove_coords(sorted(set(y), reverse=True))
|
|
return x, y
|
|
|
|
|
|
def plot_lines1(vertical, horizontal):
|
|
fig = plt.figure()
|
|
ax = fig.add_subplot(111, aspect='equal')
|
|
ax.set_xlim(0, 1000)
|
|
ax.set_ylim(0, 1000)
|
|
|
|
vertical = filter(lambda x: abs(x[1] - x[3]) > MIN_LENGTH, vertical)
|
|
horizontal = filter(lambda x: abs(x[0] - x[2]) > MIN_LENGTH, horizontal)
|
|
for v in vertical:
|
|
ax.plot([v[0], v[2]], [v[1], v[3]])
|
|
for h in horizontal:
|
|
ax.plot([h[0], h[2]], [h[1], h[3]])
|
|
plt.show()
|
|
|
|
|
|
def plot_lines2(imagename, vertical, horizontal):
|
|
x, y = hough_transform(imagename)
|
|
fig = plt.figure()
|
|
ax = fig.add_subplot(111, aspect='equal')
|
|
ax.set_xlim(0, 1000)
|
|
ax.set_ylim(0, 1000)
|
|
|
|
for x0 in x:
|
|
for v in vertical:
|
|
if np.isclose(x0, v[0], atol=2):
|
|
ax.plot([v[0], v[2]], [v[1], v[3]])
|
|
for y0 in y:
|
|
for h in horizontal:
|
|
if np.isclose(y0, h[1], atol=2):
|
|
ax.plot([h[0], h[2]], [h[1], h[3]])
|
|
plt.show()
|
|
|
|
|
|
@timeit
|
|
def main():
|
|
vertical, horizontal = parse_layout(sys.argv[1])
|
|
if len(sys.argv) == 2:
|
|
plot_lines1(vertical, horizontal)
|
|
elif len(sys.argv) == 3:
|
|
plot_lines1(vertical, horizontal)
|
|
plot_lines2(sys.argv[2], vertical, horizontal)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if len(sys.argv) == 1:
|
|
print __doc__
|
|
else:
|
|
main() |