Remove examples and debug, restructure tests dir

2018-09-08 18:38:59 +05:30 · 2018-09-08 18:38:59 +05:30 · e0b55f0693
parent 9d2708171b
commit e0b55f0693
358 changed files with 0 additions and 731 deletions
--- a/debug/hough_opencv.py
+++ b/debug/hough_opencv.py
@ -1,53 +0,0 @@
-"""
-usage: python hough_opencv.py file.png
-
-finds lines present in an image using opencv's hough transform.
-"""
-
-import sys
-import time
-
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-@timeit
-def main():
-    image = cv2.imread(sys.argv[1])
-    print "image dimensions -> {0}".format(image.shape)
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
-
-    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
-    print "found {0} lines".format(len(lines))
-    for line in lines:
-        r, theta = line[0]
-        # filter horizontal and vertical lines
-        if theta == 0 or np.isclose(theta, np.pi / 2):
-            x0 = r * np.cos(theta)
-            y0 = r * np.sin(theta)
-            x1 = int(x0 + 10000 * (-np.sin(theta)))
-            y1 = int(y0 + 10000 * (np.cos(theta)))
-            x2 = int(x0 - 10000 * (-np.sin(theta)))
-            y2 = int(y0 - 10000 * (np.cos(theta)))
-            cv2.line(image, (x1, y1), (x2, y2), (0, 0, 255), 5)
-    plt.imshow(image)
-    plt.show()
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/hough_skimage.py
+++ b/debug/hough_skimage.py
@ -1,75 +0,0 @@
-"""
-usage: python hough_skimage.py file.png
-
-finds lines present in an image using scikit-image's hough transform.
-"""
-
-import sys
-import time
-
-import cv2
-import numpy as np
-from scipy.misc import imread
-import matplotlib.pyplot as plt
-from skimage.transform import hough_line, hough_line_peaks
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-@timeit
-def main():
-    image = cv2.imread(sys.argv[1])
-    print "image dimensions -> {0}".format(image.shape)
-    ret, binary = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
-    binary = np.min(binary, axis=2)
-    binary = np.where(binary == 255, 0, 255)
-    rows, cols = binary.shape
-    pixel = np.zeros(binary.shape)
-
-    fig, ax = plt.subplots(1, 1, figsize=(8,4))
-    ax.imshow(image, cmap=plt.cm.gray)
-
-    theta_in = np.linspace(0, np.pi / 2, 10)
-    h, theta, d = hough_line(binary, theta_in)
-    for _, angle, dist in zip(*hough_line_peaks(h, theta, d)):
-        x0 = dist * np.cos(angle)
-        y0 = dist * np.sin(angle)
-        x1 = int(x0 + 1000 * (-np.sin(angle)))
-        y1 = int(y0 + 1000 * (np.cos(angle)))
-        x2 = int(x0 - 1000 * (-np.sin(angle)))
-        y2 = int(y0 - 1000 * (np.cos(angle)))
-        ax.plot((x1, x2), (y1, y2), '-r')
-        a = np.cos(angle)
-        b = np.sin(angle)
-        x = np.arange(binary.shape[1])
-        y = np.arange(binary.shape[0])
-        x = a * x
-        y = b * y
-        R = np.round(np.add(y.reshape((binary.shape[0], 1)), x.reshape((1, binary.shape[1]))))
-        pixel += np.isclose(R, np.round(dist))
-
-    pixel = np.clip(pixel, 0, 1)
-    pixel = np.where(pixel == 1, 0, 1)
-    binary = np.where(binary == 0, 255, 0)
-    binary *= pixel.astype(np.int64)
-    ax.imshow(binary, cmap=plt.cm.gray)
-    ax.axis((0, cols, rows, 0))
-    ax.set_title('Detected lines')
-    ax.set_axis_off()
-    ax.set_adjustable('box-forced')
-    plt.show()
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/houghp_skimage.py
+++ b/debug/houghp_skimage.py
@ -1,49 +0,0 @@
-"""
-usage: python hough_prob.py file.png
-
-finds lines present in an image using scikit-image's hough transform.
-"""
-
-import sys
-import time
-
-from scipy.misc import imread
-import matplotlib.pyplot as plt
-from skimage.feature import canny
-from skimage.transform import probabilistic_hough_line
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-@timeit
-def main():
-    image = imread(sys.argv[1], mode='L')
-    edges = canny(image, 2, 1, 25)
-    lines = probabilistic_hough_line(edges, threshold=1000)
-
-    fig, ax = plt.subplots(1, 1, figsize=(8,4), sharex=True, sharey=True)
-    ax.imshow(edges * 0)
-
-    for line in lines:
-        p0, p1 = line
-        ax.plot((p0[0], p1[0]), (p0[1], p1[1]))
-
-    ax.set_title('Probabilistic Hough')
-    ax.set_axis_off()
-    ax.set_adjustable('box-forced')
-    plt.show()
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/morph_transform.py
+++ b/debug/morph_transform.py
@ -1,114 +0,0 @@
-"""
-usage: python morph_transform.py file.png scale={int} invert={bool}
-
-finds lines present in an image using opencv's morph transform.
-"""
-
-import sys
-import time
-
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-def mt(imagename, scale=40, invert=False):
-    img = cv2.imread(imagename)
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    if invert:
-        threshold = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2)
-    else:
-        threshold = cv2.adaptiveThreshold(np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2)
-    vertical = threshold
-    horizontal = threshold
-
-    verticalsize = vertical.shape[0] / scale
-    horizontalsize = horizontal.shape[1] / scale
-
-    ver = cv2.getStructuringElement(cv2.MORPH_RECT, (1, verticalsize))
-    hor = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontalsize, 1))
-
-    vertical = cv2.erode(vertical, ver, (-1, -1))
-    vertical = cv2.dilate(vertical, ver, (-1, -1))
-
-    horizontal = cv2.erode(horizontal, hor, (-1, -1))
-    horizontal = cv2.dilate(horizontal, hor, (-1, -1))
-
-    mask = vertical + horizontal
-    joints = np.bitwise_and(vertical, horizontal)
-    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
-
-    tables = {}
-    for c in contours:
-        x, y, w, h = cv2.boundingRect(c)
-        x1, x2 = x, x + w
-        y1, y2 = y, y + h
-        # find number of non-zero values in joints using what boundingRect returns
-        roi = joints[y:y+h, x:x+w]
-        jc, _ = cv2.findContours(roi, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
-        if len(jc) <= 4: # remove contours with less than <=4 joints
-            continue
-        joint_coords = []
-        for j in jc:
-            jx, jy, jw, jh = cv2.boundingRect(j)
-            c1, c2 = x + (2*jx + jw) / 2, y + (2*jy + jh) / 2
-            joint_coords.append((c1, c2))
-        tables[(x1, y2, x2, y1)] = joint_coords
-
-    vcontours, _ = cv2.findContours(vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    for vc in vcontours:
-        x, y, w, h = cv2.boundingRect(vc)
-        x1, x2 = x, x + w
-        y1, y2 = y, y + h
-        plt.plot([(x1 + x2) / 2, (x1 + x2) / 2], [y2, y1])
-
-    hcontours, _ = cv2.findContours(horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    for hc in hcontours:
-        x, y, w, h = cv2.boundingRect(hc)
-        x1, x2 = x, x + w
-        y1, y2 = y, y + h
-        plt.plot([x1, x2], [(y1 + y2) / 2, (y1 + y2) / 2])
-
-    x_coord = []
-    y_coord = []
-    for k in tables.keys():
-        for coord in tables[k]:
-            x_coord.append(coord[0])
-            y_coord.append(coord[1])
-    plt.plot(x_coord, y_coord, 'ro')
-
-    plt.imshow(img)
-    plt.show()
-    return tables
-
-
-@timeit
-def main():
-    try:
-        scale = int(sys.argv[2].split('=')[1])
-    except IndexError:
-        scale = 40
-    try:
-        invert = bool(sys.argv[3].split('=')[1])
-    except IndexError:
-        invert = False
-    t = mt(sys.argv[1], scale=scale, invert=invert)
-    print 'tables found: ', len(t.keys())
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/plot_geo.py
+++ b/debug/plot_geo.py
@ -1,167 +0,0 @@
-"""
-usage:  python plot_geo.py file.pdf
-        python plot_geo.py file.pdf file.png
-
-prints lines and rectangles present in a pdf file.
-"""
-
-import sys
-import time
-
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-from pdfminer.pdfpage import PDFPage
-from pdfminer.pdfdevice import PDFDevice
-from pdfminer.pdfparser import PDFParser
-from pdfminer.pdfdocument import PDFDocument
-from pdfminer.converter import PDFPageAggregator
-from pdfminer.pdfinterp import PDFResourceManager
-from pdfminer.pdfinterp import PDFPageInterpreter
-from pdfminer.layout import LAParams, LTLine, LTRect
-from pdfminer.pdfpage import PDFTextExtractionNotAllowed
-
-
-MIN_LENGTH = 1
-pdf_x, pdf_y, image_x, image_y = [0] * 4
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-def remove_coords(coords):
-    merged = []
-    for coord in coords:
-        if not merged:
-            merged.append(coord)
-        else:
-            last = merged[-1]
-            if np.isclose(last, coord, atol=2):
-                pass
-            else:
-                merged.append(coord)
-    return merged
-
-
-def parse_layout(pdfname):
-    global pdf_x, pdf_y
-    def is_horizontal(line):
-        if line[0] == line[2]:
-            return True
-        return False
-
-    def is_vertical(line):
-        if line[1] == line[3]:
-            return True
-        return False
-
-    vertical, horizontal = [], []
-    with open(pdfname, 'rb') as f:
-        parser = PDFParser(f)
-        document = PDFDocument(parser)
-        if not document.is_extractable:
-            raise PDFTextExtractionNotAllowed
-        laparams = LAParams()
-        rsrcmgr = PDFResourceManager()
-        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-        interpreter = PDFPageInterpreter(rsrcmgr, device)
-        for page in PDFPage.create_pages(document):
-            interpreter.process_page(page)
-            layout = device.get_result()
-            pdf_x, pdf_y = layout.bbox[2], layout.bbox[3]
-            for obj in layout._objs:
-                if isinstance(obj, LTLine):
-                    line = (obj.x0, obj.y0, obj.x1, obj.y1)
-                    if is_vertical(line):
-                        vertical.append(line)
-                    elif is_horizontal(line):
-                        horizontal.append(line)
-                elif isinstance(obj, LTRect):
-                    vertical.append((obj.x0, obj.y1, obj.x0, obj.y0))
-                    vertical.append((obj.x1, obj.y1, obj.x1, obj.y0))
-                    horizontal.append((obj.x0, obj.y1, obj.x1, obj.y1))
-                    horizontal.append((obj.x0, obj.y0, obj.x1, obj.y0))
-    return vertical, horizontal
-
-
-def hough_transform(imagename):
-    global pdf_x, pdf_y, image_x, image_y
-    img = cv2.imread(imagename)
-    image_x, image_y = img.shape[1], img.shape[0]
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
-    lines = cv2.HoughLines(edges, 1, np.pi/180, 1000)
-    x = []
-    for line in lines:
-        r, theta = line[0]
-        x0 = r * np.cos(theta)
-        x0 *= pdf_x / float(image_x)
-        x.append(x0)
-    y = []
-    for line in lines:
-        r, theta = line[0]
-        y0 = r * np.sin(theta)
-        y0 = abs(y0 - image_y)
-        y0 *= pdf_y / float(image_y)
-        y.append(y0)
-    x = remove_coords(sorted(set([x0 for x0 in x if x0 > 0])))
-    y = remove_coords(sorted(set(y), reverse=True))
-    return x, y
-
-
-def plot_lines1(vertical, horizontal):
-    fig = plt.figure()
-    ax = fig.add_subplot(111, aspect='equal')
-    ax.set_xlim(0, 1000)
-    ax.set_ylim(0, 1000)
-
-    vertical = filter(lambda x: abs(x[1] - x[3]) > MIN_LENGTH, vertical)
-    horizontal = filter(lambda x: abs(x[0] - x[2]) > MIN_LENGTH, horizontal)
-    for v in vertical:
-        ax.plot([v[0], v[2]], [v[1], v[3]])
-    for h in horizontal:
-        ax.plot([h[0], h[2]], [h[1], h[3]])
-    plt.show()
-
-
-def plot_lines2(imagename, vertical, horizontal):
-    x, y = hough_transform(imagename)
-    fig = plt.figure()
-    ax = fig.add_subplot(111, aspect='equal')
-    ax.set_xlim(0, 1000)
-    ax.set_ylim(0, 1000)
-
-    for x0 in x:
-        for v in vertical:
-            if np.isclose(x0, v[0], atol=2):
-                ax.plot([v[0], v[2]], [v[1], v[3]])
-    for y0 in y:
-        for h in horizontal:
-            if np.isclose(y0, h[1], atol=2):
-                ax.plot([h[0], h[2]], [h[1], h[3]])
-    plt.show()
-
-
-@timeit
-def main():
-    vertical, horizontal = parse_layout(sys.argv[1])
-    if len(sys.argv) == 2:
-        plot_lines1(vertical, horizontal)
-    elif len(sys.argv) == 3:
-        plot_lines1(vertical, horizontal)
-        plot_lines2(sys.argv[2], vertical, horizontal)
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/plot_intensity.py
+++ b/debug/plot_intensity.py
@ -1,69 +0,0 @@
-"""
-usage: python plot_intensity.py file.png threshold
-
-plots sum of pixel intensities on both axes for an image.
-"""
-import sys
-import time
-from itertools import groupby
-from operator import itemgetter
-
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import barh
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-def plot_barchart(ar):
-    n = len(ar)
-    ind = np.arange(n)
-    width = 0.35
-    plt.bar(ind, ar, width, color='r', zorder=1)
-    plt.show()
-
-
-def merge_lines(lines):
-    ranges = []
-    for k, g in groupby(enumerate(lines), lambda (i, x): i-x):
-        group = map(itemgetter(1), g)
-        ranges.append((group[0], group[-1]))
-    merged = []
-    for r in ranges:
-        merged.append((r[0] + r[1]) / 2)
-    return merged
-
-
-def plot_lines(image, lines):
-    for y in lines:
-        plt.plot([0, image.shape[1]], [y, y])
-    plt.imshow(image)
-    plt.show()
-
-
-@timeit
-def main():
-    image = cv2.imread(sys.argv[1])
-    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    threshold = cv2.adaptiveThreshold(np.invert(gray), 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, -2)
-    y_proj = np.sum(threshold, axis=1)
-    line_threshold = int(sys.argv[2])
-    lines = np.where(y_proj < line_threshold)[0]
-    lines = merge_lines(lines)
-    plot_lines(image, lines)
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/print_text.py
+++ b/debug/print_text.py
@ -1,83 +0,0 @@
-"""
-usage: python print_text.py file.pdf
-
-prints horizontal and vertical text lines present in a pdf file.
-"""
-
-import sys
-import time
-from pprint import pprint
-
-from pdfminer.layout import LAParams
-from pdfminer.pdfpage import PDFPage
-from pdfminer.pdfdevice import PDFDevice
-from pdfminer.pdfparser import PDFParser
-from pdfminer.pdfdocument import PDFDocument
-from pdfminer.converter import PDFPageAggregator
-from pdfminer.pdfinterp import PDFPageInterpreter
-from pdfminer.pdfinterp import PDFResourceManager
-from pdfminer.pdfpage import PDFTextExtractionNotAllowed
-from pdfminer.layout import (LAParams, LTChar, LTAnno, LTTextBoxHorizontal,
-                             LTTextLineHorizontal, LTTextLineVertical, LTLine)
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-def extract_text_objects(layout, LTObject, t=None):
-    if t is None:
-        t = []
-    try:
-        for obj in layout._objs:
-            if isinstance(obj, LTObject):
-                t.append(obj)
-            else:
-                t += extract_text_objects(obj, LTObject)
-    except AttributeError:
-        pass
-    return t
-
-
-@timeit
-def main():
-    with open(sys.argv[1], 'rb') as f:
-        parser = PDFParser(f)
-        document = PDFDocument(parser)
-        if not document.is_extractable:
-            raise PDFTextExtractionNotAllowed
-        # 2.0, 0.5, 0.1
-        kwargs = {
-            'char_margin': 1.0,
-            'line_margin': 0.5,
-            'word_margin': 0.1,
-            'detect_vertical': True
-        }
-        laparams = LAParams(**kwargs)
-        rsrcmgr = PDFResourceManager()
-        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-        interpreter = PDFPageInterpreter(rsrcmgr, device)
-        for page in PDFPage.create_pages(document):
-            interpreter.process_page(page)
-            layout = device.get_result()
-            lh = extract_text_objects(layout, LTTextLineHorizontal)
-            lv = extract_text_objects(layout, LTTextLineVertical)
-            print "number of horizontal text lines -> {0}".format(len(lh))
-            print "horizontal text lines ->"
-            pprint([t.get_text() for t in lh])
-            print "number of vertical text lines -> {0}".format(len(lv))
-            print "vertical text lines ->"
-            pprint([t.get_text() for t in lv])
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/debug/threshold.py
+++ b/debug/threshold.py
@ -1,41 +0,0 @@
-"""
-usage: python threshold.py file.png blocksize threshold_constant
-
-shows thresholded image.
-"""
-
-import sys
-import time
-
-import cv2
-import numpy as np
-import matplotlib.pyplot as plt
-
-
-def timeit(func):
-    def timed(*args, **kw):
-        start = time.time()
-        result = func(*args, **kw)
-        end = time.time()
-        print 'Function: %r took: %2.4f seconds' % (func.__name__, end - start)
-        return result
-    return timed
-
-
-@timeit
-def main():
-    img = cv2.imread(sys.argv[1])
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    blocksize = int(sys.argv[2])
-    threshold_constant = float(sys.argv[3])
-    threshold = cv2.adaptiveThreshold(np.invert(gray), 255,
-        cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, threshold_constant)
-    plt.imshow(img)
-    plt.show()
-
-
-if __name__ == '__main__':
-    if len(sys.argv) == 1:
-        print __doc__
-    else:
-        main()
--- a/examples/demo_lattice.py
+++ b/examples/demo_lattice.py
@ -1,11 +0,0 @@
-from camelot import Pdf
-from camelot import Lattice
-
-
-extractor = Lattice(Pdf("files/column_span_1.pdf", clean=True), scale=30)
-tables = extractor.get_tables()
-print tables
-
-extractor = Lattice(Pdf("files/column_span_2.pdf"), clean=True, scale=30)
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_lattice_fill.py
+++ b/examples/demo_lattice_fill.py
@ -1,13 +0,0 @@
-from camelot import Pdf
-from camelot import Lattice
-
-
-extractor = Lattice(
-    Pdf("files/row_span_1.pdf", clean=True), fill='v', scale=40)
-tables = extractor.get_tables()
-print tables
-
-extractor = Lattice(
-    Pdf("files/row_span_2.pdf", clean=True), fill='v', scale=30)
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_lattice_invert.py
+++ b/examples/demo_lattice_invert.py
@ -1,13 +0,0 @@
-from camelot import Pdf
-from camelot import Lattice
-
-
-extractor = Lattice(Pdf("files/lines_in_background_1.pdf",
-                        clean=True), scale=30, invert=True)
-tables = extractor.get_tables()
-print tables
-
-extractor = Lattice(Pdf("files/lines_in_background_2.pdf",
-                        clean=True), scale=30, invert=True)
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_lattice_rotation.py
+++ b/examples/demo_lattice_rotation.py
@ -1,11 +0,0 @@
-from camelot import Pdf
-from camelot import Lattice
-
-
-extractor = Lattice(Pdf("files/left_rotated_table.pdf", clean=True), scale=30)
-tables = extractor.get_tables()
-print tables
-
-extractor = Lattice(Pdf("files/right_rotated_table.pdf", clean=True), scale=30)
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_lattice_twotables.py
+++ b/examples/demo_lattice_twotables.py
@ -1,11 +0,0 @@
-from camelot import Pdf
-from camelot import Lattice
-
-
-extractor = Lattice(Pdf("files/twotables_1.pdf", clean=True), scale=40)
-tables = extractor.get_tables()
-print tables
-
-extractor = Lattice(Pdf("files/twotables_2.pdf", clean=True), scale=30)
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_stream.py
+++ b/examples/demo_stream.py
@ -1,8 +0,0 @@
-from camelot import Pdf
-from camelot import Stream
-
-
-extractor = Stream(Pdf("files/budget_2014-15.pdf",
-                       char_margin=1.0, clean=True))
-tables = extractor.get_tables()
-print tables
--- a/examples/demo_stream_columns.py
+++ b/examples/demo_stream_columns.py
@ -1,13 +0,0 @@
-from camelot import Pdf
-from camelot import Stream
-
-
-extractor = Stream(Pdf("files/inconsistent_rows.pdf", char_margin=1.0),
-                   columns="65,95,285,640,715,780", ytol=10)
-tables = extractor.get_tables()
-print tables
-
-extractor = Stream(Pdf("files/consistent_rows.pdf", char_margin=1.0),
-                   columns="28,67,180,230,425,475,700", ytol=5)
-tables = extractor.get_tables()
-print tables
--- a/examples/files/consistent_rows.pdf
+++ b/examples/files/consistent_rows.pdf
--- a/examples/files/inconsistent_rows.pdf
+++ b/examples/files/inconsistent_rows.pdf
--- a/examples/files/left_rotated_table.pdf
+++ b/examples/files/left_rotated_table.pdf
--- a/examples/files/right_rotated_table.pdf
+++ b/examples/files/right_rotated_table.pdf
--- a/tests/budget_2014-15.pdf
+++ b/tests/budget_2014-15.pdf
--- a/tests/column_span_1.pdf
+++ b/tests/column_span_1.pdf
--- a/tests/column_span_2.pdf
+++ b/tests/column_span_2.pdf
--- a/tests/files/agstat.pdf
+++ b/tests/files/agstat.pdf
--- a/tests/files/assam.pdf
+++ b/tests/files/assam.pdf
--- a/examples/files/budget_2014-15.pdf
+++ b/examples/files/budget_2014-15.pdf
--- a/examples/files/column_span_1.pdf
+++ b/examples/files/column_span_1.pdf
--- a/examples/files/column_span_2.pdf
+++ b/examples/files/column_span_2.pdf
--- a/tests/files/district_health.pdf
+++ b/tests/files/district_health.pdf
--- a/tests/files/electoral_roll.pdf
+++ b/tests/files/electoral_roll.pdf
--- a/tests/files/health.pdf
+++ b/tests/files/health.pdf
--- a/tests/files/left_rotated_table_1.pdf
+++ b/tests/files/left_rotated_table_1.pdf
--- a/tests/files/left_rotated_table_2.pdf
+++ b/tests/files/left_rotated_table_2.pdf
--- a/examples/files/lines_in_background_1.pdf
+++ b/examples/files/lines_in_background_1.pdf
--- a/examples/files/lines_in_background_2.pdf
+++ b/examples/files/lines_in_background_2.pdf
--- a/tests/files/medicine.pdf
+++ b/tests/files/medicine.pdf
--- a/tests/files/mexican_towns.pdf
+++ b/tests/files/mexican_towns.pdf
--- a/examples/files/missing_values.pdf
+++ b/examples/files/missing_values.pdf
--- a/tests/files/population_growth.pdf
+++ b/tests/files/population_growth.pdf
--- a/tests/files/rainfall_distribution.pdf
+++ b/tests/files/rainfall_distribution.pdf
--- a/tests/files/right_rotated_table_1.pdf
+++ b/tests/files/right_rotated_table_1.pdf
--- a/tests/files/right_rotated_table_2.pdf
+++ b/tests/files/right_rotated_table_2.pdf
--- a/examples/files/row_span_1.pdf
+++ b/examples/files/row_span_1.pdf
--- a/examples/files/row_span_2.pdf
+++ b/examples/files/row_span_2.pdf
--- a/tests/files/row_span_3.pdf
+++ b/tests/files/row_span_3.pdf
--- a/tests/files/tableception.pdf
+++ b/tests/files/tableception.pdf
--- a/tests/tabula_test_pdfs/12s0324.pdf
+++ b/tests/tabula_test_pdfs/12s0324.pdf
--- a/tests/tabula_test_pdfs/20.pdf
+++ b/tests/tabula_test_pdfs/20.pdf
--- a/tests/tabula_test_pdfs/S2MNCEbirdisland.pdf
+++ b/tests/tabula_test_pdfs/S2MNCEbirdisland.pdf
--- a/tests/tabula_test_pdfs/arabic.pdf
+++ b/tests/tabula_test_pdfs/arabic.pdf
--- a/tests/tabula_test_pdfs/argentina_diputados_voting_record.pdf
+++ b/tests/tabula_test_pdfs/argentina_diputados_voting_record.pdf
--- a/tests/tabula_test_pdfs/campaign_donors.pdf
+++ b/tests/tabula_test_pdfs/campaign_donors.pdf
--- a/tests/tabula_test_pdfs/china.pdf
+++ b/tests/tabula_test_pdfs/china.pdf
--- a/tests/tabula_test_pdfs/eu-002.pdf
+++ b/tests/tabula_test_pdfs/eu-002.pdf
--- a/tests/tabula_test_pdfs/eu-017.pdf
+++ b/tests/tabula_test_pdfs/eu-017.pdf
--- a/tests/tabula_test_pdfs/failing_sort.pdf
+++ b/tests/tabula_test_pdfs/failing_sort.pdf
--- a/tests/tabula_test_pdfs/frx_2012_disclosure.pdf
+++ b/tests/tabula_test_pdfs/frx_2012_disclosure.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-001.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-002.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-003.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-004.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-005.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-006.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-007.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-008.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-009b-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010-str.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.json
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.json
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.pdf
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-010.pdf
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml
--- a/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml
+++ b/tests/tabula_test_pdfs/icdar2013-dataset/competition-dataset-eu/eu-011-str.xml
--- a/Show More
+++ b/Show More