camelot-py/tests/test_common.py

82 lines
2.2 KiB
Python

# -*- coding: utf-8 -*-
import os
import pandas as pd
import camelot
from .data import *
# Absolute path of the "files" directory located next to this test module.
testdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")
def test_stream():
    """Placeholder test with an empty body; it performs no checks."""
def test_stream_table_rotated():
    """Check stream-flavor parsing of the clockwise and anticlockwise PDFs.

    Both files must yield a first table equal to the frame built from
    ``data_stream_table_rotated``.
    """
    expected = pd.DataFrame(data_stream_table_rotated)

    # Same order as before: clockwise first, then anticlockwise.
    for pdf_name in ("clockwise_table_2.pdf", "anticlockwise_table_2.pdf"):
        pdf_path = os.path.join(testdir, pdf_name)
        parsed = camelot.read_pdf(pdf_path, flavor="stream")
        assert expected.equals(parsed[0].df)
def test_stream_table_area():
    """Check stream-flavor parsing restricted by an explicit ``table_area``.

    The first table read from ``tabula/us-007.pdf`` must equal the frame
    built from ``data_stream_table_area_single``.
    """
    expected = pd.DataFrame(data_stream_table_area_single)
    pdf_path = os.path.join(testdir, "tabula/us-007.pdf")
    area = ["320,500,573,335"]
    parsed = camelot.read_pdf(pdf_path, flavor="stream", table_area=area)
    first_table = parsed[0].df
    assert expected.equals(first_table)
def test_stream_columns():
    """Check stream-flavor parsing with explicit ``columns`` and ``row_close_tol``.

    The first table read from ``mexican_towns.pdf`` must equal the frame
    built from ``data_stream_columns``.
    """
    expected = pd.DataFrame(data_stream_columns)
    pdf_path = os.path.join(testdir, "mexican_towns.pdf")
    read_options = {
        "flavor": "stream",
        "columns": ["67,180,230,425,475"],
        "row_close_tol": 10,
    }
    parsed = camelot.read_pdf(pdf_path, **read_options)
    assert expected.equals(parsed[0].df)
def test_lattice():
    """Check parsing of page 2 of the ICDAR 2013 ``us-030.pdf`` sample.

    No ``flavor`` is passed, so ``read_pdf``'s default is used (presumably
    lattice, per the test name -- confirm against the camelot version in use).
    The first table must equal the frame built from ``data_lattice``.
    """
    expected = pd.DataFrame(data_lattice)
    relative_pdf = "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf"
    pdf_path = os.path.join(testdir, relative_pdf)
    parsed = camelot.read_pdf(pdf_path, pages="2")
    first_table = parsed[0].df
    assert expected.equals(first_table)
def test_lattice_table_rotated():
    """Check default-flavor parsing of the clockwise and anticlockwise PDFs.

    Both files must yield a first table equal to the frame built from
    ``data_lattice_table_rotated``.
    """
    expected = pd.DataFrame(data_lattice_table_rotated)

    # Same order as before: clockwise first, then anticlockwise.
    for pdf_name in ("clockwise_table_1.pdf", "anticlockwise_table_1.pdf"):
        pdf_path = os.path.join(testdir, pdf_name)
        parsed = camelot.read_pdf(pdf_path)
        assert expected.equals(parsed[0].df)
def test_lattice_process_background():
    """Check parsing of ``background_lines_1.pdf`` with ``process_background=True``.

    The second table returned (index 1) must equal the frame built from
    ``data_lattice_process_background``.
    """
    expected = pd.DataFrame(data_lattice_process_background)
    pdf_path = os.path.join(testdir, "background_lines_1.pdf")
    parsed = camelot.read_pdf(pdf_path, process_background=True)
    second_table = parsed[1].df  # index 1, not 0: the second detected table
    assert expected.equals(second_table)
def test_lattice_copy_text():
    """Check parsing of ``row_span_1.pdf`` with ``copy_text`` and line scaling.

    ``read_pdf`` is called with ``line_size_scaling=60`` and ``copy_text="v"``.
    The first table must equal the frame built from ``data_lattice_copy_text``.
    """
    expected = pd.DataFrame(data_lattice_copy_text)
    pdf_path = os.path.join(testdir, "row_span_1.pdf")
    read_options = {"line_size_scaling": 60, "copy_text": "v"}
    parsed = camelot.read_pdf(pdf_path, **read_options)
    assert expected.equals(parsed[0].df)