diff --git a/tests/data.py b/tests/data.py
index e623b9c..42776cd 100755
--- a/tests/data.py
+++ b/tests/data.py
@@ -2149,6 +2149,14 @@ data_stream_split_text = [
     ],
 ]
 
+# Same rows as the stream expectation except the first: the stream
+# algorithm excludes the string "Alphabetic Listing by type".
+data_hybrid_split_text = list(data_stream_split_text)
+data_hybrid_split_text[0] = [
+    'FEB', 'RUAR', 'Y 2014 M27 (BUS)', '',
+    'ALPHABETIC LISTING BY T', 'YPE', '', '', '', 'ABLPDM27',
+]
+
 data_stream_flag_size = [
     [
         "States",
diff --git a/tests/test_common.py b/tests/test_common.py
index 04f83fa..827d0e3 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -220,7 +220,7 @@ def test_hybrid_columns():
 
 
 def test_hybrid_split_text():
-    df = pd.DataFrame(data_stream_split_text)
+    df = pd.DataFrame(data_hybrid_split_text)
     filename = os.path.join(testdir, "tabula/m27.pdf")
     tables = camelot.read_pdf(