diff --git a/tests/data.py b/tests/data.py index 42776cd..1205de7 100755 --- a/tests/data.py +++ b/tests/data.py @@ -2388,7 +2388,15 @@ data_stream_flag_size = [ ], ] -data_stream_strip_text = [ +# Hybrid adds more content into the header. +data_hybrid_flag_size = [ + ['', '', '', '', '(As at end-March)', '', '', '', '', '', ''], + ['', '', '', '', '', '', '', '', '', '', '(` Billion)'] +] +data_hybrid_flag_size.extend(data_stream_flag_size) + + +data_hybrid_strip_text = [ ["VinsauVerre", ""], ["LesBlancs", "12.5CL"], ["A.O.PCôtesduRhône", ""], @@ -2413,8 +2421,24 @@ data_stream_strip_text = [ ["A.O.PCôtesdeProvence", ""], ["ChâteauGrandBoise«SainteVictoire»2017", "9€"], ["ChâteauLéoube2016", "10€"], + ["LesRouges", "12CL"], + ["A.O.PCôtesduRhône", ""], + ["DomainedeDionysos«LaCigalette»", "8€"], + ["ChâteauSaintEstèved’Uchaux«GrandeRéserve»2014", "9€"], + ["DomainedelaGuicharde«CuvéeMassillan»2016", "9€"], + ["DomainedelaFlorane«TerrePourpre»2014", "10€"], + ["L’OratoireStMartin«RéservedesSeigneurs»2015", "11€"], + ["A.O.PSaintJoseph", ""], + ["DomaineMonierPerréol«Châtelet»2015", "13€"], + ["A.O.PChâteauneufduPape", ""], + ["DomainedeBeaurenard2011", "15€"], + ["A.O.PCornas", ""], + ["DomaineLionnet«TerreBrûlée»2012", "15€"], ] +# Stream only detects part of the table +data_stream_strip_text = data_hybrid_strip_text[0:-13] + data_stream_edge_tol = [ ["Key figures", ""], ["", "2016"], diff --git a/tests/test_common.py b/tests/test_common.py index 4109ea2..7d2ef38 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -227,7 +227,7 @@ def test_hybrid_split_text(): def test_hybrid_flag_size(): - df = pd.DataFrame(data_stream_flag_size) + df = pd.DataFrame(data_hybrid_flag_size) filename = os.path.join(testdir, "superscript.pdf") tables = camelot.read_pdf(filename, flavor="hybrid", flag_size=True) @@ -235,7 +235,7 @@ def test_hybrid_flag_size(): def test_hybrid_strip_text(): - df = pd.DataFrame(data_stream_strip_text) + df = pd.DataFrame(data_hybrid_strip_text) filename = os.path.join(testdir, "detect_vertical_false.pdf") tables = camelot.read_pdf(filename, flavor="hybrid", strip_text=" ,\n")