From d3d625a08d5c6e5bb2f4b90408462d3b652b27a0 Mon Sep 17 00:00:00 2001 From: Frh Date: Wed, 22 Apr 2020 15:36:37 -0700 Subject: [PATCH] Unit test fixes --- tests/data.py | 26 +++++++++++++++++++++++++- tests/test_common.py | 4 ++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/data.py b/tests/data.py index 42776cd..1205de7 100755 --- a/tests/data.py +++ b/tests/data.py @@ -2388,7 +2388,15 @@ data_stream_flag_size = [ ], ] -data_stream_strip_text = [ +# Hybrid adds more content into the header. +data_hybrid_flag_size = [ + ['', '', '', '', '(As at end-March)', '', '', '', '', '', ''], + ['', '', '', '', '', '', '', '', '', '', '(` Billion)'] +] +data_hybrid_flag_size.extend(data_stream_flag_size) + + +data_hybrid_strip_text = [ ["VinsauVerre", ""], ["LesBlancs", "12.5CL"], ["A.O.PCôtesduRhône", ""], @@ -2413,8 +2421,24 @@ data_stream_strip_text = [ ["A.O.PCôtesdeProvence", ""], ["ChâteauGrandBoise«SainteVictoire»2017", "9€"], ["ChâteauLéoube2016", "10€"], + ["LesRouges", "12CL"], + ["A.O.PCôtesduRhône", ""], + ["DomainedeDionysos«LaCigalette»", "8€"], + ["ChâteauSaintEstèved’Uchaux«GrandeRéserve»2014", "9€"], + ["DomainedelaGuicharde«CuvéeMassillan»2016", "9€"], + ["DomainedelaFlorane«TerrePourpre»2014", "10€"], + ["L’OratoireStMartin«RéservedesSeigneurs»2015", "11€"], + ["A.O.PSaintJoseph", ""], + ["DomaineMonierPerréol«Châtelet»2015", "13€"], + ["A.O.PChâteauneufduPape", ""], + ["DomainedeBeaurenard2011", "15€"], + ["A.O.PCornas", ""], + ["DomaineLionnet«TerreBrûlée»2012", "15€"], ] +# Stream only detects part of the table +data_stream_strip_text = data_hybrid_strip_text[0:-13] + data_stream_edge_tol = [ ["Key figures", ""], ["", "2016"], diff --git a/tests/test_common.py b/tests/test_common.py index 827d0e3..3d38788 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -233,7 +233,7 @@ def test_hybrid_split_text(): def test_hybrid_flag_size(): - df = pd.DataFrame(data_stream_flag_size) + df = pd.DataFrame(data_hybrid_flag_size) filename = os.path.join(testdir, "superscript.pdf") tables = camelot.read_pdf(filename, flavor="hybrid", flag_size=True) @@ -241,7 +241,7 @@ def test_hybrid_flag_size(): def test_hybrid_strip_text(): - df = pd.DataFrame(data_stream_strip_text) + df = pd.DataFrame(data_hybrid_strip_text) filename = os.path.join(testdir, "detect_vertical_false.pdf") tables = camelot.read_pdf(filename, flavor="hybrid", strip_text=" ,\n")