From ff4d8ce2289e8f418ba381913b7efae5c30dedc6 Mon Sep 17 00:00:00 2001
From: Vinayak Mehta
Date: Thu, 13 Dec 2018 13:13:07 +0530
Subject: [PATCH] Add test for arabic

---
 tests/data.py        | 8 ++++++++
 tests/test_common.py | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/tests/data.py b/tests/data.py
index 4cc6f89..d9b3c6e 100755
--- a/tests/data.py
+++ b/tests/data.py
@@ -486,3 +486,11 @@ data_lattice_shift_text_right_bottom = [
     ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
     ["Knowledge &Practices on HTN &DM", "2400", "Women (≥ 18 yrs)", "-", "-", "-", "1728"]
 ]
+
+data_arabic = [
+    ['ً\n\xa0\nﺎﺒﺣﺮﻣ', 'ﻥﺎﻄﻠﺳ\xa0ﻲﻤﺳﺍ'],
+    ['ﻝﺎﻤﺸﻟﺍ\xa0ﺎﻨﻴﻟﻭﺭﺎﻛ\xa0ﺔﻳﻻﻭ\xa0ﻦﻣ\xa0ﺎﻧﺍ', '؟ﺖﻧﺍ\xa0ﻦﻳﺍ\xa0ﻦﻣ'],
+    ['1234', 'ﻂﻄﻗ\xa047\xa0ﻱﺪﻨﻋ'],
+    ['؟ﻙﺎﺒﺷ\xa0ﺖﻧﺍ\xa0ﻞﻫ', 'ﺔﻳﺰﻴﻠﺠﻧﻻﺍ\xa0ﻲﻓ\xa0Jeremy\xa0ﻲﻤﺳﺍ'],
+    ['Jeremy\xa0is\xa0ﻲﻣﺮﺟ\xa0in\xa0Arabic', '']
+]
diff --git a/tests/test_common.py b/tests/test_common.py
index 708d61c..7430924 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -179,3 +179,11 @@ def test_repr():
     assert repr(tables) == ""
     assert repr(tables[0]) == ""
     assert repr(tables[0].cells[0][0]) == ""
+
+
+def test_arabic():
+    df = pd.DataFrame(data_arabic)
+
+    filename = os.path.join(testdir, "tabula/arabic.pdf")
+    tables = camelot.read_pdf(filename)
+    assert df.equals(tables[0].df)