# coding: utf8
"""Tests comparing Stream-based table extraction against expected rows.

Each test runs ``Pdf(Stream(...), path, ...).extract()`` on a PDF located
relative to this file and compares the ``data`` of ``table-1`` on
``page-1`` with a hand-written list of rows.
"""
import os

from nose.tools import assert_equal

from camelot.pdf import Pdf
from camelot.stream import Stream


# Directory containing this test module; PDF fixtures are resolved against it.
testdir = os.path.dirname(os.path.abspath(__file__))


def test_stream_basic():
    """Stream() with no options on page 1 of us-024.pdf yields the expected rows."""
    expected = [
        ["", "Table 6.", ""],
        ["", "U.S. Production, Imports, Exports, and Net Supply of Conventional Pesticides", ""],
        ["", "at Producer Level, 1994/95 Estimates.", ""],
        ["", "Active Ingredient", "Sales Value"],
        ["", "(in billions of lbs.)", "(in billions of dollars)"],
        ["Category", "1994/95", "1994/95"],
        ["U.S. Production", "1.3", "7.0"],
        ["U.S. Imports", "0.2", "2.2"],
        ["Total Supply", "1.5", "9.2"],
        ["U.S. Exports", "0.5", "2.6"],
        ["Net Supply/Usage", "1.0", "6.6"],
        ["SOURCE:", "EPA estimates based on ACPA Surveys, Department of Commerce Publications, tabulations and other", ""],
        ["sources.", "", ""],
        # \xe2\x80\x94 is the UTF-8 byte sequence of an em dash (U+2014).
        ["16\xe2\x80\x9494/95 Pesticides Industry Sales And Usage", "", ""],
    ]

    pdf_path = os.path.join(testdir, "tabula_test_pdfs/us-024.pdf")
    first_page_only = [{"start": 1, "end": 1}]
    extractor = Stream()
    extracted = Pdf(extractor, pdf_path, pagenos=first_page_only, clean=True).extract()

    assert_equal(extracted["page-1"]["table-1"]["data"], expected)


def test_stream_missing_value():
    """Stream with explicit margins on missing_values.pdf yields the expected rows.

    The expected rows carry empty strings in the cells that have no value
    (notably the last two columns of every indicator row).
    """
    expected = [
        ["Bhandara - Key Indicators", "", "", "", ""],
        ["", "DLHS-4 (2012-13)", "", "DLHS-3 (2007-08)", ""],
        ["Indicators", "TOTAL", "RURAL", "TOTAL", "RURAL"],
        ["Reported Prevalence of Morbidity", "", "", "", ""],
        ["Any Injury .....................................................................................................................................", "1.9", "2.1", "", ""],
        ["Acute Illness .................................................................................................................................", "4.5", "5.6", "", ""],
        ["Chronic Illness ..............................................................................................................................", "5.1", "4.1", "", ""],
        ["Reported Prevalence of Chronic Illness during last one year (%)", "", "", "", ""],
        ["Disease of respiratory system ......................................................................................................", "11.7", "15.0", "", ""],
        ["Disease of cardiovascular system ................................................................................................", "8.9", "9.3", "", ""],
        ["Persons suffering from tuberculosis .............................................................................................", "2.2", "1.5", "", ""],
        ["Anaemia Status by Haemoglobin Level14 (%)", "", "", "", ""],
        ["Children (6-59 months) having anaemia ......................................................................................", "68.5", "71.9", "", ""],
        ["Children (6-59 months) having severe anaemia ..........................................................................", "6.7", "9.4", "", ""],
        ["Children (6-9 Years) having anaemia - Male ................................................................................", "67.1", "71.4", "", ""],
        ["Children (6-9 Years) having severe anaemia - Male ....................................................................", "4.4", "2.4", "", ""],
        ["Children (6-9 Years) having anaemia - Female ...........................................................................", "52.4", "48.8", "", ""],
        ["Children (6-9 Years) having severe anaemia - Female ................................................................", "1.2", "0.0", "", ""],
        ["Children (6-14 years) having anaemia - Male .............................................................................", "50.8", "62.5", "", ""],
        ["Children (6-14 years) having severe anaemia - Male ..................................................................", "3.7", "3.6", "", ""],
        ["Children (6-14 years) having anaemia - Female .........................................................................", "48.3", "50.0", "", ""],
        ["Children (6-14 years) having severe anaemia - Female ..............................................................", "4.3", "6.1", "", ""],
        ["Children (10-19 Years15) having anaemia - Male .........................................................................", "37.9", "51.2", "", ""],
        ["Children (10-19 Years15) having severe anaemia - Male .............................................................", "3.5", "4.0", "", ""],
        ["Children (10-19 Years15) having anaemia - Female .....................................................................", "46.6", "52.1", "", ""],
        ["Children (10-19 Years15) having severe anaemia - Female .........................................................", "6.4", "6.5", "", ""],
        ["Adolescents (15-19 years) having anaemia ................................................................................", "39.4", "46.5", "", ""],
        ["Adolescents (15-19 years) having severe anaemia .....................................................................", "5.4", "5.1", "", ""],
        ["Pregnant women (15-49 aged) having anaemia ..........................................................................", "48.8", "51.5", "", ""],
        ["Pregnant women (15-49 aged) having severe anaemia ..............................................................", "7.1", "8.8", "", ""],
        ["Women (15-49 aged) having anaemia .........................................................................................", "45.2", "51.7", "", ""],
        ["Women (15-49 aged) having severe anaemia .............................................................................", "4.8", "5.9", "", ""],
        ["Persons (20 years and above) having anaemia ...........................................................................", "37.8", "42.1", "", ""],
        ["Persons (20 years and above) having Severe anaemia ..............................................................", "4.6", "4.8", "", ""],
        ["Blood Sugar Level (age 18 years and above) (%)", "", "", "", ""],
        ["Blood Sugar Level >140 mg/dl (high) ...........................................................................................", "12.9", "11.1", "", ""],
        ["Blood Sugar Level >160 mg/dl (very high) ...................................................................................", "7.0", "5.1", "", ""],
        ["Hypertension (age 18 years and above) (%)", "", "", "", ""],
        ["Above Normal Range (Systolic >140 mm of Hg & Diastolic >90 mm of Hg ) ..............................", "23.8", "22.8", "", ""],
        ["Moderately High (Systolic >160 mm of Hg & Diastolic >100 mm of Hg ) .....................................", "8.2", "7.1", "", ""],
        ["Very High (Systolic >180 mm of Hg & Diastolic >110 mm of Hg ) ...............................................", "3.7", "3.1", "", ""],
        ["14 Any anaemia below 11g/dl, severe anaemia below 7g/dl. 15 Excluding age group 19 years", "", "", "", ""],
        ["", "Chronic Illness :Any person with symptoms persisting for longer than one month is defined as suffering from chronic illness", "", "", ""],
        ["4", "", "", "", ""],
    ]

    pdf_path = os.path.join(testdir, "missing_values.pdf")
    extractor = Stream(margins=(1.0, 0.5, 0.1))
    # No pagenos argument here: Pdf is left to its own default page selection.
    extracted = Pdf(extractor, pdf_path, clean=True).extract()

    assert_equal(extracted["page-1"]["table-1"]["data"], expected)


def test_stream_single_table_area():
    """Stream restricted to one table_area on page 1 of us-007.pdf yields the expected rows."""
    expected = [
        ["", "One Withholding"],
        ["Payroll Period", "Allowance"],
        ["Weekly", "$71.15"],
        ["Biweekly", "142.31"],
        ["Semimonthly", "154.17"],
        ["Monthly", "308.33"],
        ["Quarterly", "925.00"],
        ["Semiannually", "1,850.00"],
        ["Annually", "3,700.00"],
        ["Daily or Miscellaneous", "14.23"],
        ["(each day of the payroll period)", ""],
    ]

    pdf_path = os.path.join(testdir, "tabula_test_pdfs/us-007.pdf")
    first_page_only = [{"start": 1, "end": 1}]
    extractor = Stream(
        table_area=["320,500,573,335"],
        ytol=[10],
        margins=(1.0, 0.5, 0.1),
    )
    extracted = Pdf(extractor, pdf_path, pagenos=first_page_only, clean=True).extract()

    assert_equal(extracted["page-1"]["table-1"]["data"], expected)


def test_stream_columns():
    """Stream given explicit column separators on mexican_towns.pdf yields the expected rows."""
    header_rows = [
        ["Clave", "Nombre Entidad", "Clave", "Nombre Municipio", "Clave", "Nombre Localidad"],
        ["Entidad", "", "Municipio", "", "Localidad", ""],
    ]
    # Every body row starts with the same four cells; only the locality
    # code and locality name vary.
    shared_prefix = ["01", "Aguascalientes", "001", "Aguascalientes"]
    # Non-ASCII literals below are covered by the utf8 coding declaration
    # at the top of the file.
    localities = [
        ("0094", "Granja Adelita"),
        ("0096", "Agua Azul"),
        ("0100", "Rancho Alegre"),
        ("0102", "Los Arbolitos [Rancho]"),
        ("0104", "Ardillas de Abajo (Las Ardillas)"),
        ("0106", "Arellano"),
        ("0112", "Bajío los Vázquez"),
        ("0113", "Bajío de Montoro"),
        ("0114", "Residencial San Nicolás [Baños la Cantera]"),
        ("0120", "Buenavista de Peñuelas"),
        ("0121", "Cabecita 3 Marías (Rancho Nuevo)"),
        ("0125", "Cañada Grande de Cotorina"),
        ("0126", "Cañada Honda [Estación]"),
        ("0127", "Los Caños"),
        ("0128", "El Cariñán"),
        ("0129", "El Carmen [Granja]"),
        ("0135", "El Cedazo (Cedazo de San Antonio)"),
        ("0138", "Centro de Arriba (El Taray)"),
        ("0139", "Cieneguilla (La Lumbrera)"),
        ("0141", "Cobos"),
        ("0144", "El Colorado (El Soyatal)"),
        ("0146", "El Conejal"),
        ("0157", "Cotorina de Abajo"),
        ("0162", "Coyotes"),
        ("0166", "La Huerta (La Cruz)"),
        ("0170", "Cuauhtémoc (Las Palomas)"),
        ("0171", "Los Cuervos (Los Ojos de Agua)"),
        ("0172", "San José [Granja]"),
        ("0176", "La Chiripa"),
        ("0182", "Dolores"),
        ("0183", "Los Dolores"),
        ("0190", "El Duraznillo"),
        ("0191", "Los Durón"),
        ("0197", "La Escondida"),
        ("0201", "Brande Vin [Bodegas]"),
        ("0207", "Valle Redondo"),
        ("0209", "La Fortuna"),
        ("0212", "Lomas del Gachupín"),
        ("0213", "El Carmen (Gallinas Güeras) [Rancho]"),
        ("0216", "La Gloria"),
        ("0226", "Hacienda Nueva"),
    ]
    expected = header_rows + [
        shared_prefix + [code, name] for code, name in localities
    ]

    pdf_path = os.path.join(testdir, "mexican_towns.pdf")
    extractor = Stream(columns=["28,67,180,230,425,475,700"], ytol=[10])
    extracted = Pdf(extractor, pdf_path, clean=True).extract()

    assert_equal(extracted["page-1"]["table-1"]["data"], expected)