{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Common imports and setup\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "'/Users/francoishuet/Code/camelot/camelot/__init__.py'"
     },
     "metadata": {},
     "execution_count": 1
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import patches, lines\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas.testing import assert_frame_equal\n",
    "import pytest\n",
    "\n",
    "import pdfminer\n",
    "\n",
    "from IPython.display import display\n",
    "\n",
    "# Make sure we use the local version of camelot if it is here.\n",
    "# The notebook is expected to run with the repository root as its working\n",
    "# directory: that directory goes first on sys.path so the local `camelot`\n",
    "# and `tests` packages are the ones imported below.\n",
    "repo_root = os.path.abspath('')\n",
    "sys.path.insert(0, repo_root)\n",
    "\n",
    "import camelot\n",
    "from camelot.core import Table, TableList, TextEdges\n",
    "from camelot.__version__ import generate_version\n",
    "from camelot.utils import get_text_objects, text_in_bbox\n",
    "from camelot.parsers.stream import Stream\n",
    "from camelot.parsers.hybrid import Hybrid\n",
    "from camelot.handlers import PDFHandler\n",
    "from camelot.plotting import draw_pdf\n",
    "# NOTE(review): presumably the source of the data_* expected tables referenced\n",
    "# in the next cell -- TODO confirm and consider importing them by name.\n",
    "from tests.data import *\n",
    "\n",
    "# Sample PDFs live under <repo root>/tests/files. Build the path from the\n",
    "# working directory itself instead of its parent plus a hard-coded \"camelot\"\n",
    "# folder name, so the notebook also works when the checkout is named differently.\n",
    "testdir = os.path.join(repo_root, \"tests\", \"files\")\n",
    "\n",
    "# Set up plots to be large enough for visualization\n",
    "# NOTE(review): no figure-size setting follows this comment -- TODO confirm\n",
    "# whether one was intended here.\n",
    "\n",
    "# To check which library we're using\n",
    "camelot.__file__\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select the PDF to work on. Keep exactly one case active; every commented\n",
    "# line below is an alternative case (the trailing comment names the related test).\n",
    "#   pdf_file: path of the PDF relative to testdir\n",
    "#   kwargs:   extra options for the case (presumably forwarded to the parser\n",
    "#             in a later cell -- TODO confirm)\n",
    "#   data:     expected table data for the case, or None when there is none\n",
    "#             (presumably compared against the result later -- TODO confirm)\n",
    "kwargs = {}\n",
    "data = None\n",
    "pdf_file = \"vertical_header.pdf\" # test_hybrid_vertical_header\n",
    "# pdf_file, kwargs, data = \"superscript.pdf\", {\"flag_size\": True}, data_stream_flag_size # test_hybrid_flag_size\n",
    "# pdf_file = \"health.pdf\" # test_hybrid\n",
    "# pdf_file = \"clockwise_table_2.pdf\"\n",
    "# pdf_file = \"tabula/12s0324.pdf\" # interesting because contains two separate tables\n",
    "# pdf_file, kwargs = \"tabula/us-007.pdf\", {\"table_regions\": [\"320,335,573,505\"]} # test_hybrid_table_regions\n",
    "# pdf_file, kwargs = \"detect_vertical_false.pdf\", {\"strip_text\": \" ,\\n\"} # data_stream_strip_text\n",
    "# pdf_file, kwargs, data = \"tabula/m27.pdf\", {\"columns\": [\"72,95,209,327,442,529,566,606,683\"], \"split_text\": True, }, data_stream_split_text # data_stream_split_text\n",
    "# pdf_file = \"vertical_header.pdf\"\n",
    "# pdf_file = \"twotables_2.pdf\"\n",
    "# pdf_file = \"camelot-issue-132-multiple-tables.pdf\"\n",
    "# pdf_file, kwargs, data = \"edge_tol.pdf\", {\"edge_tol\": 500}, data_stream_edge_tol\n",
    "# pdf_file, kwargs, data = \"edge_tol.pdf\", {}, data_stream_edge_tol\n",
    "\n",
    "filename = os.path.join(testdir, pdf_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Found 1 tables (37x2) in 0.21s\n"
    },
    {
     "output_type": "display_data",
     "data": {
      "text/plain": " 0 1\n0 VinsauVerre \n1 LesBlancs 12.5CL\n2 A.O.PCôtesduRhône \n3 DomainedelaGuicharde«Autourdelachapelle»2016 8€\n4 A.O.PVacqueyras \n5 DomainedeMontvac«Melodine»2016 10€\n6 A.O.PChâteauneufduPape \n7 DomainedeBeaurenard2017 13€\n8 A.O.PCôteauxduLanguedoc \n9 VillaTempora«Untempspourelle»2014 9€\n10 A.O.PCôtesdeProvence \n11 ChâteauGrandBoise2017 9€\n12 LesRosés 125CL\n13 A.O.PCôtesduRhône \n14 DomainedelaFlorane«AfleurdePampre»2016 8€\n15 FamilleCoulon(DomaineBeaurenard)Biotifulfox2017 8€\n16 A.O.PVacqueyras \n17 DomainedeMontvac2017 9€\n18 A.O.PLanguedoc \n19 DomainedeJoncas«Nébla»2015 8€\n20 VillaTempora«L’arroseurarrosé»2015 9€\n21 A.O.PCôtesdeProvence \n22 ChâteauGrandBoise«SainteVictoire»2017 9€\n23 ChâteauLéoube2016 10€\n24 LesRouges 12CL\n25 A.O.PCôtesduRhône \n26 DomainedeDionysos«LaCigalette» 8€\n27 ChâteauSaintEstèved’Uchaux«GrandeRéserve»2014 9€\n28 DomainedelaGuicharde«CuvéeMassillan»2016 9€\n29 DomainedelaFlorane«TerrePourpre»2014 10€\n30 L’OratoireStMartin«RéservedesSeigneurs»2015 11€\n31 A.O.PSaintJoseph \n32 
DomaineMonierPerréol«Châtelet»2015 13€\n33 A.O.PChâteauneufduPape \n34 DomainedeBeaurenard2011 15€\n35 A.O.PCornas \n36 DomaineLionnet«TerreBrûlée»2012 15€", "text/html": "
| \n | 0 | \n1 | \n
|---|---|---|
| 0 | \nVinsauVerre | \n\n |
| 1 | \nLesBlancs | \n12.5CL | \n
| 2 | \nA.O.PCôtesduRhône | \n\n |
| 3 | \nDomainedelaGuicharde«Autourdelachapelle»2016 | \n8€ | \n
| 4 | \nA.O.PVacqueyras | \n\n |
| 5 | \nDomainedeMontvac«Melodine»2016 | \n10€ | \n
| 6 | \nA.O.PChâteauneufduPape | \n\n |
| 7 | \nDomainedeBeaurenard2017 | \n13€ | \n
| 8 | \nA.O.PCôteauxduLanguedoc | \n\n |
| 9 | \nVillaTempora«Untempspourelle»2014 | \n9€ | \n
| 10 | \nA.O.PCôtesdeProvence | \n\n |
| 11 | \nChâteauGrandBoise2017 | \n9€ | \n
| 12 | \nLesRosés | \n125CL | \n
| 13 | \nA.O.PCôtesduRhône | \n\n |
| 14 | \nDomainedelaFlorane«AfleurdePampre»2016 | \n8€ | \n
| 15 | \nFamilleCoulon(DomaineBeaurenard)Biotifulfox2017 | \n8€ | \n
| 16 | \nA.O.PVacqueyras | \n\n |
| 17 | \nDomainedeMontvac2017 | \n9€ | \n
| 18 | \nA.O.PLanguedoc | \n\n |
| 19 | \nDomainedeJoncas«Nébla»2015 | \n8€ | \n
| 20 | \nVillaTempora«L’arroseurarrosé»2015 | \n9€ | \n
| 21 | \nA.O.PCôtesdeProvence | \n\n |
| 22 | \nChâteauGrandBoise«SainteVictoire»2017 | \n9€ | \n
| 23 | \nChâteauLéoube2016 | \n10€ | \n
| 24 | \nLesRouges | \n12CL | \n
| 25 | \nA.O.PCôtesduRhône | \n\n |
| 26 | \nDomainedeDionysos«LaCigalette» | \n8€ | \n
| 27 | \nChâteauSaintEstèved’Uchaux«GrandeRéserve»2014 | \n9€ | \n
| 28 | \nDomainedelaGuicharde«CuvéeMassillan»2016 | \n9€ | \n
| 29 | \nDomainedelaFlorane«TerrePourpre»2014 | \n10€ | \n
| 30 | \nL’OratoireStMartin«RéservedesSeigneurs»2015 | \n11€ | \n
| 31 | \nA.O.PSaintJoseph | \n\n |
| 32 | \nDomaineMonierPerréol«Châtelet»2015 | \n13€ | \n
| 33 | \nA.O.PChâteauneufduPape | \n\n |
| 34 | \nDomainedeBeaurenard2011 | \n15€ | \n
| 35 | \nA.O.PCornas | \n\n |
| 36 | \nDomaineLionnet«TerreBrûlée»2012 | \n15€ | \n