diff --git a/camelot/handlers.py b/camelot/handlers.py index 8585432..516cc3b 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -1,11 +1,11 @@ import os -import tempfile from PyPDF2 import PdfFileReader, PdfFileWriter from .core import TableList, GeometryList from .parsers import Stream, Lattice -from .utils import get_page_layout, get_text_objects, get_rotation +from .utils import (TemporaryDirectory, get_page_layout, get_text_objects, + get_rotation) class PDFHandler(object): @@ -27,7 +27,6 @@ class PDFHandler(object): if not self.filename.endswith('.pdf'): raise TypeError("File format not supported.") self.pages = self._get_pages(self.filename, pages) - self.tempdir = tempfile.mkdtemp() def _get_pages(self, filename, pages): """Converts pages string to list of ints. @@ -130,15 +129,16 @@ class PDFHandler(object): found in pdf. """ - for p in self.pages: - self._save_page(self.filename, p, self.tempdir) - pages = [os.path.join(self.tempdir, 'page-{0}.pdf'.format(p)) - for p in self.pages] tables = [] geometry = [] - parser = Stream(**kwargs) if not mesh else Lattice(**kwargs) - for p in pages: - t, g = parser.extract_tables(p) - tables.extend(t) - geometry.append(g) + with TemporaryDirectory() as tempdir: + for p in self.pages: + self._save_page(self.filename, p, tempdir) + pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p)) + for p in self.pages] + parser = Stream(**kwargs) if not mesh else Lattice(**kwargs) + for p in pages: + t, g = parser.extract_tables(p) + tables.extend(t) + geometry.append(g) return TableList(tables), GeometryList(geometry) \ No newline at end of file diff --git a/camelot/utils.py b/camelot/utils.py index 3e87fe5..6c29410 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -1,6 +1,8 @@ from __future__ import division import os +import shutil import logging +import tempfile from itertools import groupby from operator import itemgetter @@ -18,6 +20,16 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, 
# https://stackoverflow.com/a/22726782
class TemporaryDirectory(object):
    """Context manager that creates a temporary directory and
    removes it, with everything inside it, when the block exits.

    NOTE(review): presumably a stand-in for the standard library's
    ``tempfile.TemporaryDirectory`` on interpreters that lack it;
    confirm against the supported Python versions.

    Parameters
    ----------
    suffix : str, optional (default: '')
        Passed to ``tempfile.mkdtemp``.
    prefix : str, optional (default: 'tmp')
        Passed to ``tempfile.mkdtemp``.
    dir : str, optional (default: None)
        Parent directory passed to ``tempfile.mkdtemp``.

    Attributes
    ----------
    name : str or None
        Path of the directory created by ``__enter__``; None until
        the context has been entered.

    Examples
    --------
    >>> with TemporaryDirectory() as tempdir:
    ...     path = os.path.join(tempdir, 'page-1.pdf')

    """
    def __init__(self, suffix='', prefix='tmp', dir=None):
        self.suffix = suffix
        self.prefix = prefix
        self.dir = dir
        # Defined up front so cleanup() is safe before __enter__.
        self.name = None

    def __enter__(self):
        """Creates the directory and returns its path (a string,
        not the manager itself).
        """
        self.name = tempfile.mkdtemp(
            suffix=self.suffix, prefix=self.prefix, dir=self.dir)
        return self.name

    def __exit__(self, exc_type, exc_value, traceback):
        """Removes the directory. Returns None, so an exception
        raised inside the ``with`` block still propagates.
        """
        self.cleanup()

    def cleanup(self):
        """Removes the directory tree if it still exists.

        A directory that is already gone is not an error: there is
        nothing left to clean up, and raising here would replace any
        exception coming out of the ``with`` block. Every other
        failure (e.g. permissions) is re-raised.
        """
        import errno

        if self.name is None:
            return
        try:
            shutil.rmtree(self.name)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise