Add temporary directory context manager

pull/2/head
Vinayak Mehta 2018-09-09 18:10:55 +05:30
parent 5116234bc7
commit d3beaafc99
2 changed files with 24 additions and 12 deletions

View File

@ -1,11 +1,11 @@
import os import os
import tempfile
from PyPDF2 import PdfFileReader, PdfFileWriter from PyPDF2 import PdfFileReader, PdfFileWriter
from .core import TableList, GeometryList from .core import TableList, GeometryList
from .parsers import Stream, Lattice from .parsers import Stream, Lattice
from .utils import get_page_layout, get_text_objects, get_rotation from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
get_rotation)
class PDFHandler(object): class PDFHandler(object):
@ -27,7 +27,6 @@ class PDFHandler(object):
if not self.filename.endswith('.pdf'): if not self.filename.endswith('.pdf'):
raise TypeError("File format not supported.") raise TypeError("File format not supported.")
self.pages = self._get_pages(self.filename, pages) self.pages = self._get_pages(self.filename, pages)
self.tempdir = tempfile.mkdtemp()
def _get_pages(self, filename, pages): def _get_pages(self, filename, pages):
"""Converts pages string to list of ints. """Converts pages string to list of ints.
@ -130,12 +129,13 @@ class PDFHandler(object):
found in pdf. found in pdf.
""" """
for p in self.pages:
self._save_page(self.filename, p, self.tempdir)
pages = [os.path.join(self.tempdir, 'page-{0}.pdf'.format(p))
for p in self.pages]
tables = [] tables = []
geometry = [] geometry = []
with TemporaryDirectory() as tempdir:
for p in self.pages:
self._save_page(self.filename, p, tempdir)
pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
for p in self.pages]
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs) parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
for p in pages: for p in pages:
t, g = parser.extract_tables(p) t, g = parser.extract_tables(p)

View File

@ -1,6 +1,8 @@
from __future__ import division from __future__ import division
import os import os
import shutil
import logging import logging
import tempfile
from itertools import groupby from itertools import groupby
from operator import itemgetter from operator import itemgetter
@ -18,6 +20,16 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
LTTextLineVertical) LTTextLineVertical)
# https://stackoverflow.com/a/22726782
class TemporaryDirectory(object):
    """Context manager that creates a temporary directory and removes it on exit.

    Entering the context calls ``tempfile.mkdtemp()`` and yields the path of
    the new directory as a string; leaving the context deletes that directory
    and everything in it, whether or not the body raised.
    """

    def __enter__(self):
        """Create the directory and return its path."""
        self.name = tempfile.mkdtemp()
        return self.name

    def __exit__(self, exc_type, exc_value, traceback):
        """Remove the directory tree; exceptions from the body propagate."""
        # Imported locally so this block does not depend on a module-level
        # import being added alongside it.
        import errno

        try:
            shutil.rmtree(self.name)
        except OSError as exc:
            # The body may already have removed the directory itself; that
            # is the state we want, so only re-raise genuine failures.
            if exc.errno != errno.ENOENT:
                raise
def setup_logging(name): def setup_logging(name):
"""Sets up a logger with StreamHandler. """Sets up a logger with StreamHandler.