Add temporary directory context manager

pull/2/head
Vinayak Mehta 2018-09-09 18:10:55 +05:30
parent 5116234bc7
commit d3beaafc99
2 changed files with 24 additions and 12 deletions

View File

@ -1,11 +1,11 @@
import os
import tempfile
from PyPDF2 import PdfFileReader, PdfFileWriter
from .core import TableList, GeometryList
from .parsers import Stream, Lattice
from .utils import get_page_layout, get_text_objects, get_rotation
from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
get_rotation)
class PDFHandler(object):
@ -27,7 +27,6 @@ class PDFHandler(object):
if not self.filename.endswith('.pdf'):
raise TypeError("File format not supported.")
self.pages = self._get_pages(self.filename, pages)
self.tempdir = tempfile.mkdtemp()
def _get_pages(self, filename, pages):
"""Converts pages string to list of ints.
@ -130,15 +129,16 @@ class PDFHandler(object):
found in pdf.
"""
for p in self.pages:
self._save_page(self.filename, p, self.tempdir)
pages = [os.path.join(self.tempdir, 'page-{0}.pdf'.format(p))
for p in self.pages]
tables = []
geometry = []
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
for p in pages:
t, g = parser.extract_tables(p)
tables.extend(t)
geometry.append(g)
with TemporaryDirectory() as tempdir:
for p in self.pages:
self._save_page(self.filename, p, tempdir)
pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
for p in self.pages]
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
for p in pages:
t, g = parser.extract_tables(p)
tables.extend(t)
geometry.append(g)
return TableList(tables), GeometryList(geometry)

View File

@ -1,6 +1,8 @@
from __future__ import division
import os
import shutil
import logging
import tempfile
from itertools import groupby
from operator import itemgetter
@ -18,6 +20,16 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
LTTextLineVertical)
# https://stackoverflow.com/a/22726782
class TemporaryDirectory(object):
    """Context manager that creates a temporary directory and removes it
    on exit.

    The directory is created with ``tempfile.mkdtemp`` when the ``with``
    block is entered, and its path (a string) is bound to the ``as``
    target. On exit the directory and everything inside it is deleted
    with ``shutil.rmtree``.

    Examples
    --------
    >>> with TemporaryDirectory() as tempdir:
    ...     path = os.path.join(tempdir, 'page-1.pdf')

    """

    def __enter__(self):
        """Creates the temporary directory.

        Returns
        -------
        name : str
            Path of the newly created directory.

        """
        self.name = tempfile.mkdtemp()
        return self.name

    def __exit__(self, exc_type, exc_value, traceback):
        """Removes the temporary directory and its contents.

        Exceptions raised inside the ``with`` block are never
        suppressed (this method returns None).

        Raises
        ------
        OSError
            If the directory still exists but could not be removed, and
            no exception is already propagating from the ``with`` block.

        """
        try:
            shutil.rmtree(self.name)
        except OSError:
            # A directory that is already gone satisfies the cleanup
            # contract, and a cleanup failure must not replace an
            # exception that is propagating from the with-body.
            if exc_type is None and os.path.isdir(self.name):
                raise
def setup_logging(name):
"""Sets up a logger with StreamHandler.