Add temporary directory context manager
parent
5116234bc7
commit
d3beaafc99
|
|
@ -1,11 +1,11 @@
|
|||
import os
|
||||
import tempfile
|
||||
|
||||
from PyPDF2 import PdfFileReader, PdfFileWriter
|
||||
|
||||
from .core import TableList, GeometryList
|
||||
from .parsers import Stream, Lattice
|
||||
from .utils import get_page_layout, get_text_objects, get_rotation
|
||||
from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
|
||||
get_rotation)
|
||||
|
||||
|
||||
class PDFHandler(object):
|
||||
|
|
@ -27,7 +27,6 @@ class PDFHandler(object):
|
|||
if not self.filename.endswith('.pdf'):
|
||||
raise TypeError("File format not supported.")
|
||||
self.pages = self._get_pages(self.filename, pages)
|
||||
self.tempdir = tempfile.mkdtemp()
|
||||
|
||||
def _get_pages(self, filename, pages):
|
||||
"""Converts pages string to list of ints.
|
||||
|
|
@ -130,15 +129,16 @@ class PDFHandler(object):
|
|||
found in pdf.
|
||||
|
||||
"""
|
||||
for p in self.pages:
|
||||
self._save_page(self.filename, p, self.tempdir)
|
||||
pages = [os.path.join(self.tempdir, 'page-{0}.pdf'.format(p))
|
||||
for p in self.pages]
|
||||
tables = []
|
||||
geometry = []
|
||||
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
|
||||
for p in pages:
|
||||
t, g = parser.extract_tables(p)
|
||||
tables.extend(t)
|
||||
geometry.append(g)
|
||||
with TemporaryDirectory() as tempdir:
|
||||
for p in self.pages:
|
||||
self._save_page(self.filename, p, tempdir)
|
||||
pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
|
||||
for p in self.pages]
|
||||
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
|
||||
for p in pages:
|
||||
t, g = parser.extract_tables(p)
|
||||
tables.extend(t)
|
||||
geometry.append(g)
|
||||
return TableList(tables), GeometryList(geometry)
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
from __future__ import division
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
import tempfile
|
||||
from itertools import groupby
|
||||
from operator import itemgetter
|
||||
|
||||
|
|
@ -18,6 +20,16 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
|||
LTTextLineVertical)
|
||||
|
||||
|
||||
# https://stackoverflow.com/a/22726782
|
||||
class TemporaryDirectory(object):
    """Context manager that provides a scratch directory.

    On entry a new directory is created with ``tempfile.mkdtemp`` and its
    path is bound to the ``as`` target. On exit the directory and all of
    its contents are deleted with ``shutil.rmtree``.
    """

    def __enter__(self):
        """Create the directory, remember it as ``self.name``, return its path."""
        path = tempfile.mkdtemp()
        self.name = path
        return path

    def __exit__(self, exc_type, exc_value, traceback):
        """Delete the directory tree created by ``__enter__``.

        Nothing is returned, so an exception raised inside the ``with``
        body is not suppressed.
        """
        shutil.rmtree(self.name)
|
||||
|
||||
|
||||
def setup_logging(name):
|
||||
"""Sets up a logger with StreamHandler.
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue