Add test for page list generation and fix backend kwarg
parent
8650f25331
commit
cc820b9e5d
|
|
@ -47,9 +47,9 @@ class PDFHandler(object):
|
|||
self.password = password
|
||||
if sys.version_info[0] < 3:
|
||||
self.password = self.password.encode("ascii")
|
||||
self.pages = self._get_pages(self.filepath, pages)
|
||||
self.pages = self._get_pages(pages)
|
||||
|
||||
def _get_pages(self, filepath, pages):
|
||||
def _get_pages(self, pages):
|
||||
"""Converts pages string to list of ints.
|
||||
|
||||
Parameters
|
||||
|
|
@ -67,13 +67,16 @@ class PDFHandler(object):
|
|||
|
||||
"""
|
||||
page_numbers = []
|
||||
|
||||
if pages == "1":
|
||||
page_numbers.append({"start": 1, "end": 1})
|
||||
else:
|
||||
instream = open(filepath, "rb")
|
||||
infile = PdfFileReader(instream, strict=False)
|
||||
with open(self.filepath, "rb") as f:
|
||||
infile = PdfFileReader(f, strict=False)
|
||||
|
||||
if infile.isEncrypted:
|
||||
infile.decrypt(self.password)
|
||||
|
||||
if pages == "all":
|
||||
page_numbers.append({"start": 1, "end": infile.getNumPages()})
|
||||
else:
|
||||
|
|
@ -85,7 +88,7 @@ class PDFHandler(object):
|
|||
page_numbers.append({"start": int(a), "end": int(b)})
|
||||
else:
|
||||
page_numbers.append({"start": int(r), "end": int(r)})
|
||||
instream.close()
|
||||
|
||||
P = []
|
||||
for p in page_numbers:
|
||||
P.extend(range(p["start"], p["end"] + 1))
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import pandas as pd
|
|||
from pandas.testing import assert_frame_equal
|
||||
|
||||
import camelot
|
||||
from camelot.io import PDFHandler
|
||||
from camelot.core import Table, TableList
|
||||
from camelot.__version__ import generate_version
|
||||
from camelot.backends import ImageConversionBackend
|
||||
|
|
@ -60,9 +61,7 @@ def test_password():
|
|||
|
||||
def test_repr_poppler():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(
|
||||
filename, backend=ImageConversionBackend(backend="poppler", use_fallback=False)
|
||||
)
|
||||
tables = camelot.read_pdf(filename, backend="poppler")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||
|
|
@ -71,10 +70,7 @@ def test_repr_poppler():
|
|||
@skip_on_windows
|
||||
def test_repr_ghostscript():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(
|
||||
filename,
|
||||
backend=ImageConversionBackend(backend="ghostscript", use_fallback=False),
|
||||
)
|
||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||
|
|
@ -82,9 +78,7 @@ def test_repr_ghostscript():
|
|||
|
||||
def test_url_poppler():
|
||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||
tables = camelot.read_pdf(
|
||||
url, backend=ImageConversionBackend(backend="poppler", use_fallback=False)
|
||||
)
|
||||
tables = camelot.read_pdf(url, backend="poppler")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||
|
|
@ -93,9 +87,7 @@ def test_url_poppler():
|
|||
@skip_on_windows
|
||||
def test_url_ghostscript():
|
||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||
tables = camelot.read_pdf(
|
||||
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
||||
)
|
||||
tables = camelot.read_pdf(url, backend="ghostscript")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||
|
|
@ -103,27 +95,17 @@ def test_url_ghostscript():
|
|||
|
||||
def test_pages_poppler():
|
||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||
tables = camelot.read_pdf(
|
||||
url, backend=ImageConversionBackend(backend="poppler", use_fallback=False)
|
||||
)
|
||||
tables = camelot.read_pdf(url, backend="poppler")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||
|
||||
tables = camelot.read_pdf(
|
||||
url,
|
||||
pages="1-end",
|
||||
backend=ImageConversionBackend(backend="poppler", use_fallback=False),
|
||||
)
|
||||
tables = camelot.read_pdf(url, pages="1-end", backend="poppler")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||
|
||||
tables = camelot.read_pdf(
|
||||
url,
|
||||
pages="all",
|
||||
backend=ImageConversionBackend(backend="poppler", use_fallback=False),
|
||||
)
|
||||
tables = camelot.read_pdf(url, pages="all", backend="poppler")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||
|
|
@ -132,27 +114,17 @@ def test_pages_poppler():
|
|||
@skip_on_windows
|
||||
def test_pages_ghostscript():
|
||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||
tables = camelot.read_pdf(
|
||||
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
||||
)
|
||||
tables = camelot.read_pdf(url, backend="ghostscript")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||
|
||||
tables = camelot.read_pdf(
|
||||
url,
|
||||
pages="1-end",
|
||||
backend=ImageConversionBackend(backend="ghostscript", use_fallback=False),
|
||||
)
|
||||
tables = camelot.read_pdf(url, pages="1-end", backend="ghostscript")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||
|
||||
tables = camelot.read_pdf(
|
||||
url,
|
||||
pages="all",
|
||||
backend=ImageConversionBackend(backend="ghostscript", use_fallback=False),
|
||||
)
|
||||
tables = camelot.read_pdf(url, pages="all", backend="ghostscript")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||
|
|
@ -181,3 +153,22 @@ def test_table_order():
|
|||
(1, 2),
|
||||
(1, 1),
|
||||
]
|
||||
|
||||
|
||||
def test_handler_pages_generator():
    """Check that ``PDFHandler._get_pages`` expands page specs into page lists.

    Every page specification is resolved against a freshly constructed
    handler for the ``foo.pdf`` fixture. The expected values for ``"all"``
    and ``"1-end"`` are both ``[1]``, i.e. the test expects the fixture to
    be reported as a single-page document.
    """
    pdf_path = os.path.join(testdir, "foo.pdf")

    # (page specification, expected expanded page numbers)
    expectations = (
        ("1", [1]),
        ("all", [1]),
        ("1-end", [1]),
        ("1,2,3,4", [1, 2, 3, 4]),
        ("1,2,5-10", [1, 2, 5, 6, 7, 8, 9, 10]),
    )

    for spec, expected in expectations:
        # Build a new handler for each spec so every check starts from a
        # newly initialised object.
        assert PDFHandler(pdf_path)._get_pages(spec) == expected
|
||||
|
|
|
|||
Loading…
Reference in New Issue