Merge branch 'master' into hybrid-parser
commit
b43aca8ff5
|
|
@ -0,0 +1 @@
|
|||
open_collective: camelot
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
# .readthedocs.yml
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
# Build documentation with MkDocs
|
||||
#mkdocs:
|
||||
# configuration: mkdocs.yml
|
||||
|
||||
# Optionally build your docs in additional formats such as PDF
|
||||
formats:
|
||||
- pdf
|
||||
|
||||
# Optionally set the version of Python and requirements required to build your docs
|
||||
python:
|
||||
version: 3.8
|
||||
install:
|
||||
- requirements: requirements.txt
|
||||
11
.travis.yml
11
.travis.yml
|
|
@ -7,10 +7,6 @@ install:
|
|||
- make install
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
python: '3.5'
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
|
|
@ -20,8 +16,13 @@ jobs:
|
|||
- make test
|
||||
python: '3.7'
|
||||
dist: xenial
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
python: '3.8'
|
||||
dist: xenial
|
||||
- stage: coverage
|
||||
python: '3.6'
|
||||
python: '3.8'
|
||||
script:
|
||||
- make test
|
||||
- codecov --verbose
|
||||
|
|
|
|||
12
HISTORY.md
12
HISTORY.md
|
|
@ -4,6 +4,18 @@ Release History
|
|||
master
|
||||
------
|
||||
|
||||
0.8.0 (2020-05-24)
|
||||
------------------
|
||||
|
||||
**Improvements**
|
||||
|
||||
* Drop Python 2 support!
|
||||
* Remove Python 2.7 and 3.5 support.
|
||||
* Replace all instances of `.format` with f-strings.
|
||||
* Remove all `__future__` imports.
|
||||
* Fix HTTP 403 forbidden exception in read_pdf(url) and remove Python 2 urllib support.
|
||||
* Fix test data.
|
||||
|
||||
**Bugfixes**
|
||||
|
||||
* Fix library discovery on Windows. [#32](https://github.com/camelot-dev/camelot/pull/32) by [KOLANICH](https://github.com/KOLANICH).
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
__all__ = ("main",)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
VERSION = (0, 7, 3)
|
||||
VERSION = (0, 8, 0)
|
||||
PRERELEASE = None # alpha, beta or rc
|
||||
REVISION = None
|
||||
|
||||
|
|
@ -8,9 +8,9 @@ REVISION = None
|
|||
def generate_version(version, prerelease=None, revision=None):
|
||||
version_parts = [".".join(map(str, version))]
|
||||
if prerelease is not None:
|
||||
version_parts.append("-{}".format(prerelease))
|
||||
version_parts.append(f"-{prerelease}")
|
||||
if revision is not None:
|
||||
version_parts.append(".{}".format(revision))
|
||||
version_parts.append(f".{revision}")
|
||||
return "".join(version_parts)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ def lattice(c, *args, **kwargs):
|
|||
filepath, pages=pages, flavor="lattice", suppress_stdout=quiet,
|
||||
**kwargs
|
||||
)
|
||||
click.echo("Found {} tables".format(tables.n))
|
||||
click.echo(f"Found {tables.n} tables")
|
||||
if plot_type is not None:
|
||||
for table in tables:
|
||||
plot(table, kind=plot_type)
|
||||
|
|
@ -304,7 +304,7 @@ def stream(c, *args, **kwargs):
|
|||
tables = read_pdf(
|
||||
filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
|
||||
)
|
||||
click.echo("Found {} tables".format(tables.n))
|
||||
click.echo(f"Found {tables.n} tables")
|
||||
if plot_type is not None:
|
||||
for table in tables:
|
||||
plot(table, kind=plot_type)
|
||||
|
|
@ -399,7 +399,7 @@ def network(c, *args, **kwargs):
|
|||
filepath, pages=pages, flavor="network",
|
||||
suppress_stdout=quiet, **kwargs
|
||||
)
|
||||
click.echo("Found {} tables".format(tables.n))
|
||||
click.echo(f"Found {tables.n} tables")
|
||||
if plot_type is not None:
|
||||
for table in tables:
|
||||
plot(table, kind=plot_type)
|
||||
|
|
|
|||
|
|
@ -68,12 +68,8 @@ class TextAlignment():
|
|||
def __repr__(self):
|
||||
text_inside = " | ".join(
|
||||
map(lambda x: x.get_text(), self.textlines[:2])).replace("\n", "")
|
||||
return "<TextEdge coord={coord} tl={tl_count} " \
|
||||
"textlines text='{text_inside}...'>".format(
|
||||
coord=self.coord,
|
||||
tl_count=len(self.textlines),
|
||||
text_inside=text_inside
|
||||
)
|
||||
return f"<TextEdge coord={self.coord} tl={len(self.textlines)} " \
|
||||
f"textlines text='{text_inside}...'>"
|
||||
|
||||
def register_aligned_textline(self, textline, coord):
|
||||
"""Update new textline to this alignment, adapting its average."""
|
||||
|
|
@ -116,13 +112,10 @@ class TextEdge(TextAlignment):
|
|||
self.is_valid = False
|
||||
|
||||
def __repr__(self):
|
||||
return "<TextEdge x={} y0={} y1={} align={} valid={}>".format(
|
||||
round(self.coord, 2),
|
||||
round(self.y0, 2),
|
||||
round(self.y1, 2),
|
||||
self.align,
|
||||
self.is_valid,
|
||||
)
|
||||
x = round(self.coord, 2)
|
||||
y0 = round(self.y0, 2)
|
||||
y1 = round(self.y1, 2)
|
||||
return f"<TextEdge x={x} y0={y0} y1={y1} align={self.align} valid={self.is_valid}>"
|
||||
|
||||
def update_coords(self, x, textline, edge_tol=50):
|
||||
"""Updates the text edge's x and bottom y coordinates and sets
|
||||
|
|
@ -386,12 +379,11 @@ class Cell():
|
|||
self._text = ""
|
||||
|
||||
def __repr__(self):
|
||||
return "<Cell x1={} y1={} x2={} y2={}>".format(
|
||||
round(self.x1, 2),
|
||||
round(self.y1, 2),
|
||||
round(self.x2, 2),
|
||||
round(self.y2, 2)
|
||||
)
|
||||
x1 = round(self.x1, 2)
|
||||
y1 = round(self.y1, 2)
|
||||
x2 = round(self.x2, 2)
|
||||
y2 = round(self.y2, 2)
|
||||
return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
|
|
@ -465,7 +457,7 @@ class Table():
|
|||
self.textlines = [] # List of actual textlines on the page
|
||||
|
||||
def __repr__(self):
|
||||
return "<{} shape={}>".format(self.__class__.__name__, self.shape)
|
||||
return f"<{self.__class__.__name__} shape={self.shape}>"
|
||||
|
||||
def __lt__(self, other):
|
||||
if self.page == other.page:
|
||||
|
|
@ -739,7 +731,7 @@ class Table():
|
|||
|
||||
"""
|
||||
kw = {
|
||||
"sheet_name": "page-{}-table-{}".format(self.page, self.order),
|
||||
"sheet_name": f"page-{self.page}-table-{self.order}",
|
||||
"encoding": "utf-8",
|
||||
}
|
||||
kw.update(kwargs)
|
||||
|
|
@ -777,7 +769,7 @@ class Table():
|
|||
kw = {"if_exists": "replace", "index": False}
|
||||
kw.update(kwargs)
|
||||
conn = sqlite3.connect(path)
|
||||
table_name = "page-{}-table-{}".format(self.page, self.order)
|
||||
table_name = f"page-{self.page}-table-{self.order}"
|
||||
self.df.to_sql(table_name, conn, **kw)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
|
@ -831,7 +823,7 @@ class TableList():
|
|||
self._tables = tables
|
||||
|
||||
def __repr__(self):
|
||||
return "<{} n={}>".format(self.__class__.__name__, self.n)
|
||||
return f"<{self.__class__.__name__} n={self.n}>"
|
||||
|
||||
def __len__(self):
|
||||
return len(self._tables)
|
||||
|
|
@ -841,7 +833,7 @@ class TableList():
|
|||
|
||||
@staticmethod
|
||||
def _format_func(table, f):
|
||||
return getattr(table, "to_{}".format(f))
|
||||
return getattr(table, f"to_{f}")
|
||||
|
||||
@property
|
||||
def n(self):
|
||||
|
|
@ -852,10 +844,7 @@ class TableList():
|
|||
root = kwargs.get("root")
|
||||
ext = kwargs.get("ext")
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order,
|
||||
ext)
|
||||
)
|
||||
filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
|
||||
filepath = os.path.join(dirname, filename)
|
||||
to_format = self._format_func(table, f)
|
||||
to_format(filepath)
|
||||
|
|
@ -868,12 +857,7 @@ class TableList():
|
|||
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
||||
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root,
|
||||
table.page,
|
||||
table.order,
|
||||
ext)
|
||||
)
|
||||
filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
|
||||
filepath = os.path.join(dirname, filename)
|
||||
z.write(filepath, os.path.basename(filepath))
|
||||
|
||||
|
|
@ -907,9 +891,8 @@ class TableList():
|
|||
# pylint: disable=abstract-class-instantiated
|
||||
writer = pd.ExcelWriter(filepath)
|
||||
for table in self._tables:
|
||||
sheet_name = "page-{}-table-{}".format(table.page, table.order)
|
||||
table.df.to_excel(writer, sheet_name=sheet_name,
|
||||
encoding="utf-8")
|
||||
sheet_name = f"page-{table.page}-table-{table.order}"
|
||||
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
|
||||
writer.save()
|
||||
if compress:
|
||||
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ def delete_instance(instance):
|
|||
"""
|
||||
return libgs.gsapi_delete_instance(instance)
|
||||
|
||||
|
||||
if sys.platform == "win32":
|
||||
c_stdstream_call_t = WINFUNCTYPE(c_int, gs_main_instance, POINTER(c_char), c_int)
|
||||
else:
|
||||
|
|
@ -247,7 +248,10 @@ if sys.platform == "win32":
|
|||
libgs = __win32_finddll()
|
||||
if not libgs:
|
||||
import ctypes.util
|
||||
libgs = ctypes.util.find_library("".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))) # finds in %PATH%
|
||||
|
||||
libgs = ctypes.util.find_library(
|
||||
"".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))
|
||||
) # finds in %PATH%
|
||||
if not libgs:
|
||||
raise RuntimeError("Please make sure that Ghostscript is installed")
|
||||
libgs = windll.LoadLibrary(libgs)
|
||||
|
|
|
|||
|
|
@ -133,8 +133,7 @@ class PDFHandler():
|
|||
infile = PdfFileReader(fileobj, strict=False)
|
||||
if infile.isEncrypted:
|
||||
infile.decrypt(self.password)
|
||||
fpath = build_file_path_in_temp_dir(
|
||||
"page-{page}.pdf".format(page=page))
|
||||
fpath = build_file_path_in_temp_dir(f"page-{page}.pdf")
|
||||
froot, fext = os.path.splitext(fpath)
|
||||
p = infile.getPage(page - 1)
|
||||
outfile = PdfFileWriter()
|
||||
|
|
@ -211,8 +210,7 @@ class PDFHandler():
|
|||
page_idx, layout_kwargs)
|
||||
if not suppress_stdout:
|
||||
rootname = os.path.basename(parser.rootname)
|
||||
logger.info(
|
||||
"Processing {rootname}".format(rootname=rootname))
|
||||
logger.info(f"Processing {rootname}")
|
||||
t = parser.extract_tables()
|
||||
tables.extend(t)
|
||||
return TableList(sorted(tables))
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
import os
|
||||
|
||||
from .base import BaseParser
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
|
||||
import warnings
|
||||
|
||||
from .base import TextBaseParser
|
||||
|
|
@ -167,8 +165,7 @@ class Stream(TextBaseParser):
|
|||
ncols = max(set(elements), key=elements.count)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found in table area {bbox}".format(
|
||||
bbox=bbox)
|
||||
f"No tables found in table area {bbox}"
|
||||
)
|
||||
cols = [
|
||||
(t.x0, t.x1)
|
||||
|
|
|
|||
|
|
@ -191,14 +191,11 @@ class PlotMethods():
|
|||
|
||||
if table.flavor == "lattice" and kind in ["textedge"]:
|
||||
raise NotImplementedError(
|
||||
"Lattice flavor does not support kind='{}'".format(kind)
|
||||
f"Lattice flavor does not support kind='{kind}'"
|
||||
)
|
||||
if table.flavor != "lattice" and kind in ["line"]:
|
||||
raise NotImplementedError(
|
||||
"{flavor} flavor does not support kind='{kind}'".format(
|
||||
flavor=table.flavor,
|
||||
kind=kind
|
||||
)
|
||||
f"{table.flavor} flavor does not support kind='{kind}'"
|
||||
)
|
||||
|
||||
plot_method = getattr(self, kind)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import division
|
||||
|
||||
import re
|
||||
import os
|
||||
import atexit
|
||||
import sys
|
||||
import re
|
||||
import random
|
||||
import shutil
|
||||
import string
|
||||
|
|
@ -34,18 +33,9 @@ from pdfminer.layout import (
|
|||
|
||||
from .ext.ghostscript import Ghostscript
|
||||
|
||||
# pylint: disable=import-error
|
||||
# PyLint will evaluate both branches, and will necessarily complain about one
|
||||
# of them.
|
||||
PY3 = sys.version_info[0] >= 3
|
||||
if PY3:
|
||||
from urllib.request import urlopen
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.parse import urlparse as parse_url
|
||||
from urllib.parse import uses_relative, uses_netloc, uses_params
|
||||
else:
|
||||
from urllib2 import urlopen
|
||||
from urlparse import urlparse as parse_url
|
||||
from urlparse import uses_relative, uses_netloc, uses_params
|
||||
|
||||
|
||||
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
|
||||
|
|
@ -95,14 +85,12 @@ def download_url(url):
|
|||
Temporary filepath.
|
||||
|
||||
"""
|
||||
filename = "{}.pdf".format(random_string(6))
|
||||
filename = f"{random_string(6)}.pdf"
|
||||
with tempfile.NamedTemporaryFile("wb", delete=False) as f:
|
||||
req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
||||
obj = urlopen(req)
|
||||
if PY3:
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
request = Request(url, None, headers)
|
||||
obj = urlopen(request)
|
||||
content_type = obj.info().get_content_type()
|
||||
else:
|
||||
content_type = obj.info().getheader("Content-Type")
|
||||
if content_type != "application/pdf":
|
||||
raise NotImplementedError("File format not supported")
|
||||
f.write(obj.read())
|
||||
|
|
@ -110,6 +98,7 @@ def download_url(url):
|
|||
shutil.move(f.name, filepath)
|
||||
return filepath
|
||||
|
||||
|
||||
common_kwargs = [
|
||||
"flag_size",
|
||||
"margins",
|
||||
|
|
@ -150,8 +139,7 @@ def validate_input(kwargs, flavor="lattice"):
|
|||
isec = set(kwargs.keys()).difference(set(parser_kwargs))
|
||||
if isec:
|
||||
raise ValueError(
|
||||
"{} cannot be used with flavor='{}'".format(
|
||||
",".join(sorted(isec)), flavor
|
||||
f"{','.join(sorted(isec))} cannot be used with flavor='{flavor}'"
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -763,7 +751,7 @@ def text_strip(text, strip=""):
|
|||
return text
|
||||
|
||||
stripped = re.sub(
|
||||
r"[{}]".format("".join(map(re.escape, strip))), "", text, re.UNICODE
|
||||
fr"[{''.join(map(re.escape, strip))}]", "", text, flags=re.UNICODE
|
||||
)
|
||||
return stripped
|
||||
|
||||
|
|
@ -998,9 +986,7 @@ def get_table_index(
|
|||
text_range = (t.x0, t.x1)
|
||||
col_range = (table.cols[0][0], table.cols[-1][1])
|
||||
warnings.warn(
|
||||
"{} {} does not lie in column range {}".format(
|
||||
text, text_range, col_range
|
||||
)
|
||||
f"{text} {text_range} does not lie in column range {col_range}"
|
||||
)
|
||||
r_idx = r
|
||||
c_idx = lt_col_overlap.index(max(lt_col_overlap))
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
chardet>=3.0.4
|
||||
click>=6.7
|
||||
matplotlib>=2.2.3
|
||||
numpy>=1.13.3
|
||||
|
|
@ -6,3 +7,4 @@ openpyxl>=2.5.8
|
|||
pandas>=0.23.4
|
||||
pdfminer.six>=20200402
|
||||
PyPDF2>=1.26.0
|
||||
Sphinx>=1.7.9
|
||||
|
|
|
|||
4
setup.py
4
setup.py
|
|
@ -71,9 +71,9 @@ def setup_package():
|
|||
# Trove classifiers
|
||||
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7'
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8'
|
||||
])
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,2 +1,3 @@
|
|||
import matplotlib
|
||||
matplotlib.use('agg')
|
||||
|
||||
matplotlib.use("agg")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
data_stream = [
|
||||
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
|
||||
|
|
|
|||
|
|
@ -140,10 +140,11 @@ def test_cli_password():
|
|||
def test_cli_output_format():
|
||||
with TemporaryDirectory() as tempdir:
|
||||
infile = os.path.join(testdir, "health.pdf")
|
||||
outfile = os.path.join(tempdir, "health.{}")
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
# json
|
||||
outfile = os.path.join(tempdir, "health.json")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "json", "--output", outfile.format("json"), "stream",
|
||||
|
|
@ -152,6 +153,7 @@ def test_cli_output_format():
|
|||
assert result.exit_code == 0
|
||||
|
||||
# excel
|
||||
outfile = os.path.join(tempdir, "health.xlsx")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "excel", "--output", outfile.format("xlsx"), "stream",
|
||||
|
|
@ -160,6 +162,7 @@ def test_cli_output_format():
|
|||
assert result.exit_code == 0
|
||||
|
||||
# html
|
||||
outfile = os.path.join(tempdir, "health.html")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "html", "--output", outfile.format("html"), "stream",
|
||||
|
|
@ -168,6 +171,7 @@ def test_cli_output_format():
|
|||
assert result.exit_code == 0
|
||||
|
||||
# zip
|
||||
outfile = os.path.join(tempdir, "health.csv")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
[
|
||||
|
|
@ -175,7 +179,7 @@ def test_cli_output_format():
|
|||
"--format",
|
||||
"csv",
|
||||
"--output",
|
||||
outfile.format("csv"),
|
||||
outfile,
|
||||
"stream",
|
||||
infile,
|
||||
],
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import camelot
|
|||
|
||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||
testdir = os.path.join(testdir, "files")
|
||||
filename = os.path.join(testdir, 'foo.pdf')
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
|
||||
|
||||
def test_unknown_flavor():
|
||||
|
|
@ -27,15 +27,14 @@ def test_input_kwargs():
|
|||
|
||||
|
||||
def test_unsupported_format():
|
||||
message = 'File format not supported'
|
||||
filename = os.path.join(testdir, 'foo.csv')
|
||||
message = "File format not supported"
|
||||
filename = os.path.join(testdir, "foo.csv")
|
||||
with pytest.raises(NotImplementedError, match=message):
|
||||
camelot.read_pdf(filename)
|
||||
|
||||
|
||||
def test_stream_equal_length():
|
||||
message = ("Length of table_areas and columns"
|
||||
" should be equal")
|
||||
message = "Length of table_areas and columns" " should be equal"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
camelot.read_pdf(
|
||||
filename,
|
||||
|
|
@ -46,9 +45,9 @@ def test_stream_equal_length():
|
|||
|
||||
|
||||
def test_image_warning():
|
||||
filename = os.path.join(testdir, 'image.pdf')
|
||||
filename = os.path.join(testdir, "image.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('error')
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
camelot.read_pdf(filename)
|
||||
assert str(e.value) == 'page-1 is image-based, camelot only works ' \
|
||||
|
|
@ -56,47 +55,47 @@ def test_image_warning():
|
|||
|
||||
|
||||
def test_no_tables_found():
|
||||
filename = os.path.join(testdir, 'blank.pdf')
|
||||
filename = os.path.join(testdir, "blank.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('error')
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
camelot.read_pdf(filename)
|
||||
assert str(e.value) == 'No tables found on page-1'
|
||||
|
||||
|
||||
def test_no_tables_found_logs_suppressed():
|
||||
filename = os.path.join(testdir, 'foo.pdf')
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
with warnings.catch_warnings():
|
||||
# the test should fail if any warning is thrown
|
||||
warnings.simplefilter('error')
|
||||
warnings.simplefilter("error")
|
||||
try:
|
||||
camelot.read_pdf(filename, suppress_stdout=True)
|
||||
except Warning as e:
|
||||
warning_text = str(e)
|
||||
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
||||
pytest.fail(f"Unexpected warning: {warning_text}")
|
||||
|
||||
|
||||
def test_no_tables_found_warnings_suppressed():
|
||||
filename = os.path.join(testdir, 'blank.pdf')
|
||||
filename = os.path.join(testdir, "blank.pdf")
|
||||
with warnings.catch_warnings():
|
||||
# the test should fail if any warning is thrown
|
||||
warnings.simplefilter('error')
|
||||
warnings.simplefilter("error")
|
||||
try:
|
||||
camelot.read_pdf(filename, suppress_stdout=True)
|
||||
except Warning as e:
|
||||
warning_text = str(e)
|
||||
pytest.fail('Unexpected warning: {}'.format(warning_text))
|
||||
pytest.fail(f"Unexpected warning: {warning_text}")
|
||||
|
||||
|
||||
def test_no_password():
|
||||
filename = os.path.join(testdir, 'health_protected.pdf')
|
||||
message = 'file has not been decrypted'
|
||||
filename = os.path.join(testdir, "health_protected.pdf")
|
||||
message = "file has not been decrypted"
|
||||
with pytest.raises(Exception, match=message):
|
||||
camelot.read_pdf(filename)
|
||||
|
||||
|
||||
def test_bad_password():
|
||||
filename = os.path.join(testdir, 'health_protected.pdf')
|
||||
message = 'file has not been decrypted'
|
||||
filename = os.path.join(testdir, "health_protected.pdf")
|
||||
message = "file has not been decrypted"
|
||||
with pytest.raises(Exception, match=message):
|
||||
camelot.read_pdf(filename, password='wrongpass')
|
||||
|
|
|
|||
Loading…
Reference in New Issue