Merge branch 'master' into hybrid-parser

pull/153/head
Frh 2020-06-14 08:53:43 -07:00
commit b43aca8ff5
21 changed files with 123 additions and 121 deletions

1
.github/FUNDING.yml vendored 100644
View File

@ -0,0 +1 @@
open_collective: camelot

24
.readthedocs.yml 100644
View File

@ -0,0 +1,24 @@
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py
# Build documentation with MkDocs
#mkdocs:
# configuration: mkdocs.yml
# Optionally build your docs in additional formats such as PDF
formats:
- pdf
# Optionally set the version of Python and requirements required to build your docs
python:
  version: 3.8
  install:
    - requirements: requirements.txt

View File

@ -7,10 +7,6 @@ install:
- make install - make install
jobs: jobs:
include: include:
- stage: test
script:
- make test
python: '3.5'
- stage: test - stage: test
script: script:
- make test - make test
@ -20,8 +16,13 @@ jobs:
- make test - make test
python: '3.7' python: '3.7'
dist: xenial dist: xenial
- stage: test
script:
- make test
python: '3.8'
dist: xenial
- stage: coverage - stage: coverage
python: '3.6' python: '3.8'
script: script:
- make test - make test
- codecov --verbose - codecov --verbose

View File

@ -4,6 +4,18 @@ Release History
master master
------ ------
0.8.0 (2020-05-24)
------------------
**Improvements**
* Drop Python 2 support!
* Remove Python 2.7 and 3.5 support.
* Replace all instances of `.format` with f-strings.
* Remove all `__future__` imports.
* Fix HTTP 403 forbidden exception in read_pdf(url) and remove Python 2 urllib support.
* Fix test data.
**Bugfixes** **Bugfixes**
* Fix library discovery on Windows. [#32](https://github.com/camelot-dev/camelot/pull/32) by [KOLANICH](https://github.com/KOLANICH). * Fix library discovery on Windows. [#32](https://github.com/camelot-dev/camelot/pull/32) by [KOLANICH](https://github.com/KOLANICH).

View File

@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import
__all__ = ("main",) __all__ = ("main",)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
VERSION = (0, 7, 3) VERSION = (0, 8, 0)
PRERELEASE = None # alpha, beta or rc PRERELEASE = None # alpha, beta or rc
REVISION = None REVISION = None
@ -8,9 +8,9 @@ REVISION = None
def generate_version(version, prerelease=None, revision=None): def generate_version(version, prerelease=None, revision=None):
version_parts = [".".join(map(str, version))] version_parts = [".".join(map(str, version))]
if prerelease is not None: if prerelease is not None:
version_parts.append("-{}".format(prerelease)) version_parts.append(f"-{prerelease}")
if revision is not None: if revision is not None:
version_parts.append(".{}".format(revision)) version_parts.append(f".{revision}")
return "".join(version_parts) return "".join(version_parts)

View File

@ -210,7 +210,7 @@ def lattice(c, *args, **kwargs):
filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, filepath, pages=pages, flavor="lattice", suppress_stdout=quiet,
**kwargs **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)
@ -304,7 +304,7 @@ def stream(c, *args, **kwargs):
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)
@ -399,7 +399,7 @@ def network(c, *args, **kwargs):
filepath, pages=pages, flavor="network", filepath, pages=pages, flavor="network",
suppress_stdout=quiet, **kwargs suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)

View File

@ -68,12 +68,8 @@ class TextAlignment():
def __repr__(self): def __repr__(self):
text_inside = " | ".join( text_inside = " | ".join(
map(lambda x: x.get_text(), self.textlines[:2])).replace("\n", "") map(lambda x: x.get_text(), self.textlines[:2])).replace("\n", "")
return "<TextEdge coord={coord} tl={tl_count} " \ return f"<TextEdge coord={self.coord} tl={len(self.textlines)} " \
"textlines text='{text_inside}...'>".format( f"textlines text='{text_inside}...'>"
coord=self.coord,
tl_count=len(self.textlines),
text_inside=text_inside
)
def register_aligned_textline(self, textline, coord): def register_aligned_textline(self, textline, coord):
"""Update new textline to this alignment, adapting its average.""" """Update new textline to this alignment, adapting its average."""
@ -116,13 +112,10 @@ class TextEdge(TextAlignment):
self.is_valid = False self.is_valid = False
def __repr__(self): def __repr__(self):
return "<TextEdge x={} y0={} y1={} align={} valid={}>".format( x = round(self.x, 2)
round(self.coord, 2), y0 = round(self.y0, 2)
round(self.y0, 2), y1 = round(self.y1, 2)
round(self.y1, 2), return f"<TextEdge x={x} y0={y0} y1={y1} align={self.align} valid={self.is_valid}>"
self.align,
self.is_valid,
)
def update_coords(self, x, textline, edge_tol=50): def update_coords(self, x, textline, edge_tol=50):
"""Updates the text edge's x and bottom y coordinates and sets """Updates the text edge's x and bottom y coordinates and sets
@ -386,12 +379,11 @@ class Cell():
self._text = "" self._text = ""
def __repr__(self): def __repr__(self):
return "<Cell x1={} y1={} x2={} y2={}>".format( x1 = round(self.x1, 2)
round(self.x1, 2), y1 = round(self.y1, 2)
round(self.y1, 2), x2 = round(self.x2, 2)
round(self.x2, 2), y2 = round(self.y2, 2)
round(self.y2, 2) return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
)
@property @property
def text(self): def text(self):
@ -465,7 +457,7 @@ class Table():
self.textlines = [] # List of actual textlines on the page self.textlines = [] # List of actual textlines on the page
def __repr__(self): def __repr__(self):
return "<{} shape={}>".format(self.__class__.__name__, self.shape) return f"<{self.__class__.__name__} shape={self.shape}>"
def __lt__(self, other): def __lt__(self, other):
if self.page == other.page: if self.page == other.page:
@ -739,7 +731,7 @@ class Table():
""" """
kw = { kw = {
"sheet_name": "page-{}-table-{}".format(self.page, self.order), "sheet_name": f"page-{self.page}-table-{self.order}",
"encoding": "utf-8", "encoding": "utf-8",
} }
kw.update(kwargs) kw.update(kwargs)
@ -777,7 +769,7 @@ class Table():
kw = {"if_exists": "replace", "index": False} kw = {"if_exists": "replace", "index": False}
kw.update(kwargs) kw.update(kwargs)
conn = sqlite3.connect(path) conn = sqlite3.connect(path)
table_name = "page-{}-table-{}".format(self.page, self.order) table_name = f"page-{self.page}-table-{self.order}"
self.df.to_sql(table_name, conn, **kw) self.df.to_sql(table_name, conn, **kw)
conn.commit() conn.commit()
conn.close() conn.close()
@ -831,7 +823,7 @@ class TableList():
self._tables = tables self._tables = tables
def __repr__(self): def __repr__(self):
return "<{} n={}>".format(self.__class__.__name__, self.n) return f"<{self.__class__.__name__} n={self.n}>"
def __len__(self): def __len__(self):
return len(self._tables) return len(self._tables)
@ -841,7 +833,7 @@ class TableList():
@staticmethod @staticmethod
def _format_func(table, f): def _format_func(table, f):
return getattr(table, "to_{}".format(f)) return getattr(table, f"to_{f}")
@property @property
def n(self): def n(self):
@ -852,10 +844,7 @@ class TableList():
root = kwargs.get("root") root = kwargs.get("root")
ext = kwargs.get("ext") ext = kwargs.get("ext")
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root, table.page, table.order,
ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
to_format = self._format_func(table, f) to_format = self._format_func(table, f)
to_format(filepath) to_format(filepath)
@ -868,12 +857,7 @@ class TableList():
zipname = os.path.join(os.path.dirname(path), root) + ".zip" zipname = os.path.join(os.path.dirname(path), root) + ".zip"
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root,
table.page,
table.order,
ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
z.write(filepath, os.path.basename(filepath)) z.write(filepath, os.path.basename(filepath))
@ -907,9 +891,8 @@ class TableList():
# pylint: disable=abstract-class-instantiated # pylint: disable=abstract-class-instantiated
writer = pd.ExcelWriter(filepath) writer = pd.ExcelWriter(filepath)
for table in self._tables: for table in self._tables:
sheet_name = "page-{}-table-{}".format(table.page, table.order) sheet_name = f"page-{table.page}-table-{table.order}"
table.df.to_excel(writer, sheet_name=sheet_name, table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
encoding="utf-8")
writer.save() writer.save()
if compress: if compress:
zipname = os.path.join(os.path.dirname(path), root) + ".zip" zipname = os.path.join(os.path.dirname(path), root) + ".zip"

View File

@ -81,6 +81,7 @@ def delete_instance(instance):
""" """
return libgs.gsapi_delete_instance(instance) return libgs.gsapi_delete_instance(instance)
if sys.platform == "win32": if sys.platform == "win32":
c_stdstream_call_t = WINFUNCTYPE(c_int, gs_main_instance, POINTER(c_char), c_int) c_stdstream_call_t = WINFUNCTYPE(c_int, gs_main_instance, POINTER(c_char), c_int)
else: else:
@ -247,7 +248,10 @@ if sys.platform == "win32":
libgs = __win32_finddll() libgs = __win32_finddll()
if not libgs: if not libgs:
import ctypes.util import ctypes.util
libgs = ctypes.util.find_library("".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))) # finds in %PATH%
libgs = ctypes.util.find_library(
"".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))
) # finds in %PATH%
if not libgs: if not libgs:
raise RuntimeError("Please make sure that Ghostscript is installed") raise RuntimeError("Please make sure that Ghostscript is installed")
libgs = windll.LoadLibrary(libgs) libgs = windll.LoadLibrary(libgs)

View File

@ -133,8 +133,7 @@ class PDFHandler():
infile = PdfFileReader(fileobj, strict=False) infile = PdfFileReader(fileobj, strict=False)
if infile.isEncrypted: if infile.isEncrypted:
infile.decrypt(self.password) infile.decrypt(self.password)
fpath = build_file_path_in_temp_dir( fpath = build_file_path_in_temp_dir(f"page-{page}.pdf")
"page-{page}.pdf".format(page=page))
froot, fext = os.path.splitext(fpath) froot, fext = os.path.splitext(fpath)
p = infile.getPage(page - 1) p = infile.getPage(page - 1)
outfile = PdfFileWriter() outfile = PdfFileWriter()
@ -211,8 +210,7 @@ class PDFHandler():
page_idx, layout_kwargs) page_idx, layout_kwargs)
if not suppress_stdout: if not suppress_stdout:
rootname = os.path.basename(parser.rootname) rootname = os.path.basename(parser.rootname)
logger.info( logger.info(f"Processing {rootname}")
"Processing {rootname}".format(rootname=rootname))
t = parser.extract_tables() t = parser.extract_tables()
tables.extend(t) tables.extend(t)
return TableList(sorted(tables)) return TableList(sorted(tables))

View File

@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import cv2 import cv2
import numpy as np import numpy as np

View File

@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import os import os
from .base import BaseParser from .base import BaseParser

View File

@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import warnings import warnings
from .base import TextBaseParser from .base import TextBaseParser
@ -167,8 +165,7 @@ class Stream(TextBaseParser):
ncols = max(set(elements), key=elements.count) ncols = max(set(elements), key=elements.count)
else: else:
warnings.warn( warnings.warn(
"No tables found in table area {bbox}".format( f"No tables found in table area {bbox}"
bbox=bbox)
) )
cols = [ cols = [
(t.x0, t.x1) (t.x0, t.x1)

View File

@ -191,14 +191,11 @@ class PlotMethods():
if table.flavor == "lattice" and kind in ["textedge"]: if table.flavor == "lattice" and kind in ["textedge"]:
raise NotImplementedError( raise NotImplementedError(
"Lattice flavor does not support kind='{}'".format(kind) f"Lattice flavor does not support kind='{kind}'"
) )
if table.flavor != "lattice" and kind in ["line"]: if table.flavor != "lattice" and kind in ["line"]:
raise NotImplementedError( raise NotImplementedError(
"{flavor} flavor does not support kind='{kind}'".format( f"{table.flavor} flavor does not support kind='{kind}'"
flavor=table.flavor,
kind=kind
)
) )
plot_method = getattr(self, kind) plot_method = getattr(self, kind)

View File

@ -1,10 +1,9 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import re
import os import os
import atexit import atexit
import sys import sys
import re
import random import random
import shutil import shutil
import string import string
@ -34,18 +33,9 @@ from pdfminer.layout import (
from .ext.ghostscript import Ghostscript from .ext.ghostscript import Ghostscript
# pylint: disable=import-error from urllib.request import Request, urlopen
# PyLint will evaluate both branches, and will necessarily complain about one
# of them.
PY3 = sys.version_info[0] >= 3
if PY3:
from urllib.request import urlopen
from urllib.parse import urlparse as parse_url from urllib.parse import urlparse as parse_url
from urllib.parse import uses_relative, uses_netloc, uses_params from urllib.parse import uses_relative, uses_netloc, uses_params
else:
from urllib2 import urlopen
from urlparse import urlparse as parse_url
from urlparse import uses_relative, uses_netloc, uses_params
_VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
@ -95,14 +85,12 @@ def download_url(url):
Temporary filepath. Temporary filepath.
""" """
filename = "{}.pdf".format(random_string(6)) filename = f"{random_string(6)}.pdf"
with tempfile.NamedTemporaryFile("wb", delete=False) as f: with tempfile.NamedTemporaryFile("wb", delete=False) as f:
req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) headers = {"User-Agent": "Mozilla/5.0"}
obj = urlopen(req) request = Request(url, None, headers)
if PY3: obj = urlopen(request)
content_type = obj.info().get_content_type() content_type = obj.info().get_content_type()
else:
content_type = obj.info().getheader("Content-Type")
if content_type != "application/pdf": if content_type != "application/pdf":
raise NotImplementedError("File format not supported") raise NotImplementedError("File format not supported")
f.write(obj.read()) f.write(obj.read())
@ -110,6 +98,7 @@ def download_url(url):
shutil.move(f.name, filepath) shutil.move(f.name, filepath)
return filepath return filepath
common_kwargs = [ common_kwargs = [
"flag_size", "flag_size",
"margins", "margins",
@ -150,8 +139,7 @@ def validate_input(kwargs, flavor="lattice"):
isec = set(kwargs.keys()).difference(set(parser_kwargs)) isec = set(kwargs.keys()).difference(set(parser_kwargs))
if isec: if isec:
raise ValueError( raise ValueError(
"{} cannot be used with flavor='{}'".format( f"{",".join(sorted(isec))} cannot be used with flavor='{flavor}'"
",".join(sorted(isec)), flavor
) )
) )
@ -763,7 +751,7 @@ def text_strip(text, strip=""):
return text return text
stripped = re.sub( stripped = re.sub(
r"[{}]".format("".join(map(re.escape, strip))), "", text, re.UNICODE fr"[{''.join(map(re.escape, strip))}]", "", text, re.UNICODE
) )
return stripped return stripped
@ -998,9 +986,7 @@ def get_table_index(
text_range = (t.x0, t.x1) text_range = (t.x0, t.x1)
col_range = (table.cols[0][0], table.cols[-1][1]) col_range = (table.cols[0][0], table.cols[-1][1])
warnings.warn( warnings.warn(
"{} {} does not lie in column range {}".format( f"{text} {text_range} does not lie in column range {col_range}"
text, text_range, col_range
)
) )
r_idx = r r_idx = r
c_idx = lt_col_overlap.index(max(lt_col_overlap)) c_idx = lt_col_overlap.index(max(lt_col_overlap))

View File

@ -1,3 +1,4 @@
chardet>=3.0.4
click>=6.7 click>=6.7
matplotlib>=2.2.3 matplotlib>=2.2.3
numpy>=1.13.3 numpy>=1.13.3
@ -6,3 +7,4 @@ openpyxl>=2.5.8
pandas>=0.23.4 pandas>=0.23.4
pdfminer.six>=20200402 pdfminer.six>=20200402
PyPDF2>=1.26.0 PyPDF2>=1.26.0
Sphinx>=1.7.9

View File

@ -71,9 +71,9 @@ def setup_package():
# Trove classifiers # Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa
'License :: OSI Approved :: MIT License', 'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7' 'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8'
]) ])
try: try:

View File

@ -1,2 +1,3 @@
import matplotlib import matplotlib
matplotlib.use('agg')
matplotlib.use("agg")

View File

@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals
data_stream = [ data_stream = [
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"], ["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],

View File

@ -140,10 +140,11 @@ def test_cli_password():
def test_cli_output_format(): def test_cli_output_format():
with TemporaryDirectory() as tempdir: with TemporaryDirectory() as tempdir:
infile = os.path.join(testdir, "health.pdf") infile = os.path.join(testdir, "health.pdf")
outfile = os.path.join(tempdir, "health.{}")
runner = CliRunner() runner = CliRunner()
# json # json
outfile = os.path.join(tempdir, "health.json")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "json", "--output", outfile.format("json"), "stream", ["--format", "json", "--output", outfile.format("json"), "stream",
@ -152,6 +153,7 @@ def test_cli_output_format():
assert result.exit_code == 0 assert result.exit_code == 0
# excel # excel
outfile = os.path.join(tempdir, "health.xlsx")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "excel", "--output", outfile.format("xlsx"), "stream", ["--format", "excel", "--output", outfile.format("xlsx"), "stream",
@ -160,6 +162,7 @@ def test_cli_output_format():
assert result.exit_code == 0 assert result.exit_code == 0
# html # html
outfile = os.path.join(tempdir, "health.html")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "html", "--output", outfile.format("html"), "stream", ["--format", "html", "--output", outfile.format("html"), "stream",
@ -168,6 +171,7 @@ def test_cli_output_format():
assert result.exit_code == 0 assert result.exit_code == 0
# zip # zip
outfile = os.path.join(tempdir, "health.csv")
result = runner.invoke( result = runner.invoke(
cli, cli,
[ [
@ -175,7 +179,7 @@ def test_cli_output_format():
"--format", "--format",
"csv", "csv",
"--output", "--output",
outfile.format("csv"), outfile,
"stream", "stream",
infile, infile,
], ],

View File

@ -10,7 +10,7 @@ import camelot
testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.dirname(os.path.abspath(__file__))
testdir = os.path.join(testdir, "files") testdir = os.path.join(testdir, "files")
filename = os.path.join(testdir, 'foo.pdf') filename = os.path.join(testdir, "foo.pdf")
def test_unknown_flavor(): def test_unknown_flavor():
@ -27,15 +27,14 @@ def test_input_kwargs():
def test_unsupported_format(): def test_unsupported_format():
message = 'File format not supported' message = "File format not supported"
filename = os.path.join(testdir, 'foo.csv') filename = os.path.join(testdir, "foo.csv")
with pytest.raises(NotImplementedError, match=message): with pytest.raises(NotImplementedError, match=message):
camelot.read_pdf(filename) camelot.read_pdf(filename)
def test_stream_equal_length(): def test_stream_equal_length():
message = ("Length of table_areas and columns" message = "Length of table_areas and columns" " should be equal"
" should be equal")
with pytest.raises(ValueError, match=message): with pytest.raises(ValueError, match=message):
camelot.read_pdf( camelot.read_pdf(
filename, filename,
@ -46,9 +45,9 @@ def test_stream_equal_length():
def test_image_warning(): def test_image_warning():
filename = os.path.join(testdir, 'image.pdf') filename = os.path.join(testdir, "image.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter('error') warnings.simplefilter("error")
with pytest.raises(UserWarning) as e: with pytest.raises(UserWarning) as e:
camelot.read_pdf(filename) camelot.read_pdf(filename)
assert str(e.value) == 'page-1 is image-based, camelot only works ' \ assert str(e.value) == 'page-1 is image-based, camelot only works ' \
@ -56,47 +55,47 @@ def test_image_warning():
def test_no_tables_found(): def test_no_tables_found():
filename = os.path.join(testdir, 'blank.pdf') filename = os.path.join(testdir, "blank.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter('error') warnings.simplefilter("error")
with pytest.raises(UserWarning) as e: with pytest.raises(UserWarning) as e:
camelot.read_pdf(filename) camelot.read_pdf(filename)
assert str(e.value) == 'No tables found on page-1' assert str(e.value) == 'No tables found on page-1'
def test_no_tables_found_logs_suppressed(): def test_no_tables_found_logs_suppressed():
filename = os.path.join(testdir, 'foo.pdf') filename = os.path.join(testdir, "foo.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
# the test should fail if any warning is thrown # the test should fail if any warning is thrown
warnings.simplefilter('error') warnings.simplefilter("error")
try: try:
camelot.read_pdf(filename, suppress_stdout=True) camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_tables_found_warnings_suppressed(): def test_no_tables_found_warnings_suppressed():
filename = os.path.join(testdir, 'blank.pdf') filename = os.path.join(testdir, "blank.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
# the test should fail if any warning is thrown # the test should fail if any warning is thrown
warnings.simplefilter('error') warnings.simplefilter("error")
try: try:
camelot.read_pdf(filename, suppress_stdout=True) camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_password(): def test_no_password():
filename = os.path.join(testdir, 'health_protected.pdf') filename = os.path.join(testdir, "health_protected.pdf")
message = 'file has not been decrypted' message = "file has not been decrypted"
with pytest.raises(Exception, match=message): with pytest.raises(Exception, match=message):
camelot.read_pdf(filename) camelot.read_pdf(filename)
def test_bad_password(): def test_bad_password():
filename = os.path.join(testdir, 'health_protected.pdf') filename = os.path.join(testdir, "health_protected.pdf")
message = 'file has not been decrypted' message = "file has not been decrypted"
with pytest.raises(Exception, match=message): with pytest.raises(Exception, match=message):
camelot.read_pdf(filename, password='wrongpass') camelot.read_pdf(filename, password='wrongpass')