Merge pull request #146 from camelot-dev/add-python38-travis

[MRG] Fix test data and drop python2 support
pull/166/head
Vinayak Mehta 2020-05-24 18:31:27 +05:30 committed by GitHub
commit 420d5aa624
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 124 additions and 205 deletions

View File

@@ -8,14 +8,6 @@ install:
- make install - make install
jobs: jobs:
include: include:
- stage: test
script:
- make test
python: '2.7'
- stage: test
script:
- make test
python: '3.5'
- stage: test - stage: test
script: script:
- make test - make test
@@ -25,8 +17,13 @@ jobs:
- make test - make test
python: '3.7' python: '3.7'
dist: xenial dist: xenial
- stage: test
script:
- make test
python: '3.8'
dist: xenial
- stage: coverage - stage: coverage
python: '3.6' python: '3.8'
script: script:
- make test - make test
- codecov --verbose - codecov --verbose

View File

@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import absolute_import
__all__ = ("main",) __all__ = ("main",)

View File

@@ -8,9 +8,9 @@ REVISION = None
def generate_version(version, prerelease=None, revision=None): def generate_version(version, prerelease=None, revision=None):
version_parts = [".".join(map(str, version))] version_parts = [".".join(map(str, version))]
if prerelease is not None: if prerelease is not None:
version_parts.append("-{}".format(prerelease)) version_parts.append(f"-{prerelease}")
if revision is not None: if revision is not None:
version_parts.append(".{}".format(revision)) version_parts.append(f".{revision}")
return "".join(version_parts) return "".join(version_parts)

View File

@@ -204,7 +204,7 @@ def lattice(c, *args, **kwargs):
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)
@@ -295,7 +295,7 @@ def stream(c, *args, **kwargs):
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)

View File

@@ -52,13 +52,10 @@ class TextEdge(object):
self.is_valid = False self.is_valid = False
def __repr__(self): def __repr__(self):
return "<TextEdge x={} y0={} y1={} align={} valid={}>".format( x = round(self.x, 2)
round(self.x, 2), y0 = round(self.y0, 2)
round(self.y0, 2), y1 = round(self.y1, 2)
round(self.y1, 2), return f"<TextEdge x={x} y0={y0} y1={y1} align={self.align} valid={self.is_valid}>"
self.align,
self.is_valid,
)
def update_coords(self, x, y0, edge_tol=50): def update_coords(self, x, y0, edge_tol=50):
"""Updates the text edge's x and bottom y coordinates and sets """Updates the text edge's x and bottom y coordinates and sets
@@ -291,9 +288,11 @@ class Cell(object):
self._text = "" self._text = ""
def __repr__(self): def __repr__(self):
return "<Cell x1={} y1={} x2={} y2={}>".format( x1 = round(self.x1, 2)
round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2) y1 = round(self.y1, 2)
) x2 = round(self.x2, 2)
y2 = round(self.y2, 2)
return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
@property @property
def text(self): def text(self):
@@ -351,7 +350,7 @@ class Table(object):
self.page = None self.page = None
def __repr__(self): def __repr__(self):
return "<{} shape={}>".format(self.__class__.__name__, self.shape) return f"<{self.__class__.__name__} shape={self.shape}>"
def __lt__(self, other): def __lt__(self, other):
if self.page == other.page: if self.page == other.page:
@@ -612,7 +611,7 @@ class Table(object):
""" """
kw = { kw = {
"sheet_name": "page-{}-table-{}".format(self.page, self.order), "sheet_name": f"page-{self.page}-table-{self.order}",
"encoding": "utf-8", "encoding": "utf-8",
} }
kw.update(kwargs) kw.update(kwargs)
@@ -649,7 +648,7 @@ class Table(object):
kw = {"if_exists": "replace", "index": False} kw = {"if_exists": "replace", "index": False}
kw.update(kwargs) kw.update(kwargs)
conn = sqlite3.connect(path) conn = sqlite3.connect(path)
table_name = "page-{}-table-{}".format(self.page, self.order) table_name = f"page-{self.page}-table-{self.order}"
self.df.to_sql(table_name, conn, **kw) self.df.to_sql(table_name, conn, **kw)
conn.commit() conn.commit()
conn.close() conn.close()
@@ -670,7 +669,7 @@ class TableList(object):
self._tables = tables self._tables = tables
def __repr__(self): def __repr__(self):
return "<{} n={}>".format(self.__class__.__name__, self.n) return f"<{self.__class__.__name__} n={self.n}>"
def __len__(self): def __len__(self):
return len(self._tables) return len(self._tables)
@@ -680,7 +679,7 @@ class TableList(object):
@staticmethod @staticmethod
def _format_func(table, f): def _format_func(table, f):
return getattr(table, "to_{}".format(f)) return getattr(table, f"to_{f}")
@property @property
def n(self): def n(self):
@@ -691,9 +690,7 @@ class TableList(object):
root = kwargs.get("root") root = kwargs.get("root")
ext = kwargs.get("ext") ext = kwargs.get("ext")
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
to_format = self._format_func(table, f) to_format = self._format_func(table, f)
to_format(filepath) to_format(filepath)
@@ -706,9 +703,7 @@ class TableList(object):
zipname = os.path.join(os.path.dirname(path), root) + ".zip" zipname = os.path.join(os.path.dirname(path), root) + ".zip"
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
z.write(filepath, os.path.basename(filepath)) z.write(filepath, os.path.basename(filepath))
@@ -741,7 +736,7 @@ class TableList(object):
filepath = os.path.join(dirname, basename) filepath = os.path.join(dirname, basename)
writer = pd.ExcelWriter(filepath) writer = pd.ExcelWriter(filepath)
for table in self._tables: for table in self._tables:
sheet_name = "page-{}-table-{}".format(table.page, table.order) sheet_name = f"page-{table.page}-table-{table.order}"
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8") table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
writer.save() writer.save()
if compress: if compress:

View File

@@ -81,6 +81,7 @@ def delete_instance(instance):
""" """
return libgs.gsapi_delete_instance(instance) return libgs.gsapi_delete_instance(instance)
if sys.platform == "win32": if sys.platform == "win32":
c_stdstream_call_t = WINFUNCTYPE(c_int, gs_main_instance, POINTER(c_char), c_int) c_stdstream_call_t = WINFUNCTYPE(c_int, gs_main_instance, POINTER(c_char), c_int)
else: else:
@@ -247,7 +248,10 @@ if sys.platform == "win32":
libgs = __win32_finddll() libgs = __win32_finddll()
if not libgs: if not libgs:
import ctypes.util import ctypes.util
libgs = ctypes.util.find_library("".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))) # finds in %PATH%
libgs = ctypes.util.find_library(
"".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll"))
) # finds in %PATH%
if not libgs: if not libgs:
raise RuntimeError("Please make sure that Ghostscript is installed") raise RuntimeError("Please make sure that Ghostscript is installed")
libgs = windll.LoadLibrary(libgs) libgs = windll.LoadLibrary(libgs)

View File

@@ -106,7 +106,7 @@ class PDFHandler(object):
infile = PdfFileReader(fileobj, strict=False) infile = PdfFileReader(fileobj, strict=False)
if infile.isEncrypted: if infile.isEncrypted:
infile.decrypt(self.password) infile.decrypt(self.password)
fpath = os.path.join(temp, "page-{0}.pdf".format(page)) fpath = os.path.join(temp, f"page-{page}.pdf")
froot, fext = os.path.splitext(fpath) froot, fext = os.path.splitext(fpath)
p = infile.getPage(page - 1) p = infile.getPage(page - 1)
outfile = PdfFileWriter() outfile = PdfFileWriter()
@@ -164,7 +164,7 @@ class PDFHandler(object):
for p in self.pages: for p in self.pages:
self._save_page(self.filepath, p, tempdir) self._save_page(self.filepath, p, tempdir)
pages = [ pages = [
os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages os.path.join(tempdir, f"page-{p}.pdf") for p in self.pages
] ]
parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs) parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
for p in pages: for p in pages:

View File

@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import cv2 import cv2
import numpy as np import numpy as np

View File

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import os import os
import sys import sys
import copy import copy

View File

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import os import os
import logging import logging
import warnings import warnings
@@ -358,7 +357,7 @@ class Stream(BaseParser):
ncols = max(set(elements), key=elements.count) ncols = max(set(elements), key=elements.count)
else: else:
warnings.warn( warnings.warn(
"No tables found in table area {}".format(table_idx + 1) f"No tables found in table area {table_idx + 1}"
) )
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r] cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol) cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
@@ -433,19 +432,19 @@ class Stream(BaseParser):
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
self._generate_layout(filename, layout_kwargs) self._generate_layout(filename, layout_kwargs)
base_filename = os.path.basename(self.rootname)
if not suppress_stdout: if not suppress_stdout:
logger.info("Processing {}".format(os.path.basename(self.rootname))) logger.info(f"Processing {base_filename}")
if not self.horizontal_text: if not self.horizontal_text:
if self.images: if self.images:
warnings.warn( warnings.warn(
"{} is image-based, camelot only works on" f"{base_filename} is image-based, camelot only works on"
" text-based pages.".format(os.path.basename(self.rootname)) " text-based pages."
) )
else: else:
warnings.warn( warnings.warn(f"No tables found on {base_filename}")
"No tables found on {}".format(os.path.basename(self.rootname))
)
return [] return []
self._generate_table_bbox() self._generate_table_bbox()

View File

@@ -35,11 +35,11 @@ class PlotMethods(object):
if table.flavor == "lattice" and kind in ["textedge"]: if table.flavor == "lattice" and kind in ["textedge"]:
raise NotImplementedError( raise NotImplementedError(
"Lattice flavor does not support kind='{}'".format(kind) f"Lattice flavor does not support kind='{kind}'"
) )
elif table.flavor == "stream" and kind in ["joint", "line"]: elif table.flavor == "stream" and kind in ["joint", "line"]:
raise NotImplementedError( raise NotImplementedError(
"Stream flavor does not support kind='{}'".format(kind) f"Stream flavor does not support kind='{kind}'"
) )
plot_method = getattr(self, kind) plot_method = getattr(self, kind)

View File

@@ -1,9 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import division
import re
import os import os
import sys import re
import random import random
import shutil import shutil
import string import string
@@ -29,16 +27,9 @@ from pdfminer.layout import (
LTImage, LTImage,
) )
from urllib.request import Request, urlopen
PY3 = sys.version_info[0] >= 3
if PY3:
from urllib.request import urlopen
from urllib.parse import urlparse as parse_url from urllib.parse import urlparse as parse_url
from urllib.parse import uses_relative, uses_netloc, uses_params from urllib.parse import uses_relative, uses_netloc, uses_params
else:
from urllib2 import urlopen
from urlparse import urlparse as parse_url
from urlparse import uses_relative, uses_netloc, uses_params
_VALID_URLS = set(uses_relative + uses_netloc + uses_params) _VALID_URLS = set(uses_relative + uses_netloc + uses_params)
@@ -88,13 +79,12 @@ def download_url(url):
Temporary filepath. Temporary filepath.
""" """
filename = "{}.pdf".format(random_string(6)) filename = f"{random_string(6)}.pdf"
with tempfile.NamedTemporaryFile("wb", delete=False) as f: with tempfile.NamedTemporaryFile("wb", delete=False) as f:
obj = urlopen(url) headers = {"User-Agent": "Mozilla/5.0"}
if PY3: request = Request(url, None, headers)
obj = urlopen(request)
content_type = obj.info().get_content_type() content_type = obj.info().get_content_type()
else:
content_type = obj.info().getheader("Content-Type")
if content_type != "application/pdf": if content_type != "application/pdf":
raise NotImplementedError("File format not supported") raise NotImplementedError("File format not supported")
f.write(obj.read()) f.write(obj.read())
@@ -123,9 +113,7 @@ def validate_input(kwargs, flavor="lattice"):
isec = set(parser_kwargs).intersection(set(input_kwargs.keys())) isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
if isec: if isec:
raise ValueError( raise ValueError(
"{} cannot be used with flavor='{}'".format( f"{','.join(sorted(isec))} cannot be used with flavor='{flavor}'"
",".join(sorted(isec)), flavor
)
) )
if flavor == "lattice": if flavor == "lattice":
@@ -423,7 +411,7 @@ def text_strip(text, strip=""):
return text return text
stripped = re.sub( stripped = re.sub(
r"[{}]".format("".join(map(re.escape, strip))), "", text, re.UNICODE fr"[{''.join(map(re.escape, strip))}]", "", text, re.UNICODE
) )
return stripped return stripped
@@ -660,9 +648,7 @@ def get_table_index(
text_range = (t.x0, t.x1) text_range = (t.x0, t.x1)
col_range = (table.cols[0][0], table.cols[-1][1]) col_range = (table.cols[0][0], table.cols[-1][1])
warnings.warn( warnings.warn(
"{} {} does not lie in column range {}".format( f"{text} {text_range} does not lie in column range {col_range}"
text, text_range, col_range
)
) )
r_idx = r r_idx = r
c_idx = lt_col_overlap.index(max(lt_col_overlap)) c_idx = lt_col_overlap.index(max(lt_col_overlap))

View File

@@ -71,10 +71,10 @@ def setup_package():
# Trove classifiers # Trove classifiers
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
'License :: OSI Approved :: MIT License', 'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7' 'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8'
]) ])
try: try:

View File

@@ -1,2 +1,3 @@
import matplotlib import matplotlib
matplotlib.use('agg')
matplotlib.use("agg")

View File

@@ -1,19 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals
data_stream = [ data_stream = [
[
"",
"Table: 5 Public Health Outlay 2012-13 (Budget Estimates) (Rs. in 000)",
"",
"",
"",
"",
"",
"",
],
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"], ["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
["", "", "", "", "", "Revenue &", "", ""], ["", "", "", "", "", "Revenue &", "", ""],
["", "Medical &", "Family", "Medical &", "Family", "", "", ""], ["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
@@ -829,18 +817,6 @@ data_stream_table_rotated = [
] ]
data_stream_two_tables_1 = [ data_stream_two_tables_1 = [
[
"[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)",
"",
"",
"",
"",
"",
"",
"",
"",
"",
],
[ [
"Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated", "Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
"", "",
@@ -1300,29 +1276,10 @@ data_stream_two_tables_1 = [
"", "",
"", "",
], ],
[
"",
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
"",
"",
"",
"",
"",
"",
"",
"",
],
] ]
data_stream_two_tables_2 = [ data_stream_two_tables_2 = [
[
"",
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
"",
"",
"",
"",
],
["Table 325. Arrests by Race: 2009", "", "", "", "", ""], ["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
[ [
"[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies", "[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
@@ -1600,16 +1557,9 @@ data_stream_two_tables_2 = [
"3,950", "3,950",
], ],
["1 Except forcible rape and prostitution.", "", "", "", "", ""], ["1 Except forcible rape and prostitution.", "", "", "", "", ""],
[
"",
"Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,",
"",
"",
"",
"",
],
] ]
data_stream_table_areas = [ data_stream_table_areas = [
["", "One Withholding"], ["", "One Withholding"],
["Payroll Period", "Allowance"], ["Payroll Period", "Allowance"],
@@ -1776,18 +1726,7 @@ data_stream_columns = [
] ]
data_stream_split_text = [ data_stream_split_text = [
[ ["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""],
"FEB",
"RUAR",
"Y 2014 M27 (BUS)",
"",
"ALPHABETIC LISTING BY T",
"YPE",
"",
"",
"",
"ABLPDM27",
],
["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"], ["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""], ["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""], ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
@@ -2121,6 +2060,7 @@ data_stream_split_text = [
], ],
] ]
data_stream_flag_size = [ data_stream_flag_size = [
[ [
"States", "States",

Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6.7 KiB

After

Width:  |  Height:  |  Size: 6.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.8 KiB

After

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -114,31 +114,35 @@ def test_cli_password():
def test_cli_output_format(): def test_cli_output_format():
with TemporaryDirectory() as tempdir: with TemporaryDirectory() as tempdir:
infile = os.path.join(testdir, "health.pdf") infile = os.path.join(testdir, "health.pdf")
outfile = os.path.join(tempdir, "health.{}")
runner = CliRunner() runner = CliRunner()
# json # json
outfile = os.path.join(tempdir, "health.json")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "json", "--output", outfile.format("json"), "stream", infile], ["--format", "json", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# excel # excel
outfile = os.path.join(tempdir, "health.xlsx")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile], ["--format", "excel", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# html # html
outfile = os.path.join(tempdir, "health.html")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "html", "--output", outfile.format("html"), "stream", infile], ["--format", "html", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# zip # zip
outfile = os.path.join(tempdir, "health.csv")
result = runner.invoke( result = runner.invoke(
cli, cli,
[ [
@@ -146,7 +150,7 @@ def test_cli_output_format():
"--format", "--format",
"csv", "csv",
"--output", "--output",
outfile.format("csv"), outfile,
"stream", "stream",
infile, infile,
], ],

View File

@@ -10,88 +10,93 @@ import camelot
testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.dirname(os.path.abspath(__file__))
testdir = os.path.join(testdir, "files") testdir = os.path.join(testdir, "files")
filename = os.path.join(testdir, 'foo.pdf') filename = os.path.join(testdir, "foo.pdf")
def test_unknown_flavor(): def test_unknown_flavor():
message = ("Unknown flavor specified." message = "Unknown flavor specified." " Use either 'lattice' or 'stream'"
" Use either 'lattice' or 'stream'")
with pytest.raises(NotImplementedError, match=message): with pytest.raises(NotImplementedError, match=message):
tables = camelot.read_pdf(filename, flavor='chocolate') tables = camelot.read_pdf(filename, flavor="chocolate")
def test_input_kwargs(): def test_input_kwargs():
message = "columns cannot be used with flavor='lattice'" message = "columns cannot be used with flavor='lattice'"
with pytest.raises(ValueError, match=message): with pytest.raises(ValueError, match=message):
tables = camelot.read_pdf(filename, columns=['10,20,30,40']) tables = camelot.read_pdf(filename, columns=["10,20,30,40"])
def test_unsupported_format(): def test_unsupported_format():
message = 'File format not supported' message = "File format not supported"
filename = os.path.join(testdir, 'foo.csv') filename = os.path.join(testdir, "foo.csv")
with pytest.raises(NotImplementedError, match=message): with pytest.raises(NotImplementedError, match=message):
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
def test_stream_equal_length(): def test_stream_equal_length():
message = ("Length of table_areas and columns" message = "Length of table_areas and columns" " should be equal"
" should be equal")
with pytest.raises(ValueError, match=message): with pytest.raises(ValueError, match=message):
tables = camelot.read_pdf(filename, flavor='stream', tables = camelot.read_pdf(
table_areas=['10,20,30,40'], columns=['10,20,30,40', '10,20,30,40']) filename,
flavor="stream",
table_areas=["10,20,30,40"],
columns=["10,20,30,40", "10,20,30,40"],
)
def test_image_warning(): def test_image_warning():
filename = os.path.join(testdir, 'image.pdf') filename = os.path.join(testdir, "image.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter('error') warnings.simplefilter("error")
with pytest.raises(UserWarning) as e: with pytest.raises(UserWarning) as e:
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
assert str(e.value) == 'page-1 is image-based, camelot only works on text-based pages.' assert (
str(e.value)
== "page-1 is image-based, camelot only works on text-based pages."
)
def test_no_tables_found(): def test_no_tables_found():
filename = os.path.join(testdir, 'blank.pdf') filename = os.path.join(testdir, "blank.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter('error') warnings.simplefilter("error")
with pytest.raises(UserWarning) as e: with pytest.raises(UserWarning) as e:
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
assert str(e.value) == 'No tables found on page-1' assert str(e.value) == "No tables found on page-1"
def test_no_tables_found_logs_suppressed(): def test_no_tables_found_logs_suppressed():
filename = os.path.join(testdir, 'foo.pdf') filename = os.path.join(testdir, "foo.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
# the test should fail if any warning is thrown # the test should fail if any warning is thrown
warnings.simplefilter('error') warnings.simplefilter("error")
try: try:
tables = camelot.read_pdf(filename, suppress_stdout=True) tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_tables_found_warnings_suppressed(): def test_no_tables_found_warnings_suppressed():
filename = os.path.join(testdir, 'blank.pdf') filename = os.path.join(testdir, "blank.pdf")
with warnings.catch_warnings(): with warnings.catch_warnings():
# the test should fail if any warning is thrown # the test should fail if any warning is thrown
warnings.simplefilter('error') warnings.simplefilter("error")
try: try:
tables = camelot.read_pdf(filename, suppress_stdout=True) tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_password(): def test_no_password():
filename = os.path.join(testdir, 'health_protected.pdf') filename = os.path.join(testdir, "health_protected.pdf")
message = 'file has not been decrypted' message = "file has not been decrypted"
with pytest.raises(Exception, match=message): with pytest.raises(Exception, match=message):
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
def test_bad_password(): def test_bad_password():
filename = os.path.join(testdir, 'health_protected.pdf') filename = os.path.join(testdir, "health_protected.pdf")
message = 'file has not been decrypted' message = "file has not been decrypted"
with pytest.raises(Exception, match=message): with pytest.raises(Exception, match=message):
tables = camelot.read_pdf(filename, password='wrongpass') tables = camelot.read_pdf(filename, password="wrongpass")

View File

@@ -11,57 +11,50 @@ testdir = os.path.dirname(os.path.abspath(__file__))
testdir = os.path.join(testdir, "files") testdir = os.path.join(testdir, "files")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_text_plot(): def test_text_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='text') return camelot.plot(tables[0], kind="text")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_grid_plot(): def test_grid_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='grid') return camelot.plot(tables[0], kind="grid")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_lattice_contour_plot(): def test_lattice_contour_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='contour') return camelot.plot(tables[0], kind="contour")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_stream_contour_plot(): def test_stream_contour_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='stream') tables = camelot.read_pdf(filename, flavor="stream")
return camelot.plot(tables[0], kind='contour') return camelot.plot(tables[0], kind="contour")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_line_plot(): def test_line_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='line') return camelot.plot(tables[0], kind="line")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_joint_plot(): def test_joint_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='joint') return camelot.plot(tables[0], kind="joint")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
baseline_dir="files/baseline_plots", remove_text=True)
def test_textedge_plot(): def test_textedge_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='stream') tables = camelot.read_pdf(filename, flavor="stream")
return camelot.plot(tables[0], kind='textedge') return camelot.plot(tables[0], kind="textedge")