Add f-strings and remove python3.5 test job
parent
afa1ba7c1f
commit
52b2a595b4
|
|
@ -8,10 +8,6 @@ install:
|
|||
- make install
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
python: '3.5'
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@ REVISION = None
|
|||
def generate_version(version, prerelease=None, revision=None):
|
||||
version_parts = [".".join(map(str, version))]
|
||||
if prerelease is not None:
|
||||
version_parts.append("-{}".format(prerelease))
|
||||
version_parts.append(f"-{prerelease}")
|
||||
if revision is not None:
|
||||
version_parts.append(".{}".format(revision))
|
||||
version_parts.append(f".{revision}")
|
||||
return "".join(version_parts)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ def lattice(c, *args, **kwargs):
|
|||
tables = read_pdf(
|
||||
filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs
|
||||
)
|
||||
click.echo("Found {} tables".format(tables.n))
|
||||
click.echo(f"Found {tables.n} tables")
|
||||
if plot_type is not None:
|
||||
for table in tables:
|
||||
plot(table, kind=plot_type)
|
||||
|
|
@ -295,7 +295,7 @@ def stream(c, *args, **kwargs):
|
|||
tables = read_pdf(
|
||||
filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
|
||||
)
|
||||
click.echo("Found {} tables".format(tables.n))
|
||||
click.echo(f"Found {tables.n} tables")
|
||||
if plot_type is not None:
|
||||
for table in tables:
|
||||
plot(table, kind=plot_type)
|
||||
|
|
|
|||
|
|
@ -52,13 +52,10 @@ class TextEdge(object):
|
|||
self.is_valid = False
|
||||
|
||||
def __repr__(self):
|
||||
return "<TextEdge x={} y0={} y1={} align={} valid={}>".format(
|
||||
round(self.x, 2),
|
||||
round(self.y0, 2),
|
||||
round(self.y1, 2),
|
||||
self.align,
|
||||
self.is_valid,
|
||||
)
|
||||
x = round(self.x, 2)
|
||||
y0 = round(self.y0, 2)
|
||||
y1 = round(self.y1, 2)
|
||||
return f"<TextEdge x={x} y0={y0} y1={y1} align={self.align} valid={self.is_valid}>"
|
||||
|
||||
def update_coords(self, x, y0, edge_tol=50):
|
||||
"""Updates the text edge's x and bottom y coordinates and sets
|
||||
|
|
@ -291,9 +288,11 @@ class Cell(object):
|
|||
self._text = ""
|
||||
|
||||
def __repr__(self):
|
||||
return "<Cell x1={} y1={} x2={} y2={}>".format(
|
||||
round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
|
||||
)
|
||||
x1 = round(self.x1, 2)
|
||||
y1 = round(self.y1, 2)
|
||||
x2 = round(self.x2, 2)
|
||||
y2 = round(self.y2, 2)
|
||||
return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
|
|
@ -351,7 +350,7 @@ class Table(object):
|
|||
self.page = None
|
||||
|
||||
def __repr__(self):
|
||||
return "<{} shape={}>".format(self.__class__.__name__, self.shape)
|
||||
return f"<{self.__class__.__name__} shape={self.shape}>"
|
||||
|
||||
def __lt__(self, other):
|
||||
if self.page == other.page:
|
||||
|
|
@ -612,7 +611,7 @@ class Table(object):
|
|||
|
||||
"""
|
||||
kw = {
|
||||
"sheet_name": "page-{}-table-{}".format(self.page, self.order),
|
||||
"sheet_name": f"page-{self.page}-table-{self.order}",
|
||||
"encoding": "utf-8",
|
||||
}
|
||||
kw.update(kwargs)
|
||||
|
|
@ -649,7 +648,7 @@ class Table(object):
|
|||
kw = {"if_exists": "replace", "index": False}
|
||||
kw.update(kwargs)
|
||||
conn = sqlite3.connect(path)
|
||||
table_name = "page-{}-table-{}".format(self.page, self.order)
|
||||
table_name = f"page-{self.page}-table-{self.order}"
|
||||
self.df.to_sql(table_name, conn, **kw)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
|
@ -670,7 +669,7 @@ class TableList(object):
|
|||
self._tables = tables
|
||||
|
||||
def __repr__(self):
|
||||
return "<{} n={}>".format(self.__class__.__name__, self.n)
|
||||
return f"<{self.__class__.__name__} n={self.n}>"
|
||||
|
||||
def __len__(self):
|
||||
return len(self._tables)
|
||||
|
|
@ -680,7 +679,7 @@ class TableList(object):
|
|||
|
||||
@staticmethod
|
||||
def _format_func(table, f):
|
||||
return getattr(table, "to_{}".format(f))
|
||||
return getattr(table, f"to_{f}")
|
||||
|
||||
@property
|
||||
def n(self):
|
||||
|
|
@ -691,9 +690,7 @@ class TableList(object):
|
|||
root = kwargs.get("root")
|
||||
ext = kwargs.get("ext")
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
||||
)
|
||||
filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
|
||||
filepath = os.path.join(dirname, filename)
|
||||
to_format = self._format_func(table, f)
|
||||
to_format(filepath)
|
||||
|
|
@ -706,9 +703,7 @@ class TableList(object):
|
|||
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
||||
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
||||
)
|
||||
filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
|
||||
filepath = os.path.join(dirname, filename)
|
||||
z.write(filepath, os.path.basename(filepath))
|
||||
|
||||
|
|
@ -741,7 +736,7 @@ class TableList(object):
|
|||
filepath = os.path.join(dirname, basename)
|
||||
writer = pd.ExcelWriter(filepath)
|
||||
for table in self._tables:
|
||||
sheet_name = "page-{}-table-{}".format(table.page, table.order)
|
||||
sheet_name = f"page-{table.page}-table-{table.order}"
|
||||
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
|
||||
writer.save()
|
||||
if compress:
|
||||
|
|
|
|||
|
|
@ -106,7 +106,7 @@ class PDFHandler(object):
|
|||
infile = PdfFileReader(fileobj, strict=False)
|
||||
if infile.isEncrypted:
|
||||
infile.decrypt(self.password)
|
||||
fpath = os.path.join(temp, "page-{0}.pdf".format(page))
|
||||
fpath = os.path.join(temp, f"page-{page}.pdf")
|
||||
froot, fext = os.path.splitext(fpath)
|
||||
p = infile.getPage(page - 1)
|
||||
outfile = PdfFileWriter()
|
||||
|
|
@ -164,7 +164,7 @@ class PDFHandler(object):
|
|||
for p in self.pages:
|
||||
self._save_page(self.filepath, p, tempdir)
|
||||
pages = [
|
||||
os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
|
||||
os.path.join(tempdir, f"page-{p}.pdf") for p in self.pages
|
||||
]
|
||||
parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
|
||||
for p in pages:
|
||||
|
|
|
|||
|
|
@ -357,7 +357,7 @@ class Stream(BaseParser):
|
|||
ncols = max(set(elements), key=elements.count)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found in table area {}".format(table_idx + 1)
|
||||
f"No tables found in table area {table_idx + 1}"
|
||||
)
|
||||
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
|
||||
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
|
||||
|
|
@ -432,19 +432,19 @@ class Stream(BaseParser):
|
|||
|
||||
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
|
||||
self._generate_layout(filename, layout_kwargs)
|
||||
base_filename = os.path.basename(self.rootname)
|
||||
|
||||
if not suppress_stdout:
|
||||
logger.info("Processing {}".format(os.path.basename(self.rootname)))
|
||||
logger.info(f"Processing {base_filename}")
|
||||
|
||||
if not self.horizontal_text:
|
||||
if self.images:
|
||||
warnings.warn(
|
||||
"{} is image-based, camelot only works on"
|
||||
" text-based pages.".format(os.path.basename(self.rootname))
|
||||
f"{base_filename} is image-based, camelot only works on"
|
||||
" text-based pages."
|
||||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found on {}".format(os.path.basename(self.rootname))
|
||||
)
|
||||
warnings.warn(f"No tables found on {base_filename}")
|
||||
return []
|
||||
|
||||
self._generate_table_bbox()
|
||||
|
|
|
|||
|
|
@ -35,11 +35,11 @@ class PlotMethods(object):
|
|||
|
||||
if table.flavor == "lattice" and kind in ["textedge"]:
|
||||
raise NotImplementedError(
|
||||
"Lattice flavor does not support kind='{}'".format(kind)
|
||||
f"Lattice flavor does not support kind='{kind}'"
|
||||
)
|
||||
elif table.flavor == "stream" and kind in ["joint", "line"]:
|
||||
raise NotImplementedError(
|
||||
"Stream flavor does not support kind='{}'".format(kind)
|
||||
f"Stream flavor does not support kind='{kind}'"
|
||||
)
|
||||
|
||||
plot_method = getattr(self, kind)
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ def download_url(url):
|
|||
Temporary filepath.
|
||||
|
||||
"""
|
||||
filename = "{}.pdf".format(random_string(6))
|
||||
filename = f"{random_string(6)}.pdf"
|
||||
with tempfile.NamedTemporaryFile("wb", delete=False) as f:
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
request = Request(url, None, headers)
|
||||
|
|
@ -113,9 +113,7 @@ def validate_input(kwargs, flavor="lattice"):
|
|||
isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
|
||||
if isec:
|
||||
raise ValueError(
|
||||
"{} cannot be used with flavor='{}'".format(
|
||||
",".join(sorted(isec)), flavor
|
||||
)
|
||||
f"{','.join(sorted(isec))} cannot be used with flavor='{flavor}'"
|
||||
)
|
||||
|
||||
if flavor == "lattice":
|
||||
|
|
@ -413,7 +411,7 @@ def text_strip(text, strip=""):
|
|||
return text
|
||||
|
||||
stripped = re.sub(
|
||||
r"[{}]".format("".join(map(re.escape, strip))), "", text, re.UNICODE
|
||||
fr"[{''.join(map(re.escape, strip))}]", "", text, re.UNICODE
|
||||
)
|
||||
return stripped
|
||||
|
||||
|
|
@ -650,9 +648,7 @@ def get_table_index(
|
|||
text_range = (t.x0, t.x1)
|
||||
col_range = (table.cols[0][0], table.cols[-1][1])
|
||||
warnings.warn(
|
||||
"{} {} does not lie in column range {}".format(
|
||||
text, text_range, col_range
|
||||
)
|
||||
f"{text} {text_range} does not lie in column range {col_range}"
|
||||
)
|
||||
r_idx = r
|
||||
c_idx = lt_col_overlap.index(max(lt_col_overlap))
|
||||
|
|
|
|||
|
|
@ -114,31 +114,35 @@ def test_cli_password():
|
|||
def test_cli_output_format():
|
||||
with TemporaryDirectory() as tempdir:
|
||||
infile = os.path.join(testdir, "health.pdf")
|
||||
outfile = os.path.join(tempdir, "health.{}")
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
# json
|
||||
outfile = os.path.join(tempdir, "health.json")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "json", "--output", outfile.format("json"), "stream", infile],
|
||||
["--format", "json", "--output", outfile, "stream", infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# excel
|
||||
outfile = os.path.join(tempdir, "health.xlsx")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile],
|
||||
["--format", "excel", "--output", outfile, "stream", infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# html
|
||||
outfile = os.path.join(tempdir, "health.html")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "html", "--output", outfile.format("html"), "stream", infile],
|
||||
["--format", "html", "--output", outfile, "stream", infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# zip
|
||||
outfile = os.path.join(tempdir, "health.csv")
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
[
|
||||
|
|
@ -146,7 +150,7 @@ def test_cli_output_format():
|
|||
"--format",
|
||||
"csv",
|
||||
"--output",
|
||||
outfile.format("csv"),
|
||||
outfile,
|
||||
"stream",
|
||||
infile,
|
||||
],
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ def test_no_tables_found_logs_suppressed():
|
|||
tables = camelot.read_pdf(filename, suppress_stdout=True)
|
||||
except Warning as e:
|
||||
warning_text = str(e)
|
||||
pytest.fail("Unexpected warning: {}".format(warning_text))
|
||||
pytest.fail(f"Unexpected warning: {warning_text}")
|
||||
|
||||
|
||||
def test_no_tables_found_warnings_suppressed():
|
||||
|
|
@ -85,7 +85,7 @@ def test_no_tables_found_warnings_suppressed():
|
|||
tables = camelot.read_pdf(filename, suppress_stdout=True)
|
||||
except Warning as e:
|
||||
warning_text = str(e)
|
||||
pytest.fail("Unexpected warning: {}".format(warning_text))
|
||||
pytest.fail(f"Unexpected warning: {warning_text}")
|
||||
|
||||
|
||||
def test_no_password():
|
||||
|
|
|
|||
Loading…
Reference in New Issue