Add f-strings and remove python3.5 test job

pull/146/head
Vinayak Mehta 2020-05-24 18:14:43 +05:30
parent afa1ba7c1f
commit 52b2a595b4
No known key found for this signature in database
GPG Key ID: 2170CDB940114C1D
10 changed files with 47 additions and 56 deletions

View File

@ -8,10 +8,6 @@ install:
- make install - make install
jobs: jobs:
include: include:
- stage: test
script:
- make test
python: '3.5'
- stage: test - stage: test
script: script:
- make test - make test

View File

@ -8,9 +8,9 @@ REVISION = None
def generate_version(version, prerelease=None, revision=None): def generate_version(version, prerelease=None, revision=None):
version_parts = [".".join(map(str, version))] version_parts = [".".join(map(str, version))]
if prerelease is not None: if prerelease is not None:
version_parts.append("-{}".format(prerelease)) version_parts.append(f"-{prerelease}")
if revision is not None: if revision is not None:
version_parts.append(".{}".format(revision)) version_parts.append(f".{revision}")
return "".join(version_parts) return "".join(version_parts)

View File

@ -204,7 +204,7 @@ def lattice(c, *args, **kwargs):
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)
@ -295,7 +295,7 @@ def stream(c, *args, **kwargs):
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo(f"Found {tables.n} tables")
if plot_type is not None: if plot_type is not None:
for table in tables: for table in tables:
plot(table, kind=plot_type) plot(table, kind=plot_type)

View File

@ -52,13 +52,10 @@ class TextEdge(object):
self.is_valid = False self.is_valid = False
def __repr__(self): def __repr__(self):
return "<TextEdge x={} y0={} y1={} align={} valid={}>".format( x = round(self.x, 2)
round(self.x, 2), y0 = round(self.y0, 2)
round(self.y0, 2), y1 = round(self.y1, 2)
round(self.y1, 2), return f"<TextEdge x={x} y0={y0} y1={y1} align={self.align} valid={self.is_valid}>"
self.align,
self.is_valid,
)
def update_coords(self, x, y0, edge_tol=50): def update_coords(self, x, y0, edge_tol=50):
"""Updates the text edge's x and bottom y coordinates and sets """Updates the text edge's x and bottom y coordinates and sets
@ -291,9 +288,11 @@ class Cell(object):
self._text = "" self._text = ""
def __repr__(self): def __repr__(self):
return "<Cell x1={} y1={} x2={} y2={}>".format( x1 = round(self.x1, 2)
round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2) y1 = round(self.y1, 2)
) x2 = round(self.x2, 2)
y2 = round(self.y2, 2)
return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
@property @property
def text(self): def text(self):
@ -351,7 +350,7 @@ class Table(object):
self.page = None self.page = None
def __repr__(self): def __repr__(self):
return "<{} shape={}>".format(self.__class__.__name__, self.shape) return f"<{self.__class__.__name__} shape={self.shape}>"
def __lt__(self, other): def __lt__(self, other):
if self.page == other.page: if self.page == other.page:
@ -612,7 +611,7 @@ class Table(object):
""" """
kw = { kw = {
"sheet_name": "page-{}-table-{}".format(self.page, self.order), "sheet_name": f"page-{self.page}-table-{self.order}",
"encoding": "utf-8", "encoding": "utf-8",
} }
kw.update(kwargs) kw.update(kwargs)
@ -649,7 +648,7 @@ class Table(object):
kw = {"if_exists": "replace", "index": False} kw = {"if_exists": "replace", "index": False}
kw.update(kwargs) kw.update(kwargs)
conn = sqlite3.connect(path) conn = sqlite3.connect(path)
table_name = "page-{}-table-{}".format(self.page, self.order) table_name = f"page-{self.page}-table-{self.order}"
self.df.to_sql(table_name, conn, **kw) self.df.to_sql(table_name, conn, **kw)
conn.commit() conn.commit()
conn.close() conn.close()
@ -670,7 +669,7 @@ class TableList(object):
self._tables = tables self._tables = tables
def __repr__(self): def __repr__(self):
return "<{} n={}>".format(self.__class__.__name__, self.n) return f"<{self.__class__.__name__} n={self.n}>"
def __len__(self): def __len__(self):
return len(self._tables) return len(self._tables)
@ -680,7 +679,7 @@ class TableList(object):
@staticmethod @staticmethod
def _format_func(table, f): def _format_func(table, f):
return getattr(table, "to_{}".format(f)) return getattr(table, f"to_{f}")
@property @property
def n(self): def n(self):
@ -691,9 +690,7 @@ class TableList(object):
root = kwargs.get("root") root = kwargs.get("root")
ext = kwargs.get("ext") ext = kwargs.get("ext")
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
to_format = self._format_func(table, f) to_format = self._format_func(table, f)
to_format(filepath) to_format(filepath)
@ -706,9 +703,7 @@ class TableList(object):
zipname = os.path.join(os.path.dirname(path), root) + ".zip" zipname = os.path.join(os.path.dirname(path), root) + ".zip"
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
for table in self._tables: for table in self._tables:
filename = os.path.join( filename = f"{root}-page-{table.page}-table-{table.order}{ext}"
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
)
filepath = os.path.join(dirname, filename) filepath = os.path.join(dirname, filename)
z.write(filepath, os.path.basename(filepath)) z.write(filepath, os.path.basename(filepath))
@ -741,7 +736,7 @@ class TableList(object):
filepath = os.path.join(dirname, basename) filepath = os.path.join(dirname, basename)
writer = pd.ExcelWriter(filepath) writer = pd.ExcelWriter(filepath)
for table in self._tables: for table in self._tables:
sheet_name = "page-{}-table-{}".format(table.page, table.order) sheet_name = f"page-{table.page}-table-{table.order}"
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8") table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
writer.save() writer.save()
if compress: if compress:

View File

@ -106,7 +106,7 @@ class PDFHandler(object):
infile = PdfFileReader(fileobj, strict=False) infile = PdfFileReader(fileobj, strict=False)
if infile.isEncrypted: if infile.isEncrypted:
infile.decrypt(self.password) infile.decrypt(self.password)
fpath = os.path.join(temp, "page-{0}.pdf".format(page)) fpath = os.path.join(temp, f"page-{page}.pdf")
froot, fext = os.path.splitext(fpath) froot, fext = os.path.splitext(fpath)
p = infile.getPage(page - 1) p = infile.getPage(page - 1)
outfile = PdfFileWriter() outfile = PdfFileWriter()
@ -164,7 +164,7 @@ class PDFHandler(object):
for p in self.pages: for p in self.pages:
self._save_page(self.filepath, p, tempdir) self._save_page(self.filepath, p, tempdir)
pages = [ pages = [
os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages os.path.join(tempdir, f"page-{p}.pdf") for p in self.pages
] ]
parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs) parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
for p in pages: for p in pages:

View File

@ -357,7 +357,7 @@ class Stream(BaseParser):
ncols = max(set(elements), key=elements.count) ncols = max(set(elements), key=elements.count)
else: else:
warnings.warn( warnings.warn(
"No tables found in table area {}".format(table_idx + 1) f"No tables found in table area {table_idx + 1}"
) )
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r] cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol) cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
@ -432,19 +432,19 @@ class Stream(BaseParser):
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
self._generate_layout(filename, layout_kwargs) self._generate_layout(filename, layout_kwargs)
base_filename = os.path.basename(self.rootname)
if not suppress_stdout: if not suppress_stdout:
logger.info("Processing {}".format(os.path.basename(self.rootname))) logger.info(f"Processing {base_filename}")
if not self.horizontal_text: if not self.horizontal_text:
if self.images: if self.images:
warnings.warn( warnings.warn(
"{} is image-based, camelot only works on" f"{base_filename} is image-based, camelot only works on"
" text-based pages.".format(os.path.basename(self.rootname)) " text-based pages."
) )
else: else:
warnings.warn( warnings.warn(f"No tables found on {base_filename}")
"No tables found on {}".format(os.path.basename(self.rootname))
)
return [] return []
self._generate_table_bbox() self._generate_table_bbox()

View File

@ -35,11 +35,11 @@ class PlotMethods(object):
if table.flavor == "lattice" and kind in ["textedge"]: if table.flavor == "lattice" and kind in ["textedge"]:
raise NotImplementedError( raise NotImplementedError(
"Lattice flavor does not support kind='{}'".format(kind) f"Lattice flavor does not support kind='{kind}'"
) )
elif table.flavor == "stream" and kind in ["joint", "line"]: elif table.flavor == "stream" and kind in ["joint", "line"]:
raise NotImplementedError( raise NotImplementedError(
"Stream flavor does not support kind='{}'".format(kind) f"Stream flavor does not support kind='{kind}'"
) )
plot_method = getattr(self, kind) plot_method = getattr(self, kind)

View File

@ -79,7 +79,7 @@ def download_url(url):
Temporary filepath. Temporary filepath.
""" """
filename = "{}.pdf".format(random_string(6)) filename = f"{random_string(6)}.pdf"
with tempfile.NamedTemporaryFile("wb", delete=False) as f: with tempfile.NamedTemporaryFile("wb", delete=False) as f:
headers = {"User-Agent": "Mozilla/5.0"} headers = {"User-Agent": "Mozilla/5.0"}
request = Request(url, None, headers) request = Request(url, None, headers)
@ -113,9 +113,7 @@ def validate_input(kwargs, flavor="lattice"):
isec = set(parser_kwargs).intersection(set(input_kwargs.keys())) isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
if isec: if isec:
raise ValueError( raise ValueError(
"{} cannot be used with flavor='{}'".format( f"{','.join(sorted(isec))} cannot be used with flavor='{flavor}'"
",".join(sorted(isec)), flavor
)
) )
if flavor == "lattice": if flavor == "lattice":
@ -413,7 +411,7 @@ def text_strip(text, strip=""):
return text return text
stripped = re.sub( stripped = re.sub(
r"[{}]".format("".join(map(re.escape, strip))), "", text, re.UNICODE fr"[{''.join(map(re.escape, strip))}]", "", text, re.UNICODE
) )
return stripped return stripped
@ -650,9 +648,7 @@ def get_table_index(
text_range = (t.x0, t.x1) text_range = (t.x0, t.x1)
col_range = (table.cols[0][0], table.cols[-1][1]) col_range = (table.cols[0][0], table.cols[-1][1])
warnings.warn( warnings.warn(
"{} {} does not lie in column range {}".format( f"{text} {text_range} does not lie in column range {col_range}"
text, text_range, col_range
)
) )
r_idx = r r_idx = r
c_idx = lt_col_overlap.index(max(lt_col_overlap)) c_idx = lt_col_overlap.index(max(lt_col_overlap))

View File

@ -114,31 +114,35 @@ def test_cli_password():
def test_cli_output_format(): def test_cli_output_format():
with TemporaryDirectory() as tempdir: with TemporaryDirectory() as tempdir:
infile = os.path.join(testdir, "health.pdf") infile = os.path.join(testdir, "health.pdf")
outfile = os.path.join(tempdir, "health.{}")
runner = CliRunner() runner = CliRunner()
# json # json
outfile = os.path.join(tempdir, "health.json")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "json", "--output", outfile.format("json"), "stream", infile], ["--format", "json", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# excel # excel
outfile = os.path.join(tempdir, "health.xlsx")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile], ["--format", "excel", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# html # html
outfile = os.path.join(tempdir, "health.html")
result = runner.invoke( result = runner.invoke(
cli, cli,
["--format", "html", "--output", outfile.format("html"), "stream", infile], ["--format", "html", "--output", outfile, "stream", infile],
) )
assert result.exit_code == 0 assert result.exit_code == 0
# zip # zip
outfile = os.path.join(tempdir, "health.csv")
result = runner.invoke( result = runner.invoke(
cli, cli,
[ [
@ -146,7 +150,7 @@ def test_cli_output_format():
"--format", "--format",
"csv", "csv",
"--output", "--output",
outfile.format("csv"), outfile,
"stream", "stream",
infile, infile,
], ],

View File

@ -73,7 +73,7 @@ def test_no_tables_found_logs_suppressed():
tables = camelot.read_pdf(filename, suppress_stdout=True) tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail("Unexpected warning: {}".format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_tables_found_warnings_suppressed(): def test_no_tables_found_warnings_suppressed():
@ -85,7 +85,7 @@ def test_no_tables_found_warnings_suppressed():
tables = camelot.read_pdf(filename, suppress_stdout=True) tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e: except Warning as e:
warning_text = str(e) warning_text = str(e)
pytest.fail("Unexpected warning: {}".format(warning_text)) pytest.fail(f"Unexpected warning: {warning_text}")
def test_no_password(): def test_no_password():