From 8beb8d79bfc475d1d436719c4e1773d2a3a430da Mon Sep 17 00:00:00 2001 From: pratheeshraniprakash Date: Wed, 5 Jan 2022 12:56:30 +0530 Subject: [PATCH] Optimised and cleaned the code. --- .gitignore | 4 ++- CODE_OF_CONDUCT.md | 2 +- CONTRIBUTING.md | 2 +- camelot/core.py | 12 ++++---- camelot/handlers.py | 4 +-- camelot/parsers/lattice.py | 58 ++++++++++++++++++-------------------- camelot/utils.py | 5 +++- 7 files changed, 43 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index aaeac14..e9fc3ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ fontconfig/ -__pycache__/ +__pycache__/* *.py[cod] *.so @@ -18,3 +18,5 @@ htmlcov/ # vscode .vscode +.env +changelog.txt diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4d7cf3..080a0fb 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,3 +1,3 @@ Be cordial or be on your way. --Kenneth Reitz -https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way +https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8bb8371..1bfc8dd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ The following quote sums up the **Code Of Conduct**. > Be cordial or be on your way. --Kenneth Reitz -Kenneth Reitz has also written an [essay](https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way) on this topic, which you should read. +Kenneth Reitz has also written an [essay](https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way) on this topic, which you should read. As the [Requests Code Of Conduct](http://docs.python-requests.org/en/master/dev/contributing/#be-cordial) states, **all contributions are welcome**, as long as everyone involved is treated with respect. 
diff --git a/camelot/core.py b/camelot/core.py index 58a98ef..f951f83 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -524,12 +524,12 @@ class Table(object): def set_border(self): """Sets table border edges to True.""" - for r in range(len(self.rows)): - self.cells[r][0].left = True - self.cells[r][len(self.cols) - 1].right = True - for c in range(len(self.cols)): - self.cells[0][c].top = True - self.cells[len(self.rows) - 1][c].bottom = True + for index, row in enumerate(self.rows): + self.cells[index][0].left = True + self.cells[index][len(self.cols) - 1].right = True + for index, col in enumerate(self.cols): + self.cells[0][index].top = True + self.cells[len(self.rows) - 1][index].bottom = True return self def set_span(self): diff --git a/camelot/handlers.py b/camelot/handlers.py index 61585b6..101793c 100644 --- a/camelot/handlers.py +++ b/camelot/handlers.py @@ -35,9 +35,7 @@ class PDFHandler(object): """ def __init__(self, filepath, pages="1", password=None): - if is_url(filepath): - filepath = download_url(filepath) - self.filepath = filepath + self.filepath = filepath = download_url(filepath) if is_url(filepath) else filepath if not filepath.lower().endswith(".pdf"): raise NotImplementedError("File format not supported") diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index a175227..d10bb21 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -162,7 +162,7 @@ class Lattice(BaseParser): return backend @staticmethod - def _reduce_index(t, idx, shift_text): + def _reduce_index(table, idx, shift_text): """Reduces index of a text object if it lies within a spanning cell. 
@@ -187,32 +187,28 @@ class Lattice(BaseParser): indices = [] for r_idx, c_idx, text in idx: for d in shift_text: - if d == "l": - if t.cells[r_idx][c_idx].hspan: - while not t.cells[r_idx][c_idx].left: - c_idx -= 1 - if d == "r": - if t.cells[r_idx][c_idx].hspan: - while not t.cells[r_idx][c_idx].right: - c_idx += 1 - if d == "t": - if t.cells[r_idx][c_idx].vspan: - while not t.cells[r_idx][c_idx].top: - r_idx -= 1 - if d == "b": - if t.cells[r_idx][c_idx].vspan: - while not t.cells[r_idx][c_idx].bottom: - r_idx += 1 + if d == "l" and table.cells[r_idx][c_idx].hspan: + while not table.cells[r_idx][c_idx].left: + c_idx -= 1 + if d == "r" and table.cells[r_idx][c_idx].hspan: + while not table.cells[r_idx][c_idx].right: + c_idx += 1 + if d == "t" and table.cells[r_idx][c_idx].vspan: + while not table.cells[r_idx][c_idx].top: + r_idx -= 1 + if d == "b" and table.cells[r_idx][c_idx].vspan: + while not table.cells[r_idx][c_idx].bottom: + r_idx += 1 indices.append((r_idx, c_idx, text)) return indices @staticmethod - def _copy_spanning_text(t, copy_text=None): + def _copy_spanning_text(table, copy_text=None): """Copies over text in empty spanning cells. 
Parameters ---------- - t : camelot.core.Table + table : camelot.core.Table copy_text : list, optional (default: None) {'h', 'v'} Select one or more strings from above and pass them as a list @@ -221,23 +217,23 @@ class Lattice(BaseParser): Returns ------- - t : camelot.core.Table + table : camelot.core.Table """ for f in copy_text: if f == "h": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].text.strip() == "": - if t.cells[i][j].hspan and not t.cells[i][j].left: - t.cells[i][j].text = t.cells[i][j - 1].text + for i in range(len(table.cells)): + for j in range(len(table.cells[i])): + if table.cells[i][j].text.strip() == "": + if table.cells[i][j].hspan and not table.cells[i][j].left: + table.cells[i][j].text = table.cells[i][j - 1].text elif f == "v": - for i in range(len(t.cells)): - for j in range(len(t.cells[i])): - if t.cells[i][j].text.strip() == "": - if t.cells[i][j].vspan and not t.cells[i][j].top: - t.cells[i][j].text = t.cells[i - 1][j].text - return t + for i in range(len(table.cells)): + for j in range(len(table.cells[i])): + if table.cells[i][j].text.strip() == "": + if table.cells[i][j].vspan and not table.cells[i][j].top: + table.cells[i][j].text = table.cells[i - 1][j].text + return table def _generate_table_bbox(self): def scale_areas(areas): diff --git a/camelot/utils.py b/camelot/utils.py index 404c00b..21f101a 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -81,7 +81,10 @@ def download_url(url): """ filename = f"{random_string(6)}.pdf" with tempfile.NamedTemporaryFile("wb", delete=False) as f: - headers = {"User-Agent": "Mozilla/5.0"} + headers = { + "User-Agent": "Mozilla/5.0", + "Accept-Encoding": "identity" + } request = Request(url, None, headers) obj = urlopen(request) content_type = obj.info().get_content_type()