Optimised and cleaned the code.

pull/280/head
pratheeshraniprakash 2022-01-05 12:56:30 +05:30
parent 644bbe7c6d
commit 8beb8d79bf
7 changed files with 43 additions and 44 deletions

4
.gitignore vendored
View File

@@ -1,5 +1,5 @@
fontconfig/ fontconfig/
__pycache__/ __pycache__/*
*.py[cod] *.py[cod]
*.so *.so
@@ -18,3 +18,5 @@ htmlcov/
# vscode # vscode
.vscode .vscode
.env
changelog.txt

View File

@@ -1,3 +1,3 @@
Be cordial or be on your way. --Kenneth Reitz Be cordial or be on your way. --Kenneth Reitz
https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way

View File

@@ -10,7 +10,7 @@ The following quote sums up the **Code Of Conduct**.
> Be cordial or be on your way. --Kenneth Reitz > Be cordial or be on your way. --Kenneth Reitz
Kenneth Reitz has also written an [essay](https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way) on this topic, which you should read. Kenneth Reitz has also written an [essay](https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way) on this topic, which you should read.
As the [Requests Code Of Conduct](http://docs.python-requests.org/en/master/dev/contributing/#be-cordial) states, **all contributions are welcome**, as long as everyone involved is treated with respect. As the [Requests Code Of Conduct](http://docs.python-requests.org/en/master/dev/contributing/#be-cordial) states, **all contributions are welcome**, as long as everyone involved is treated with respect.

View File

@@ -524,12 +524,12 @@ class Table(object):
def set_border(self): def set_border(self):
"""Sets table border edges to True.""" """Sets table border edges to True."""
for r in range(len(self.rows)): for index, row in enumerate(self.rows):
self.cells[r][0].left = True self.cells[index][0].left = True
self.cells[r][len(self.cols) - 1].right = True self.cells[index][len(self.cols) - 1].right = True
for c in range(len(self.cols)): for index, col in enumerate(self.cols):
self.cells[0][c].top = True self.cells[0][index].top = True
self.cells[len(self.rows) - 1][c].bottom = True self.cells[len(self.rows) - 1][index].bottom = True
return self return self
def set_span(self): def set_span(self):

View File

@@ -35,9 +35,7 @@ class PDFHandler(object):
""" """
def __init__(self, filepath, pages="1", password=None): def __init__(self, filepath, pages="1", password=None):
if is_url(filepath): self.filepath = download_url(filepath) if is_url(filepath) else filepath
filepath = download_url(filepath)
self.filepath = filepath
if not filepath.lower().endswith(".pdf"): if not filepath.lower().endswith(".pdf"):
raise NotImplementedError("File format not supported") raise NotImplementedError("File format not supported")

View File

@@ -162,7 +162,7 @@ class Lattice(BaseParser):
return backend return backend
@staticmethod @staticmethod
def _reduce_index(t, idx, shift_text): def _reduce_index(table, idx, shift_text):
"""Reduces index of a text object if it lies within a spanning """Reduces index of a text object if it lies within a spanning
cell. cell.
@@ -187,32 +187,28 @@ class Lattice(BaseParser):
indices = [] indices = []
for r_idx, c_idx, text in idx: for r_idx, c_idx, text in idx:
for d in shift_text: for d in shift_text:
if d == "l": if d == "l" and table.cells[r_idx][c_idx].hspan:
if t.cells[r_idx][c_idx].hspan: while not table.cells[r_idx][c_idx].left:
while not t.cells[r_idx][c_idx].left:
c_idx -= 1 c_idx -= 1
if d == "r": if d == "r" and table.cells[r_idx][c_idx].hspan:
if t.cells[r_idx][c_idx].hspan: while not table.cells[r_idx][c_idx].right:
while not t.cells[r_idx][c_idx].right:
c_idx += 1 c_idx += 1
if d == "t": if d == "t" and table.cells[r_idx][c_idx].vspan:
if t.cells[r_idx][c_idx].vspan: while not table.cells[r_idx][c_idx].top:
while not t.cells[r_idx][c_idx].top:
r_idx -= 1 r_idx -= 1
if d == "b": if d == "b" and table.cells[r_idx][c_idx].vspan:
if t.cells[r_idx][c_idx].vspan: while not table.cells[r_idx][c_idx].bottom:
while not t.cells[r_idx][c_idx].bottom:
r_idx += 1 r_idx += 1
indices.append((r_idx, c_idx, text)) indices.append((r_idx, c_idx, text))
return indices return indices
@staticmethod @staticmethod
def _copy_spanning_text(t, copy_text=None): def _copy_spanning_text(table, copy_text=None):
"""Copies over text in empty spanning cells. """Copies over text in empty spanning cells.
Parameters Parameters
---------- ----------
t : camelot.core.Table table : camelot.core.Table
copy_text : list, optional (default: None) copy_text : list, optional (default: None)
{'h', 'v'} {'h', 'v'}
Select one or more strings from above and pass them as a list Select one or more strings from above and pass them as a list
@@ -221,23 +217,23 @@ class Lattice(BaseParser):
Returns Returns
------- -------
t : camelot.core.Table table : camelot.core.Table
""" """
for f in copy_text: for f in copy_text:
if f == "h": if f == "h":
for i in range(len(t.cells)): for i in range(len(table.cells)):
for j in range(len(t.cells[i])): for j in range(len(table.cells[i])):
if t.cells[i][j].text.strip() == "": if table.cells[i][j].text.strip() == "":
if t.cells[i][j].hspan and not t.cells[i][j].left: if table.cells[i][j].hspan and not table.cells[i][j].left:
t.cells[i][j].text = t.cells[i][j - 1].text table.cells[i][j].text = table.cells[i][j - 1].text
elif f == "v": elif f == "v":
for i in range(len(t.cells)): for i in range(len(table.cells)):
for j in range(len(t.cells[i])): for j in range(len(table.cells[i])):
if t.cells[i][j].text.strip() == "": if table.cells[i][j].text.strip() == "":
if t.cells[i][j].vspan and not t.cells[i][j].top: if table.cells[i][j].vspan and not table.cells[i][j].top:
t.cells[i][j].text = t.cells[i - 1][j].text table.cells[i][j].text = table.cells[i - 1][j].text
return t return table
def _generate_table_bbox(self): def _generate_table_bbox(self):
def scale_areas(areas): def scale_areas(areas):

View File

@@ -81,7 +81,10 @@ def download_url(url):
""" """
filename = f"{random_string(6)}.pdf" filename = f"{random_string(6)}.pdf"
with tempfile.NamedTemporaryFile("wb", delete=False) as f: with tempfile.NamedTemporaryFile("wb", delete=False) as f:
headers = {"User-Agent": "Mozilla/5.0"} headers = {
"User-Agent": "Mozilla/5.0",
"Accept-Encoding": "gzip;q=1.0, deflate;q=0.9, br;q=0.8, compress;q=0.7, *;q=0.1"
}
request = Request(url, None, headers) request = Request(url, None, headers)
obj = urlopen(request) obj = urlopen(request)
content_type = obj.info().get_content_type() content_type = obj.info().get_content_type()