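Optimised and cleaned the code.

- Simplify the loops in camelot/core.py and flatten the nested conditionals in camelot/parsers/lattice.py; rename the parameter `t` to `table` in the Lattice helpers.
- Collapse the URL handling in PDFHandler.__init__ into a conditional expression (note: the `.pdf` extension check now tests the original argument, not the downloaded file path).
- Send an Accept-Encoding header from download_url.
- Update the Code of Conduct essay link; ignore .env and changelog.txt.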

2022-01-05 12:56:30 +05:30 · 2022-01-05 12:56:30 +05:30 · 8beb8d79bf
parent 644bbe7c6d
commit 8beb8d79bf
7 changed files with 43 additions and 44 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 fontconfig/
-__pycache__/
+__pycache__/*
 *.py[cod]
 *.so

@@ -18,3 +18,5 @@ htmlcov/

 # vscode
 .vscode
+.env
+changelog.txt
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,3 +1,3 @@
 Be cordial or be on your way. --Kenneth Reitz

-https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way
+https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -10,7 +10,7 @@ The following quote sums up the **Code Of Conduct**.

   > Be cordial or be on your way. --Kenneth Reitz

-Kenneth Reitz has also written an [essay](https://www.kennethreitz.org/essays/be-cordial-or-be-on-your-way) on this topic, which you should read.
+Kenneth Reitz has also written an [essay](https://kennethreitz.org/essays/2013/01/27/be-cordial-or-be-on-your-way) on this topic, which you should read.

 As the [Requests Code Of Conduct](http://docs.python-requests.org/en/master/dev/contributing/#be-cordial) states, **all contributions are welcome**, as long as everyone involved is treated with respect.

--- a/camelot/core.py
+++ b/camelot/core.py
@@ -524,12 +524,12 @@ class Table(object):

    def set_border(self):
        """Sets table border edges to True."""
-        for r in range(len(self.rows)):
-            self.cells[r][0].left = True
-            self.cells[r][len(self.cols) - 1].right = True
-        for c in range(len(self.cols)):
-            self.cells[0][c].top = True
-            self.cells[len(self.rows) - 1][c].bottom = True
+        for index, row in enumerate(self.rows):
+            self.cells[index][0].left = True
+            self.cells[index][len(self.cols) - 1].right = True
+        for index, col in enumerate(self.cols):
+            self.cells[0][index].top = True
+            self.cells[len(self.rows) - 1][index].bottom = True
        return self

    def set_span(self):
--- a/camelot/handlers.py
+++ b/camelot/handlers.py
@@ -35,9 +35,7 @@ class PDFHandler(object):
    """

    def __init__(self, filepath, pages="1", password=None):
-        if is_url(filepath):
-            filepath = download_url(filepath)
-        self.filepath = filepath
+        self.filepath = download_url(filepath) if is_url(filepath) else filepath
        if not filepath.lower().endswith(".pdf"):
            raise NotImplementedError("File format not supported")

--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -162,7 +162,7 @@ class Lattice(BaseParser):
            return backend

    @staticmethod
-    def _reduce_index(t, idx, shift_text):
+    def _reduce_index(table, idx, shift_text):
        """Reduces index of a text object if it lies within a spanning
        cell.

@@ -187,32 +187,28 @@ class Lattice(BaseParser):
        indices = []
        for r_idx, c_idx, text in idx:
            for d in shift_text:
-                if d == "l":
-                    if t.cells[r_idx][c_idx].hspan:
-                        while not t.cells[r_idx][c_idx].left:
-                            c_idx -= 1
-                if d == "r":
-                    if t.cells[r_idx][c_idx].hspan:
-                        while not t.cells[r_idx][c_idx].right:
-                            c_idx += 1
-                if d == "t":
-                    if t.cells[r_idx][c_idx].vspan:
-                        while not t.cells[r_idx][c_idx].top:
-                            r_idx -= 1
-                if d == "b":
-                    if t.cells[r_idx][c_idx].vspan:
-                        while not t.cells[r_idx][c_idx].bottom:
-                            r_idx += 1
+                if d == "l" and table.cells[r_idx][c_idx].hspan:
+                    while not table.cells[r_idx][c_idx].left:
+                        c_idx -= 1
+                if d == "r" and table.cells[r_idx][c_idx].hspan:
+                    while not table.cells[r_idx][c_idx].right:
+                        c_idx += 1
+                if d == "t" and table.cells[r_idx][c_idx].vspan:
+                    while not table.cells[r_idx][c_idx].top:
+                        r_idx -= 1
+                if d == "b" and table.cells[r_idx][c_idx].vspan:
+                    while not table.cells[r_idx][c_idx].bottom:
+                        r_idx += 1
            indices.append((r_idx, c_idx, text))
        return indices

    @staticmethod
-    def _copy_spanning_text(t, copy_text=None):
+    def _copy_spanning_text(table, copy_text=None):
        """Copies over text in empty spanning cells.

        Parameters
        ----------
-        t : camelot.core.Table
+        table : camelot.core.Table
        copy_text : list, optional (default: None)
            {'h', 'v'}
            Select one or more strings from above and pass them as a list
@@ -221,23 +217,23 @@ class Lattice(BaseParser):

        Returns
        -------
-        t : camelot.core.Table
+        table : camelot.core.Table

        """
        for f in copy_text:
            if f == "h":
-                for i in range(len(t.cells)):
-                    for j in range(len(t.cells[i])):
-                        if t.cells[i][j].text.strip() == "":
-                            if t.cells[i][j].hspan and not t.cells[i][j].left:
-                                t.cells[i][j].text = t.cells[i][j - 1].text
+                for i in range(len(table.cells)):
+                    for j in range(len(table.cells[i])):
+                        if table.cells[i][j].text.strip() == "":
+                            if table.cells[i][j].hspan and not table.cells[i][j].left:
+                                table.cells[i][j].text = table.cells[i][j - 1].text
            elif f == "v":
-                for i in range(len(t.cells)):
-                    for j in range(len(t.cells[i])):
-                        if t.cells[i][j].text.strip() == "":
-                            if t.cells[i][j].vspan and not t.cells[i][j].top:
-                                t.cells[i][j].text = t.cells[i - 1][j].text
-        return t
+                for i in range(len(table.cells)):
+                    for j in range(len(table.cells[i])):
+                        if table.cells[i][j].text.strip() == "":
+                            if table.cells[i][j].vspan and not table.cells[i][j].top:
+                                table.cells[i][j].text = table.cells[i - 1][j].text
+        return table

    def _generate_table_bbox(self):
        def scale_areas(areas):
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -81,7 +81,10 @@ def download_url(url):
    """
    filename = f"{random_string(6)}.pdf"
    with tempfile.NamedTemporaryFile("wb", delete=False) as f:
-        headers = {"User-Agent": "Mozilla/5.0"}
+        headers = {
+            "User-Agent": "Mozilla/5.0",
+            "Accept-Encoding": "gzip;q=1.0, deflate;q=0.9, br;q=0.8, compress;q=0.7, *;q=0.1"
+        }
        request = Request(url, None, headers)
        obj = urlopen(request)
        content_type = obj.info().get_content_type()