Move cell and table to core

2018-09-04 03:49:43 +05:30 · 2018-09-04 03:49:43 +05:30 · c689735da2
parent ae64264d3e
commit c689735da2
7 changed files with 144 additions and 135 deletions
--- a/camelot/__init__.py
+++ b/camelot/__init__.py
@ -1,3 +1,6 @@
-__version__ = '1.2.0'
+from .core import *
+from .__version__ import __version__

-__all__ = ['pdf', 'lattice', 'stream']
+
+def read_pdf(filepath, pages='1', grid=True):
+    pass
--- a/camelot/__version__.py
+++ b/camelot/__version__.py
@ -0,0 +1 @@
+__version__ = '0.1.0'
--- a/camelot/cell.py
+++ b/camelot/cell.py
@ -1,128 +0,0 @@
-class Cell:
-    """Cell.
-    Defines a cell object with coordinates relative to a left-bottom
-    origin, which is also PDFMiner's coordinate space.
-
-    Parameters
-    ----------
-    x1 : float
-        x-coordinate of left-bottom point.
-
-    y1 : float
-        y-coordinate of left-bottom point.
-
-    x2 : float
-        x-coordinate of right-top point.
-
-    y2 : float
-        y-coordinate of right-top point.
-
-    Attributes
-    ----------
-    lb : tuple
-        Tuple representing left-bottom coordinates.
-
-    lt : tuple
-        Tuple representing left-top coordinates.
-
-    rb : tuple
-        Tuple representing right-bottom coordinates.
-
-    rt : tuple
-        Tuple representing right-top coordinates.
-
-    bbox : tuple
-        Tuple representing the cell's bounding box using the
-        lower-bottom and right-top coordinates.
-
-    left : bool
-        Whether or not cell is bounded on the left.
-
-    right : bool
-        Whether or not cell is bounded on the right.
-
-    top : bool
-        Whether or not cell is bounded on the top.
-
-    bottom : bool
-        Whether or not cell is bounded on the bottom.
-
-    text_objects : list
-        List of text objects assigned to cell.
-
-    text : string
-        Text assigned to cell.
-
-    spanning_h : bool
-        Whether or not cell spans/extends horizontally.
-
-    spanning_v : bool
-        Whether or not cell spans/extends vertically.
-    """
-
-    def __init__(self, x1, y1, x2, y2):
-
-        self.x1 = x1
-        self.y1 = y1
-        self.x2 = x2
-        self.y2 = y2
-        self.lb = (x1, y1)
-        self.lt = (x1, y2)
-        self.rb = (x2, y1)
-        self.rt = (x2, y2)
-        self.bbox = (x1, y1, x2, y2)
-        self.left = False
-        self.right = False
-        self.top = False
-        self.bottom = False
-        self.text_objects = []
-        self.text = ''
-        self.spanning_h = False
-        self.spanning_v = False
-        self.image = None
-
-    def add_text(self, text):
-        """Adds text to cell.
-
-        Parameters
-        ----------
-        text : string
-        """
-        self.text = ''.join([self.text, text])
-
-    def get_text(self):
-        """Returns text assigned to cell.
-
-        Returns
-        -------
-        text : string
-        """
-        return self.text
-
-    def add_object(self, t_object):
-        """Adds PDFMiner text object to cell.
-
-        Parameters
-        ----------
-        t_object : object
-        """
-        self.text_objects.append(t_object)
-
-    def get_objects(self):
-        """Returns list of text objects assigned to cell.
-
-        Returns
-        -------
-        text_objects : list
-        """
-        return self.text_objects
-
-    def get_bounded_edges(self):
-        """Returns the number of edges by which a cell is bounded.
-
-        Returns
-        -------
-        bounded_edges : int
-        """
-        self.bounded_edges = self.top + self.bottom + self.left + self.right
-        return self.bounded_edges
--- a/camelot/cli.py
+++ b/camelot/cli.py
--- a/camelot/table.py
+++ b/camelot/table.py
@ -1,9 +1,138 @@
 import numpy as np
-
-from .cell import Cell
+import pandas as pd


-class Table:
+class Cell(object):
+    """Cell.
+    Defines a cell object with coordinates relative to a left-bottom
+    origin, which is also PDFMiner's coordinate space.
+
+    Parameters
+    ----------
+    x1 : float
+        x-coordinate of left-bottom point.
+
+    y1 : float
+        y-coordinate of left-bottom point.
+
+    x2 : float
+        x-coordinate of right-top point.
+
+    y2 : float
+        y-coordinate of right-top point.
+
+    Attributes
+    ----------
+    lb : tuple
+        Tuple representing left-bottom coordinates.
+
+    lt : tuple
+        Tuple representing left-top coordinates.
+
+    rb : tuple
+        Tuple representing right-bottom coordinates.
+
+    rt : tuple
+        Tuple representing right-top coordinates.
+
+    bbox : tuple
+        Tuple representing the cell's bounding box using the
+        left-bottom and right-top coordinates.
+
+    left : bool
+        Whether or not cell is bounded on the left.
+
+    right : bool
+        Whether or not cell is bounded on the right.
+
+    top : bool
+        Whether or not cell is bounded on the top.
+
+    bottom : bool
+        Whether or not cell is bounded on the bottom.
+
+    text_objects : list
+        List of text objects assigned to cell.
+
+    text : string
+        Text assigned to cell.
+
+    spanning_h : bool
+        Whether or not cell spans/extends horizontally.
+
+    spanning_v : bool
+        Whether or not cell spans/extends vertically.
+    """
+
+    def __init__(self, x1, y1, x2, y2):
+
+        self.x1 = x1
+        self.y1 = y1
+        self.x2 = x2
+        self.y2 = y2
+        self.lb = (x1, y1)
+        self.lt = (x1, y2)
+        self.rb = (x2, y1)
+        self.rt = (x2, y2)
+        self.bbox = (x1, y1, x2, y2)
+        self.left = False
+        self.right = False
+        self.top = False
+        self.bottom = False
+        self.text_objects = []
+        self.text = ''
+        self.spanning_h = False
+        self.spanning_v = False
+        self.image = None
+
+    def add_text(self, text):
+        """Adds text to cell.
+
+        Parameters
+        ----------
+        text : string
+        """
+        self.text = ''.join([self.text, text])
+
+    def get_text(self):
+        """Returns text assigned to cell.
+
+        Returns
+        -------
+        text : string
+        """
+        return self.text
+
+    def add_object(self, t_object):
+        """Adds PDFMiner text object to cell.
+
+        Parameters
+        ----------
+        t_object : object
+        """
+        self.text_objects.append(t_object)
+
+    def get_objects(self):
+        """Returns list of text objects assigned to cell.
+
+        Returns
+        -------
+        text_objects : list
+        """
+        return self.text_objects
+
+    def get_bounded_edges(self):
+        """Returns the number of edges by which a cell is bounded.
+
+        Returns
+        -------
+        bounded_edges : int
+        """
+        self.bounded_edges = self.top + self.bottom + self.left + self.right
+        return self.bounded_edges
+
+
+class Table(object):
    """Table.
    Defines a table object with coordinates relative to a left-bottom
    origin, which is also PDFMiner's coordinate space.
@ -234,3 +363,7 @@ class Table:
            ar.append([self.cells[r][c].get_text().strip()
                       for c in range(len(self.cols))])
        return ar
+
+
+class TableSet(object):
+    pass
--- a/camelot/lattice.py
+++ b/camelot/lattice.py
@ -8,9 +8,9 @@ import copy_reg
 import warnings
 import subprocess

+from .core import Table
 from .imgproc import (adaptive_threshold, find_lines, find_table_contours,
                      find_table_joints)
-from .table import Table
 from .utils import (scale_to_pdf, scale_to_image, segments_bbox, text_in_bbox,
                    merge_close_values, get_table_index, get_score, count_empty,
                    encode_list, get_text_objects, get_page_layout)
--- a/camelot/stream.py
+++ b/camelot/stream.py
@ -8,7 +8,7 @@ import warnings

 import numpy as np

-from .table import Table
+from .core import Table
 from .utils import (text_in_bbox, get_table_index, get_score, count_empty,
                    encode_list, get_text_objects, get_page_layout)