camelot-py/camelot/parsers/base.py

60 lines
1.5 KiB
Python

# -*- coding: utf-8 -*-
import os
from ..utils import (
get_text_objects
)
from ..core import Table
class BaseParser(object):
    """Defines a base parser.

    Holds the per-page state (layout, text objects, page dimensions) set
    by ``_generate_layout`` and provides a helper to create new tables.

    Parameters
    ----------
    parser_id
        Identifier stored on the instance as ``id``.

    """

    def __init__(self, parser_id):
        self.id = parser_id

        # For plotting details of parsing algorithms
        self.debug_info = {}

    def _generate_layout(self, filename, layout, dimensions,
                         page_idx, layout_kwargs):
        """Store the page layout and derive the attributes based on it.

        Parameters
        ----------
        filename : str
            Path of the file being parsed; ``rootname`` is this path with
            its extension stripped.
        layout
            Layout object handed to ``get_text_objects``
            (presumably a pdfminer page layout -- confirm against caller).
        dimensions : tuple
            Two-element (width, height) pair, unpacked into ``pdf_width``
            and ``pdf_height``.
        page_idx
            Page identifier stored as ``page`` and later copied onto the
            tables created by ``_initialize_new_table``.
        layout_kwargs
            Stored unchanged as ``layout_kwargs``.

        """
        self.filename = filename
        self.layout_kwargs = layout_kwargs
        self.layout = layout
        self.dimensions = dimensions
        self.page = page_idx

        # Split the layout content by object type.
        self.images = get_text_objects(self.layout, ltype="image")
        self.horizontal_text = get_text_objects(
            self.layout,
            ltype="horizontal_text"
        )
        self.vertical_text = get_text_objects(
            self.layout,
            ltype="vertical_text"
        )

        self.pdf_width, self.pdf_height = self.dimensions
        # Keep the path without its extension; the extension is discarded.
        self.rootname, _ = os.path.splitext(self.filename)

    def _initialize_new_table(self, table_idx, cols, rows):
        """Initialize new table object, ready to be populated

        Parameters
        ----------
        table_idx : int
            Index of this table within the pdf page analyzed
        cols : list
            list of coordinate boundaries tuples (left, right)
        rows : list
            list of coordinate boundaries tuples (bottom, top)

        Returns
        -------
        table : camelot.core.Table

        """
        table = Table(cols, rows)
        table.page = self.page
        # ``order`` is 1-based, whereas ``table_idx`` is 0-based.
        table.order = table_idx + 1
        return table