Added a test case for the bbox_no_intersection method
Changed the test name to be more aligned with the other tests
pull/252/head
parent
f43235934b
commit
82d0bf2881
|
|
@ -0,0 +1,56 @@
|
|||
"Test to check intersection logic when no intersection area is returned"
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pdfminer.pdfparser import PDFParser
|
||||
from pdfminer.pdfdocument import PDFDocument
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
from pdfminer.pdfpage import PDFTextExtractionNotAllowed
|
||||
from pdfminer.pdfinterp import PDFResourceManager
|
||||
from pdfminer.pdfinterp import PDFPageInterpreter
|
||||
from pdfminer.converter import PDFPageAggregator
|
||||
from pdfminer.layout import (
|
||||
LAParams,
|
||||
LTAnno,
|
||||
LTChar,
|
||||
LTTextLineHorizontal,
|
||||
LTTextLineVertical,
|
||||
LTImage,
|
||||
LTTextBoxHorizontal
|
||||
)
|
||||
|
||||
# Absolute path of the "files" fixture directory that sits next to this module.
testdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")
|
||||
|
||||
from camelot.utils import bbox_intersection_area
|
||||
|
||||
def get_text_from_pdf(filename):
    """Return the first horizontal text box found in a PDF file.

    Each page is run through pdfminer layout analysis with default
    ``LAParams``; the search stops at the first ``LTTextBoxHorizontal``
    element encountered.

    Parameters
    ----------
    filename : str
        Path of the PDF file to read.

    Returns
    -------
    LTTextBoxHorizontal or None
        The first horizontal text box found, or ``None`` when no page
        yields one.

    """
    # https://stackoverflow.com/questions/22898145/how-to-extract-text-and-text-coordinates-from-a-pdf-file
    # https://pdfminer-docs.readthedocs.io/programming.html#performing-layout-analysis
    # The context manager closes the file on every exit path, including the
    # early return from inside the loops (the original never closed it).
    with open(filename, 'rb') as document:
        # Create resource manager
        rsrcmgr = PDFResourceManager()
        # Set parameters for analysis.
        laparams = LAParams()
        # Create a PDF page aggregator object.
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(document):
            interpreter.process_page(page)
            # receive the LTPage object for the page.
            layout = device.get_result()
            for element in layout:
                if isinstance(element, LTTextBoxHorizontal):
                    return element
    # No horizontal text box on any page.
    return None
|
||||
|
||||
def test_bbox_intersection_text():
    """
    Check that bbox_intersection_area reports 0.0 for the first text boxes
    extracted from two different fixture PDFs.
    """
    # Pull the first horizontal text box out of each fixture file, in order.
    first_box, second_box = (
        get_text_from_pdf(os.path.join(testdir, relative_path))
        for relative_path in ("foo.pdf", "tabula/12s0324.pdf")
    )

    assert bbox_intersection_area(first_box, second_box) == 0.0
|
||||
Loading…
Reference in New Issue