Fix unit tests, lint, drop Python 2 support
Drop EOL Python 2 support. Resolve unit test discrepancies. Update unit tests to pass in Travis across all supported Py. Linting.pull/153/head
|
|
@ -0,0 +1,3 @@
|
|||
[bandit]
|
||||
# Ignore concerns about asserts, necessary for unit test code
|
||||
skips: B101,B102
|
||||
|
|
@ -4,6 +4,7 @@ __pycache__/
|
|||
|
||||
build/
|
||||
dist/
|
||||
prof/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
.coverage
|
||||
|
|
@ -17,3 +18,5 @@ htmlcov/
|
|||
|
||||
# vscode
|
||||
.vscode
|
||||
|
||||
.DS_Store
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
sudo: true
|
||||
language: python
|
||||
cache: pip
|
||||
addons:
|
||||
|
|
@ -8,10 +7,6 @@ install:
|
|||
- make install
|
||||
jobs:
|
||||
include:
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
python: '2.7'
|
||||
- stage: test
|
||||
script:
|
||||
- make test
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ class TextEdge(object):
|
|||
intersections: int
|
||||
Number of intersections with horizontal text rows.
|
||||
is_valid: bool
|
||||
A text edge is valid if it intersections with at least
|
||||
A text edge is valid if it intersects with at least
|
||||
TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.
|
||||
|
||||
"""
|
||||
|
|
@ -65,7 +65,8 @@ class TextEdge(object):
|
|||
the is_valid attribute.
|
||||
"""
|
||||
if np.isclose(self.y0, y0, atol=edge_tol):
|
||||
self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
|
||||
self.x = (self.intersections * self.x + x) / \
|
||||
float(self.intersections + 1)
|
||||
self.y0 = y0
|
||||
self.intersections += 1
|
||||
# a textedge is valid only if it extends uninterrupted
|
||||
|
|
@ -141,13 +142,16 @@ class TextEdges(object):
|
|||
"""
|
||||
intersections_sum = {
|
||||
"left": sum(
|
||||
te.intersections for te in self._textedges["left"] if te.is_valid
|
||||
te.intersections for te in self._textedges["left"]
|
||||
if te.is_valid
|
||||
),
|
||||
"right": sum(
|
||||
te.intersections for te in self._textedges["right"] if te.is_valid
|
||||
te.intersections for te in self._textedges["right"]
|
||||
if te.is_valid
|
||||
),
|
||||
"middle": sum(
|
||||
te.intersections for te in self._textedges["middle"] if te.is_valid
|
||||
te.intersections for te in self._textedges["middle"]
|
||||
if te.is_valid
|
||||
),
|
||||
}
|
||||
|
||||
|
|
@ -292,7 +296,10 @@ class Cell(object):
|
|||
|
||||
def __repr__(self):
|
||||
return "<Cell x1={} y1={} x2={} y2={}>".format(
|
||||
round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
|
||||
round(self.x1, 2),
|
||||
round(self.y1, 2),
|
||||
round(self.x2, 2),
|
||||
round(self.y2, 2)
|
||||
)
|
||||
|
||||
@property
|
||||
|
|
@ -342,7 +349,9 @@ class Table(object):
|
|||
def __init__(self, cols, rows):
|
||||
self.cols = cols
|
||||
self.rows = rows
|
||||
self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
|
||||
self.cells = [
|
||||
[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows
|
||||
]
|
||||
self.df = None
|
||||
self.shape = (0, 0)
|
||||
self.accuracy = 0
|
||||
|
|
@ -579,7 +588,8 @@ class Table(object):
|
|||
Output filepath.
|
||||
|
||||
"""
|
||||
kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1}
|
||||
kw = {"encoding": "utf-8", "index": False, "header": False,
|
||||
"quoting": 1}
|
||||
kw.update(kwargs)
|
||||
self.df.to_csv(path, **kw)
|
||||
|
||||
|
|
@ -616,6 +626,7 @@ class Table(object):
|
|||
"encoding": "utf-8",
|
||||
}
|
||||
kw.update(kwargs)
|
||||
# pylint: disable=abstract-class-instantiated
|
||||
writer = pd.ExcelWriter(path)
|
||||
self.df.to_excel(writer, **kw)
|
||||
writer.save()
|
||||
|
|
@ -692,7 +703,8 @@ class TableList(object):
|
|||
ext = kwargs.get("ext")
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order,
|
||||
ext)
|
||||
)
|
||||
filepath = os.path.join(dirname, filename)
|
||||
to_format = self._format_func(table, f)
|
||||
|
|
@ -707,7 +719,10 @@ class TableList(object):
|
|||
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
|
||||
for table in self._tables:
|
||||
filename = os.path.join(
|
||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
||||
"{}-page-{}-table-{}{}".format(root,
|
||||
table.page,
|
||||
table.order,
|
||||
ext)
|
||||
)
|
||||
filepath = os.path.join(dirname, filename)
|
||||
z.write(filepath, os.path.basename(filepath))
|
||||
|
|
@ -739,10 +754,12 @@ class TableList(object):
|
|||
self._compress_dir(**kwargs)
|
||||
elif f == "excel":
|
||||
filepath = os.path.join(dirname, basename)
|
||||
# pylint: disable=abstract-class-instantiated
|
||||
writer = pd.ExcelWriter(filepath)
|
||||
for table in self._tables:
|
||||
sheet_name = "page-{}-table-{}".format(table.page, table.order)
|
||||
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
|
||||
table.df.to_excel(writer, sheet_name=sheet_name,
|
||||
encoding="utf-8")
|
||||
writer.save()
|
||||
if compress:
|
||||
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
||||
|
|
|
|||
|
|
@ -113,14 +113,20 @@ class PDFHandler(object):
|
|||
outfile.addPage(p)
|
||||
with open(fpath, "wb") as f:
|
||||
outfile.write(f)
|
||||
layout, dim = get_page_layout(fpath)
|
||||
layout, __ = get_page_layout(fpath)
|
||||
# fix rotated PDF
|
||||
chars = get_text_objects(layout, ltype="char")
|
||||
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
|
||||
vertical_text = get_text_objects(layout, ltype="vertical_text")
|
||||
rotation = get_rotation(chars, horizontal_text, vertical_text)
|
||||
if rotation != "":
|
||||
fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
|
||||
fpath_new = "".join(
|
||||
[
|
||||
froot.replace("page", "p"),
|
||||
"_rotated",
|
||||
fext
|
||||
]
|
||||
)
|
||||
os.rename(fpath, fpath_new)
|
||||
infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
|
||||
if infile.isEncrypted:
|
||||
|
|
@ -136,7 +142,8 @@ class PDFHandler(object):
|
|||
outfile.write(f)
|
||||
|
||||
def parse(
|
||||
self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs
|
||||
self, flavor="lattice", suppress_stdout=False, layout_kwargs=None,
|
||||
**kwargs
|
||||
):
|
||||
"""Extracts tables by calling parser.get_tables on all single
|
||||
page PDFs.
|
||||
|
|
@ -149,7 +156,7 @@ class PDFHandler(object):
|
|||
suppress_stdout : str (default: False)
|
||||
Suppress logs and warnings.
|
||||
layout_kwargs : dict, optional (default: {})
|
||||
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
|
||||
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
|
||||
kwargs : dict
|
||||
See camelot.read_pdf kwargs.
|
||||
|
||||
|
|
@ -159,17 +166,21 @@ class PDFHandler(object):
|
|||
List of tables found in PDF.
|
||||
|
||||
"""
|
||||
layout_kwargs = layout_kwargs or {}
|
||||
tables = []
|
||||
with TemporaryDirectory() as tempdir:
|
||||
for p in self.pages:
|
||||
self._save_page(self.filepath, p, tempdir)
|
||||
pages = [
|
||||
os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
|
||||
os.path.join(tempdir, "page-{0}.pdf".format(p))
|
||||
for p in self.pages
|
||||
]
|
||||
parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
|
||||
parser = Lattice(**kwargs) \
|
||||
if flavor == "lattice" else Stream(**kwargs)
|
||||
for p in pages:
|
||||
t = parser.extract_tables(
|
||||
p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
|
||||
p, suppress_stdout=suppress_stdout,
|
||||
layout_kwargs=layout_kwargs
|
||||
)
|
||||
tables.extend(t)
|
||||
return TableList(sorted(tables))
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ def read_pdf(
|
|||
password=None,
|
||||
flavor="lattice",
|
||||
suppress_stdout=False,
|
||||
layout_kwargs={},
|
||||
layout_kwargs=None,
|
||||
**kwargs
|
||||
):
|
||||
"""Read PDF and return extracted tables.
|
||||
|
|
@ -80,16 +80,16 @@ def read_pdf(
|
|||
Size of a pixel neighborhood that is used to calculate a
|
||||
threshold value for the pixel: 3, 5, 7, and so on.
|
||||
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||
threshold_constant* : int, optional (default: -2)
|
||||
Constant subtracted from the mean or weighted mean.
|
||||
Normally, it is positive but may be zero or negative as well.
|
||||
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||
iterations* : int, optional (default: 0)
|
||||
Number of times for erosion/dilation is applied.
|
||||
|
||||
For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
|
||||
For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_. # noqa
|
||||
resolution* : int, optional (default: 300)
|
||||
Resolution used for PDF to PNG conversion.
|
||||
|
||||
|
|
@ -98,6 +98,7 @@ def read_pdf(
|
|||
tables : camelot.core.TableList
|
||||
|
||||
"""
|
||||
layout_kwargs = layout_kwargs or {}
|
||||
if flavor not in ["lattice", "stream"]:
|
||||
raise NotImplementedError(
|
||||
"Unknown flavor specified." " Use either 'lattice' or 'stream'"
|
||||
|
|
|
|||
|
|
@ -12,9 +12,18 @@ class BaseParser(object):
|
|||
def _generate_layout(self, filename, layout_kwargs):
|
||||
self.filename = filename
|
||||
self.layout_kwargs = layout_kwargs
|
||||
self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs)
|
||||
self.layout, self.dimensions = get_page_layout(
|
||||
filename,
|
||||
**layout_kwargs
|
||||
)
|
||||
self.images = get_text_objects(self.layout, ltype="image")
|
||||
self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text")
|
||||
self.vertical_text = get_text_objects(self.layout, ltype="vertical_text")
|
||||
self.horizontal_text = get_text_objects(
|
||||
self.layout,
|
||||
ltype="horizontal_text"
|
||||
)
|
||||
self.vertical_text = get_text_objects(
|
||||
self.layout,
|
||||
ltype="vertical_text"
|
||||
)
|
||||
self.pdf_width, self.pdf_height = self.dimensions
|
||||
self.rootname, __ = os.path.splitext(self.filename)
|
||||
|
|
|
|||
|
|
@ -2,14 +2,10 @@
|
|||
|
||||
from __future__ import division
|
||||
import os
|
||||
import sys
|
||||
import copy
|
||||
import locale
|
||||
import logging
|
||||
import warnings
|
||||
import subprocess
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .base import BaseParser
|
||||
|
|
@ -80,7 +76,7 @@ class Lattice(BaseParser):
|
|||
Size of a pixel neighborhood that is used to calculate a
|
||||
threshold value for the pixel: 3, 5, 7, and so on.
|
||||
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||
threshold_constant : int, optional (default: -2)
|
||||
Constant subtracted from the mean or weighted mean.
|
||||
Normally, it is positive but may be zero or negative as well.
|
||||
|
|
@ -102,7 +98,7 @@ class Lattice(BaseParser):
|
|||
process_background=False,
|
||||
line_scale=15,
|
||||
copy_text=None,
|
||||
shift_text=["l", "t"],
|
||||
shift_text=None,
|
||||
split_text=False,
|
||||
flag_size=False,
|
||||
strip_text="",
|
||||
|
|
@ -114,6 +110,7 @@ class Lattice(BaseParser):
|
|||
resolution=300,
|
||||
**kwargs
|
||||
):
|
||||
shift_text = shift_text or ["l", "t"]
|
||||
self.table_regions = table_regions
|
||||
self.table_areas = table_areas
|
||||
self.process_background = process_background
|
||||
|
|
@ -217,8 +214,7 @@ class Lattice(BaseParser):
|
|||
)
|
||||
gs_call = gs_call.encode().split()
|
||||
null = open(os.devnull, "wb")
|
||||
with Ghostscript(*gs_call, stdout=null) as gs:
|
||||
pass
|
||||
Ghostscript(*gs_call, stdout=null)
|
||||
null.close()
|
||||
|
||||
def _generate_table_bbox(self):
|
||||
|
|
@ -247,7 +243,8 @@ class Lattice(BaseParser):
|
|||
image_height_scaler = image_height / float(self.pdf_height)
|
||||
pdf_width_scaler = self.pdf_width / float(image_width)
|
||||
pdf_height_scaler = self.pdf_height / float(image_height)
|
||||
image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
|
||||
image_scalers = (image_width_scaler,
|
||||
image_height_scaler, self.pdf_height)
|
||||
pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
|
||||
|
||||
if self.table_areas is None:
|
||||
|
|
@ -291,7 +288,11 @@ class Lattice(BaseParser):
|
|||
|
||||
self.table_bbox_unscaled = copy.deepcopy(table_bbox)
|
||||
|
||||
self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image(
|
||||
[
|
||||
self.table_bbox,
|
||||
self.vertical_segments,
|
||||
self.horizontal_segments
|
||||
] = scale_image(
|
||||
table_bbox, vertical_segments, horizontal_segments, pdf_scalers
|
||||
)
|
||||
|
||||
|
|
@ -315,7 +316,10 @@ class Lattice(BaseParser):
|
|||
rows.extend([tk[1], tk[3]])
|
||||
# sort horizontal and vertical segments
|
||||
cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
|
||||
rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
|
||||
rows = merge_close_lines(
|
||||
sorted(rows, reverse=True),
|
||||
line_tol=self.line_tol
|
||||
)
|
||||
# make grid using x and y coord of shortlisted rows and cols
|
||||
cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
|
||||
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
||||
|
|
@ -359,7 +363,10 @@ class Lattice(BaseParser):
|
|||
accuracy = compute_accuracy([[100, pos_errors]])
|
||||
|
||||
if self.copy_text is not None:
|
||||
table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
|
||||
table = Lattice._copy_spanning_text(
|
||||
table,
|
||||
copy_text=self.copy_text
|
||||
)
|
||||
|
||||
data = table.data
|
||||
table.df = pd.DataFrame(data)
|
||||
|
|
@ -383,20 +390,28 @@ class Lattice(BaseParser):
|
|||
|
||||
return table
|
||||
|
||||
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
|
||||
def extract_tables(
|
||||
self,
|
||||
filename,
|
||||
suppress_stdout=False,
|
||||
layout_kwargs=None
|
||||
):
|
||||
layout_kwargs = layout_kwargs or {}
|
||||
self._generate_layout(filename, layout_kwargs)
|
||||
rootname = os.path.basename(self.rootname)
|
||||
if not suppress_stdout:
|
||||
logger.info("Processing {}".format(os.path.basename(self.rootname)))
|
||||
logger.info("Processing {rootname}".format(rootname=rootname))
|
||||
|
||||
if not self.horizontal_text:
|
||||
if self.images:
|
||||
warnings.warn(
|
||||
"{} is image-based, camelot only works on"
|
||||
" text-based pages.".format(os.path.basename(self.rootname))
|
||||
"{rootname} is image-based, "
|
||||
"camelot only works on text-based pages."
|
||||
.format(rootname=rootname)
|
||||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found on {}".format(os.path.basename(self.rootname))
|
||||
"No tables found on {rootname}".format(rootname=rootname)
|
||||
)
|
||||
return []
|
||||
|
||||
|
|
@ -408,8 +423,10 @@ class Lattice(BaseParser):
|
|||
for table_idx, tk in enumerate(
|
||||
sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
|
||||
):
|
||||
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
|
||||
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
||||
cols, rows, v_s, h_s = self._generate_columns_and_rows(
|
||||
table_idx, tk)
|
||||
table = self._generate_table(
|
||||
table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
||||
table._bbox = tk
|
||||
_tables.append(table)
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ import pandas as pd
|
|||
|
||||
from .base import BaseParser
|
||||
from ..core import TextEdges, Table
|
||||
from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace
|
||||
from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
||||
compute_whitespace)
|
||||
|
||||
|
||||
logger = logging.getLogger("camelot")
|
||||
|
|
@ -70,6 +71,9 @@ class Stream(BaseParser):
|
|||
):
|
||||
self.table_regions = table_regions
|
||||
self.table_areas = table_areas
|
||||
self.table_bbox = None
|
||||
self.t_bbox = None
|
||||
self.textedges = []
|
||||
self.columns = columns
|
||||
self._validate_columns()
|
||||
self.split_text = split_text
|
||||
|
|
@ -95,10 +99,10 @@ class Stream(BaseParser):
|
|||
Tuple (x0, y0, x1, y1) in pdf coordinate space.
|
||||
|
||||
"""
|
||||
xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]])
|
||||
ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]])
|
||||
xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]])
|
||||
ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]])
|
||||
xmin = min(t.x0 for direction in t_bbox for t in t_bbox[direction])
|
||||
ymin = min(t.y0 for direction in t_bbox for t in t_bbox[direction])
|
||||
xmax = max(t.x1 for direction in t_bbox for t in t_bbox[direction])
|
||||
ymax = max(t.y1 for direction in t_bbox for t in t_bbox[direction])
|
||||
text_bbox = (xmin, ymin, xmax, ymax)
|
||||
return text_bbox
|
||||
|
||||
|
|
@ -119,21 +123,25 @@ class Stream(BaseParser):
|
|||
Two-dimensional list of text objects grouped into rows.
|
||||
|
||||
"""
|
||||
row_y = 0
|
||||
row_y = None
|
||||
rows = []
|
||||
temp = []
|
||||
for t in text:
|
||||
non_empty_text = [t for t in text if t.get_text().strip()]
|
||||
for t in non_empty_text:
|
||||
# is checking for upright necessary?
|
||||
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
||||
# type(obj) is LTChar]):
|
||||
if t.get_text().strip():
|
||||
if not np.isclose(row_y, t.y0, atol=row_tol):
|
||||
# if t.get_text().strip() and all([obj.upright \
|
||||
# for obj in t._objs
|
||||
# if type(obj) is LTChar]):
|
||||
if row_y is None:
|
||||
row_y = t.y0
|
||||
elif not np.isclose(row_y, t.y0, atol=row_tol):
|
||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||
temp = []
|
||||
# We update the row's bottom as we go, to be forgiving if there
|
||||
# is a gradual change across multiple columns.
|
||||
row_y = t.y0
|
||||
temp.append(t)
|
||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||
__ = rows.pop(0) # TODO: hacky
|
||||
return rows
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -170,7 +178,8 @@ class Stream(BaseParser):
|
|||
merged.append(higher)
|
||||
elif column_tol < 0:
|
||||
if higher[0] <= lower[1]:
|
||||
if np.isclose(higher[0], lower[1], atol=abs(column_tol)):
|
||||
if np.isclose(higher[0], lower[1],
|
||||
atol=abs(column_tol)):
|
||||
merged.append(higher)
|
||||
else:
|
||||
upper_bound = max(lower[1], higher[1])
|
||||
|
|
@ -198,10 +207,13 @@ class Stream(BaseParser):
|
|||
|
||||
"""
|
||||
row_mids = [
|
||||
sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0
|
||||
sum((t.y0 + t.y1) / 2 for t in r) / len(r) if len(r) > 0 else 0
|
||||
for r in rows_grouped
|
||||
]
|
||||
rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
|
||||
rows = [
|
||||
(row_mids[i] + row_mids[i - 1]) / 2
|
||||
for i in range(1, len(row_mids))
|
||||
]
|
||||
rows.insert(0, text_y_max)
|
||||
rows.append(text_y_min)
|
||||
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
||||
|
|
@ -230,7 +242,9 @@ class Stream(BaseParser):
|
|||
text = Stream._group_rows(text, row_tol=row_tol)
|
||||
elements = [len(r) for r in text]
|
||||
new_cols = [
|
||||
(t.x0, t.x1) for r in text if len(r) == max(elements) for t in r
|
||||
(t.x0, t.x1)
|
||||
for r in text if len(r) == max(elements)
|
||||
for t in r
|
||||
]
|
||||
cols.extend(Stream._merge_columns(sorted(new_cols)))
|
||||
return cols
|
||||
|
|
@ -262,12 +276,13 @@ class Stream(BaseParser):
|
|||
def _validate_columns(self):
|
||||
if self.table_areas is not None and self.columns is not None:
|
||||
if len(self.table_areas) != len(self.columns):
|
||||
raise ValueError("Length of table_areas and columns" " should be equal")
|
||||
raise ValueError("Length of table_areas and columns"
|
||||
" should be equal")
|
||||
|
||||
def _nurminen_table_detection(self, textlines):
|
||||
"""A general implementation of the table detection algorithm
|
||||
described by Anssi Nurminen's master's thesis.
|
||||
Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3
|
||||
Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 # noqa
|
||||
|
||||
Assumes that tables are situated relatively far apart
|
||||
vertically.
|
||||
|
|
@ -284,7 +299,7 @@ class Stream(BaseParser):
|
|||
# guess table areas using textlines and relevant edges
|
||||
table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
|
||||
# treat whole page as table area if no table areas found
|
||||
if not len(table_bbox):
|
||||
if not table_bbox:
|
||||
table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
|
||||
|
||||
return table_bbox
|
||||
|
|
@ -302,7 +317,8 @@ class Stream(BaseParser):
|
|||
y1 = float(y1)
|
||||
x2 = float(x2)
|
||||
y2 = float(y2)
|
||||
region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text)
|
||||
region_text = text_in_bbox(
|
||||
(x1, y2, x2, y1), self.horizontal_text)
|
||||
hor_text.extend(region_text)
|
||||
# find tables based on nurminen's detection algorithm
|
||||
table_bbox = self._nurminen_table_detection(hor_text)
|
||||
|
|
@ -328,8 +344,10 @@ class Stream(BaseParser):
|
|||
|
||||
self.t_bbox = t_bbox
|
||||
|
||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
||||
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
|
||||
text_x_min, text_y_min, text_x_max, text_y_max = \
|
||||
self._text_bbox(self.t_bbox)
|
||||
rows_grouped = self._group_rows(
|
||||
self.t_bbox["horizontal"], row_tol=self.row_tol)
|
||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||
elements = [len(r) for r in rows_grouped]
|
||||
|
||||
|
|
@ -354,14 +372,23 @@ class Stream(BaseParser):
|
|||
# see if the list contains elements, if yes, then use
|
||||
# the mode after removing 1s
|
||||
elements = list(filter(lambda x: x != 1, elements))
|
||||
if len(elements):
|
||||
if elements:
|
||||
ncols = max(set(elements), key=elements.count)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found in table area {}".format(table_idx + 1)
|
||||
"No tables found in table area {}"
|
||||
.format(table_idx + 1)
|
||||
)
|
||||
cols = [
|
||||
(t.x0, t.x1)
|
||||
for r in rows_grouped
|
||||
if len(r) == ncols
|
||||
for t in r
|
||||
]
|
||||
cols = self._merge_columns(
|
||||
sorted(cols),
|
||||
column_tol=self.column_tol
|
||||
)
|
||||
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
|
||||
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
|
||||
inner_text = []
|
||||
for i in range(1, len(cols)):
|
||||
left = cols[i - 1][1]
|
||||
|
|
@ -431,23 +458,30 @@ class Stream(BaseParser):
|
|||
|
||||
return table
|
||||
|
||||
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
|
||||
def extract_tables(self, filename, suppress_stdout=False,
|
||||
layout_kwargs=None):
|
||||
layout_kwargs = layout_kwargs or {}
|
||||
self._generate_layout(filename, layout_kwargs)
|
||||
if not suppress_stdout:
|
||||
logger.info("Processing {}".format(os.path.basename(self.rootname)))
|
||||
logger.info("Processing {}".format(
|
||||
os.path.basename(self.rootname)))
|
||||
|
||||
if not self.horizontal_text:
|
||||
if self.images:
|
||||
warnings.warn(
|
||||
"{} is image-based, camelot only works on"
|
||||
" text-based pages.".format(os.path.basename(self.rootname))
|
||||
" text-based pages.".format(
|
||||
os.path.basename(self.rootname))
|
||||
)
|
||||
else:
|
||||
warnings.warn(
|
||||
"No tables found on {}".format(os.path.basename(self.rootname))
|
||||
"No tables found on {}".format(
|
||||
os.path.basename(self.rootname))
|
||||
)
|
||||
return []
|
||||
|
||||
# Identify plausible areas within the doc where tables lie,
|
||||
# populate table_bbox keys with these areas.
|
||||
self._generate_table_bbox()
|
||||
|
||||
_tables = []
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class PlotMethods(object):
|
|||
raise NotImplementedError(
|
||||
"Lattice flavor does not support kind='{}'".format(kind)
|
||||
)
|
||||
elif table.flavor == "stream" and kind in ["joint", "line"]:
|
||||
elif table.flavor == "stream" and kind in ["line"]:
|
||||
raise NotImplementedError(
|
||||
"Stream flavor does not support kind='{}'".format(kind)
|
||||
)
|
||||
|
|
@ -64,7 +64,13 @@ class PlotMethods(object):
|
|||
for t in table._text:
|
||||
xs.extend([t[0], t[2]])
|
||||
ys.extend([t[1], t[3]])
|
||||
ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
|
||||
ax.add_patch(
|
||||
patches.Rectangle(
|
||||
(t[0], t[1]),
|
||||
t[2] - t[0],
|
||||
t[3] - t[1]
|
||||
)
|
||||
)
|
||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||
return fig
|
||||
|
|
@ -132,7 +138,8 @@ class PlotMethods(object):
|
|||
for t in table_bbox.keys():
|
||||
ax.add_patch(
|
||||
patches.Rectangle(
|
||||
(t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red"
|
||||
(t[0], t[1]), t[2] - t[0], t[3] - t[1],
|
||||
fill=False, color="red"
|
||||
)
|
||||
)
|
||||
if not _FOR_LATTICE:
|
||||
|
|
@ -164,7 +171,10 @@ class PlotMethods(object):
|
|||
xs.extend([t[0], t[2]])
|
||||
ys.extend([t[1], t[3]])
|
||||
ax.add_patch(
|
||||
patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue")
|
||||
patches.Rectangle(
|
||||
(t[0], t[1]), t[2] - t[0], t[3] - t[1],
|
||||
color="blue"
|
||||
)
|
||||
)
|
||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ from pdfminer.layout import (
|
|||
)
|
||||
|
||||
|
||||
# pylint: disable=import-error
|
||||
# PyLint will evaluate both branches, and will necessarily complain about one
|
||||
# of them.
|
||||
PY3 = sys.version_info[0] >= 3
|
||||
if PY3:
|
||||
from urllib.request import urlopen
|
||||
|
|
@ -310,7 +313,8 @@ def get_rotation(chars, horizontal_text, vertical_text):
|
|||
if hlen < vlen:
|
||||
clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
|
||||
anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
|
||||
rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise"
|
||||
rotation = "anticlockwise" if clockwise < anticlockwise \
|
||||
else "clockwise"
|
||||
return rotation
|
||||
|
||||
|
||||
|
|
@ -341,12 +345,16 @@ def segments_in_bbox(bbox, v_segments, h_segments):
|
|||
v_s = [
|
||||
v
|
||||
for v in v_segments
|
||||
if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2
|
||||
if v[1] > lb[1] - 2 and
|
||||
v[3] < rt[1] + 2 and
|
||||
lb[0] - 2 <= v[0] <= rt[0] + 2
|
||||
]
|
||||
h_s = [
|
||||
h
|
||||
for h in h_segments
|
||||
if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2
|
||||
if h[0] > lb[0] - 2 and
|
||||
h[2] < rt[0] + 2 and
|
||||
lb[1] - 2 <= h[1] <= rt[1] + 2
|
||||
]
|
||||
return v_s, h_s
|
||||
|
||||
|
|
@ -464,10 +472,10 @@ def flag_font_size(textline, direction, strip_text=""):
|
|||
for t in textline
|
||||
if not isinstance(t, LTAnno)
|
||||
]
|
||||
l = [np.round(size, decimals=6) for text, size in d]
|
||||
if len(set(l)) > 1:
|
||||
text_sizes = [np.round(size, decimals=6) for text, size in d]
|
||||
if len(set(text_sizes)) > 1:
|
||||
flist = []
|
||||
min_size = min(l)
|
||||
min_size = min(text_sizes)
|
||||
for key, chars in groupby(d, itemgetter(1)):
|
||||
if key == min_size:
|
||||
fchars = [t[0] for t in chars]
|
||||
|
|
@ -511,7 +519,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
|||
of row/column and text is the an lttextline substring.
|
||||
|
||||
"""
|
||||
idx = 0
|
||||
cut_text = []
|
||||
bbox = textline.bbox
|
||||
try:
|
||||
|
|
@ -528,7 +535,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
|||
]
|
||||
r = r_idx[0]
|
||||
x_cuts = [
|
||||
(c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right
|
||||
(c, table.cells[r][c].x2)
|
||||
for c in x_overlap
|
||||
if table.cells[r][c].right
|
||||
]
|
||||
if not x_cuts:
|
||||
x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
|
||||
|
|
@ -561,7 +570,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
|||
]
|
||||
c = c_idx[0]
|
||||
y_cuts = [
|
||||
(r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom
|
||||
(r, table.cells[r][c].y1)
|
||||
for r in y_overlap
|
||||
if table.cells[r][c].bottom
|
||||
]
|
||||
if not y_cuts:
|
||||
y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
|
||||
|
|
@ -644,9 +655,8 @@ def get_table_index(
|
|||
"""
|
||||
r_idx, c_idx = [-1] * 2
|
||||
for r in range(len(table.rows)):
|
||||
if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[
|
||||
r
|
||||
][1]:
|
||||
if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and \
|
||||
(t.y0 + t.y1) / 2.0 > table.rows[r][1]:
|
||||
lt_col_overlap = []
|
||||
for c in table.cols:
|
||||
if c[0] <= t.x1 and c[1] >= t.x0:
|
||||
|
|
@ -681,7 +691,9 @@ def get_table_index(
|
|||
X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1)
|
||||
Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1)
|
||||
charea = X * Y
|
||||
error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
|
||||
error = (
|
||||
(X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))
|
||||
) / charea
|
||||
|
||||
if split_text:
|
||||
return (
|
||||
|
|
@ -697,13 +709,16 @@ def get_table_index(
|
|||
(
|
||||
r_idx,
|
||||
c_idx,
|
||||
flag_font_size(t._objs, direction, strip_text=strip_text),
|
||||
flag_font_size(t._objs,
|
||||
direction,
|
||||
strip_text=strip_text),
|
||||
)
|
||||
],
|
||||
error,
|
||||
)
|
||||
else:
|
||||
return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], error
|
||||
return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], \
|
||||
error
|
||||
|
||||
|
||||
def compute_accuracy(error_weights):
|
||||
|
|
@ -751,7 +766,6 @@ def compute_whitespace(d):
|
|||
|
||||
"""
|
||||
whitespace = 0
|
||||
r_nempty_cells, c_nempty_cells = [], []
|
||||
for i in d:
|
||||
for j in i:
|
||||
if j.strip() == "":
|
||||
|
|
@ -811,6 +825,7 @@ def get_page_layout(
|
|||
width = layout.bbox[2]
|
||||
height = layout.bbox[3]
|
||||
dim = (width, height)
|
||||
break # we assume a single page pdf
|
||||
return layout, dim
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ The easiest way to install Camelot is to install it with `conda`_, which is a pa
|
|||
|
||||
$ conda install -c conda-forge camelot-py
|
||||
|
||||
.. note:: Camelot is available for Python 2.7, 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
|
||||
.. note:: Camelot is available for Python 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
|
||||
|
||||
.. _conda: https://conda.io/docs/
|
||||
.. _Anaconda: http://docs.continuum.io/anaconda/
|
||||
|
|
|
|||
|
|
@ -4,5 +4,5 @@ numpy>=1.13.3
|
|||
opencv-python>=3.4.2.17
|
||||
openpyxl>=2.5.8
|
||||
pandas>=0.23.4
|
||||
pdfminer.six>=20170720
|
||||
pdfminer.six>=20200402
|
||||
PyPDF2>=1.26.0
|
||||
|
|
|
|||
5
setup.py
|
|
@ -19,7 +19,7 @@ requires = [
|
|||
'numpy>=1.13.3',
|
||||
'openpyxl>=2.5.8',
|
||||
'pandas>=0.23.4',
|
||||
'pdfminer.six>=20170720',
|
||||
'pdfminer.six>=20200402',
|
||||
'PyPDF2>=1.26.0'
|
||||
]
|
||||
|
||||
|
|
@ -69,9 +69,8 @@ def setup_package():
|
|||
},
|
||||
classifiers=[
|
||||
# Trove classifiers
|
||||
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
|
||||
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7'
|
||||
|
|
|
|||
343
tests/data.py
|
|
@ -4,16 +4,6 @@ from __future__ import unicode_literals
|
|||
|
||||
|
||||
data_stream = [
|
||||
[
|
||||
"",
|
||||
"Table: 5 Public Health Outlay 2012-13 (Budget Estimates) (Rs. in 000)",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
|
||||
["", "", "", "", "", "Revenue &", "", ""],
|
||||
["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
|
||||
|
|
@ -80,7 +70,8 @@ data_stream = [
|
|||
"5,000",
|
||||
"33,051,480",
|
||||
],
|
||||
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560", "4,508,180"],
|
||||
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560",
|
||||
"4,508,180"],
|
||||
[
|
||||
"Gujarat",
|
||||
"26,328,400",
|
||||
|
|
@ -171,7 +162,8 @@ data_stream = [
|
|||
"313,762",
|
||||
"67,044,159",
|
||||
],
|
||||
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700", "0", "3,579,700"],
|
||||
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700",
|
||||
"0", "3,579,700"],
|
||||
[
|
||||
"Meghalaya",
|
||||
"2,894,093",
|
||||
|
|
@ -236,7 +228,8 @@ data_stream = [
|
|||
|
||||
data_stream_table_rotated = [
|
||||
[
|
||||
"Table 21 Current use of contraception by background characteristics\u2014Continued",
|
||||
"Table 21 Current use of contraception by background characteristics"
|
||||
"\u2014Continued",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -330,7 +323,8 @@ data_stream_table_rotated = [
|
|||
"Total",
|
||||
"women",
|
||||
],
|
||||
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
||||
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||
"", ""],
|
||||
[
|
||||
"Scheduled caste",
|
||||
"74.8",
|
||||
|
|
@ -407,7 +401,8 @@ data_stream_table_rotated = [
|
|||
"100.0",
|
||||
"3,319",
|
||||
],
|
||||
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
||||
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||
"", "", "", ""],
|
||||
[
|
||||
"Lowest",
|
||||
"64.5",
|
||||
|
|
@ -830,7 +825,8 @@ data_stream_table_rotated = [
|
|||
|
||||
data_stream_two_tables_1 = [
|
||||
[
|
||||
"[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)",
|
||||
"Program. Represents arrests reported (not charged) by 12,910 "
|
||||
"agencies with a total population of 247,526,916 as estimated",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -842,7 +838,8 @@ data_stream_two_tables_1 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
|
||||
"by the FBI. Some persons may be arrested more than once during a "
|
||||
"year, therefore, the data in this table, in some cases,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -854,19 +851,8 @@ data_stream_two_tables_1 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"could represent multiple arrests of the same person. See text, this section and source]",
|
||||
"could represent multiple arrests of the same person. See text, "
|
||||
"this section and source]",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -903,7 +889,8 @@ data_stream_two_tables_1 = [
|
|||
"and over",
|
||||
],
|
||||
[
|
||||
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . .\n . .\n . .\n . . .",
|
||||
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . "
|
||||
".\n . .\n . .\n . . .",
|
||||
"11,062 .6",
|
||||
"1,540 .0",
|
||||
"9,522 .6",
|
||||
|
|
@ -915,7 +902,8 @@ data_stream_two_tables_1 = [
|
|||
"2,330 .9",
|
||||
],
|
||||
[
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n . .",
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||
".\n . .",
|
||||
"467 .9",
|
||||
"69 .1",
|
||||
"398 .8",
|
||||
|
|
@ -976,7 +964,8 @@ data_stream_two_tables_1 = [
|
|||
"64.5",
|
||||
],
|
||||
[
|
||||
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . . .",
|
||||
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . "
|
||||
". .",
|
||||
"1,396 .4",
|
||||
"338 .7",
|
||||
"1,057 .7",
|
||||
|
|
@ -1060,7 +1049,8 @@ data_stream_two_tables_1 = [
|
|||
"25.5",
|
||||
],
|
||||
[
|
||||
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n.",
|
||||
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n.",
|
||||
"173.7",
|
||||
"5.1",
|
||||
"168.5",
|
||||
|
|
@ -1290,19 +1280,8 @@ data_stream_two_tables_1 = [
|
|||
],
|
||||
[
|
||||
"",
|
||||
"– Represents zero. X Not applicable. 1 Buying, receiving, possessing stolen property. 2 Except forcible rape and prostitution.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
||||
"– Represents zero. X Not applicable. 1 Buying, receiving, "
|
||||
"possessing stolen property. 2 Except forcible rape and prostitution.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1315,17 +1294,10 @@ data_stream_two_tables_1 = [
|
|||
]
|
||||
|
||||
data_stream_two_tables_2 = [
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
|
||||
[
|
||||
"[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
|
||||
"[Based on Uniform Crime Reporting (UCR) Program. Represents "
|
||||
"arrests reported (not charged) by 12,371 agencies",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1333,7 +1305,8 @@ data_stream_two_tables_2 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]",
|
||||
"with a total population of 239,839,971 as estimated by the FBI. "
|
||||
"See headnote, Table 324]",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1344,7 +1317,8 @@ data_stream_two_tables_2 = [
|
|||
["Offense charged", "", "", "", "Indian/Alaskan", "Asian Pacific"],
|
||||
["", "Total", "White", "Black", "Native", "Islander"],
|
||||
[
|
||||
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n .\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
||||
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n "
|
||||
".\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
||||
"10,690,561",
|
||||
"7,389,208",
|
||||
"3,027,153",
|
||||
|
|
@ -1352,7 +1326,8 @@ data_stream_two_tables_2 = [
|
|||
"123,656",
|
||||
],
|
||||
[
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||
".\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
||||
"456,965",
|
||||
"268,346",
|
||||
"177,766",
|
||||
|
|
@ -1368,7 +1343,8 @@ data_stream_two_tables_2 = [
|
|||
"97",
|
||||
],
|
||||
[
|
||||
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"16,362",
|
||||
"10,644",
|
||||
"5,319",
|
||||
|
|
@ -1376,7 +1352,8 @@ data_stream_two_tables_2 = [
|
|||
"230",
|
||||
],
|
||||
[
|
||||
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"100,496",
|
||||
"43,039",
|
||||
"55,742",
|
||||
|
|
@ -1384,7 +1361,8 @@ data_stream_two_tables_2 = [
|
|||
"989",
|
||||
],
|
||||
[
|
||||
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. .\n.\n.\n.",
|
||||
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.",
|
||||
"330,368",
|
||||
"209,922",
|
||||
"111,904",
|
||||
|
|
@ -1392,7 +1370,8 @@ data_stream_two_tables_2 = [
|
|||
"3,929",
|
||||
],
|
||||
[
|
||||
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n . .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
||||
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n "
|
||||
". .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
||||
"1,364,409",
|
||||
"922,139",
|
||||
"406,382",
|
||||
|
|
@ -1400,7 +1379,8 @@ data_stream_two_tables_2 = [
|
|||
"18,289",
|
||||
],
|
||||
[
|
||||
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n. . . .",
|
||||
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n. . . .",
|
||||
"234,551",
|
||||
"155,994",
|
||||
"74,419",
|
||||
|
|
@ -1408,7 +1388,8 @@ data_stream_two_tables_2 = [
|
|||
"2,117",
|
||||
],
|
||||
[
|
||||
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"1,056,473",
|
||||
"719,983",
|
||||
"306,625",
|
||||
|
|
@ -1416,7 +1397,8 @@ data_stream_two_tables_2 = [
|
|||
"15,219",
|
||||
],
|
||||
[
|
||||
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. "
|
||||
".\n.\n. .\n.",
|
||||
"63,919",
|
||||
"39,077",
|
||||
"23,184",
|
||||
|
|
@ -1424,7 +1406,8 @@ data_stream_two_tables_2 = [
|
|||
"841",
|
||||
],
|
||||
[
|
||||
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . . . .",
|
||||
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . . . .",
|
||||
"9,466",
|
||||
"7,085",
|
||||
"2,154",
|
||||
|
|
@ -1432,7 +1415,8 @@ data_stream_two_tables_2 = [
|
|||
"112",
|
||||
],
|
||||
[
|
||||
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. "
|
||||
".\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"1,032,502",
|
||||
"672,865",
|
||||
"332,435",
|
||||
|
|
@ -1440,7 +1424,8 @@ data_stream_two_tables_2 = [
|
|||
"12,075",
|
||||
],
|
||||
[
|
||||
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. .\n. .\n.",
|
||||
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. "
|
||||
".\n. .\n.",
|
||||
"67,054",
|
||||
"44,730",
|
||||
"21,251",
|
||||
|
|
@ -1448,7 +1433,8 @@ data_stream_two_tables_2 = [
|
|||
"728",
|
||||
],
|
||||
[
|
||||
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. . . . . . .",
|
||||
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||
".\n.\n.\n. . . . . . .",
|
||||
"161,233",
|
||||
"108,032",
|
||||
"50,367",
|
||||
|
|
@ -1456,7 +1442,8 @@ data_stream_two_tables_2 = [
|
|||
"1,519",
|
||||
],
|
||||
[
|
||||
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n. .\n.\n.\n.\n.",
|
||||
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||
".\n.\n. .\n.\n.\n.\n.",
|
||||
"13,960",
|
||||
"9,208",
|
||||
"4,429",
|
||||
|
|
@ -1472,7 +1459,8 @@ data_stream_two_tables_2 = [
|
|||
"742",
|
||||
],
|
||||
[
|
||||
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
||||
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||
".\n. .\n.\n.\n.\n. .",
|
||||
"212,173",
|
||||
"157,723",
|
||||
"48,746",
|
||||
|
|
@ -1496,7 +1484,8 @@ data_stream_two_tables_2 = [
|
|||
"1,413",
|
||||
],
|
||||
[
|
||||
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"60,175",
|
||||
"44,240",
|
||||
"14,347",
|
||||
|
|
@ -1504,7 +1493,8 @@ data_stream_two_tables_2 = [
|
|||
"873",
|
||||
],
|
||||
[
|
||||
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.",
|
||||
"1,301,629",
|
||||
"845,974",
|
||||
"437,623",
|
||||
|
|
@ -1512,7 +1502,8 @@ data_stream_two_tables_2 = [
|
|||
"9,444",
|
||||
],
|
||||
[
|
||||
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . .\n.\n.\n.\n.\n. .\n. .",
|
||||
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . "
|
||||
".\n.\n.\n.\n.\n. .\n. .",
|
||||
"8,046",
|
||||
"2,290",
|
||||
"5,518",
|
||||
|
|
@ -1528,7 +1519,8 @@ data_stream_two_tables_2 = [
|
|||
"624",
|
||||
],
|
||||
[
|
||||
"Driving under the influence . . . . . . .\n. .\n.\n. .\n.\n.\n.\n.\n. .",
|
||||
"Driving under the influence . . . . . . .\n. .\n.\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"1,105,401",
|
||||
"954,444",
|
||||
"121,594",
|
||||
|
|
@ -1536,7 +1528,8 @@ data_stream_two_tables_2 = [
|
|||
"14,460",
|
||||
],
|
||||
[
|
||||
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . "
|
||||
".\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"444,087",
|
||||
"373,189",
|
||||
"50,431",
|
||||
|
|
@ -1544,7 +1537,8 @@ data_stream_two_tables_2 = [
|
|||
"5,591",
|
||||
],
|
||||
[
|
||||
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . .\n.\n.\n.\n.\n.\n.",
|
||||
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . "
|
||||
".\n.\n.\n.\n.\n.\n.",
|
||||
"469,958",
|
||||
"387,542",
|
||||
"71,020",
|
||||
|
|
@ -1552,7 +1546,8 @@ data_stream_two_tables_2 = [
|
|||
"2,844",
|
||||
],
|
||||
[
|
||||
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.",
|
||||
"515,689",
|
||||
"326,563",
|
||||
"176,169",
|
||||
|
|
@ -1560,7 +1555,8 @@ data_stream_two_tables_2 = [
|
|||
"4,174",
|
||||
],
|
||||
[
|
||||
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"26,347",
|
||||
"14,581",
|
||||
"11,031",
|
||||
|
|
@ -1568,7 +1564,8 @@ data_stream_two_tables_2 = [
|
|||
"192",
|
||||
],
|
||||
[
|
||||
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. .\n.",
|
||||
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. "
|
||||
".\n.",
|
||||
"2,929,217",
|
||||
"1,937,221",
|
||||
"911,670",
|
||||
|
|
@ -1576,7 +1573,8 @@ data_stream_two_tables_2 = [
|
|||
"36,446",
|
||||
],
|
||||
[
|
||||
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n.\n.\n. .\n. . . .",
|
||||
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .\n. . . .",
|
||||
"1,513",
|
||||
"677",
|
||||
"828",
|
||||
|
|
@ -1592,7 +1590,8 @@ data_stream_two_tables_2 = [
|
|||
"1,060",
|
||||
],
|
||||
[
|
||||
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
||||
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||
".\n. .\n.\n.\n.\n. .",
|
||||
"73,616",
|
||||
"48,343",
|
||||
"19,670",
|
||||
|
|
@ -1600,14 +1599,6 @@ data_stream_two_tables_2 = [
|
|||
"3,950",
|
||||
],
|
||||
["1 Except forcible rape and prostitution.", "", "", "", "", ""],
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
]
|
||||
|
||||
data_stream_table_areas = [
|
||||
|
|
@ -1634,10 +1625,12 @@ data_stream_columns = [
|
|||
"Nombre Localidad",
|
||||
],
|
||||
["Entidad", "", "Municipio", "", "Localidad", ""],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0094", "Granja Adelita"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0094",
|
||||
"Granja Adelita"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0096", "Agua Azul"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0100", "Rancho Alegre"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0102", "Los Arbolitos [Rancho]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0102",
|
||||
"Los Arbolitos [Rancho]"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1655,7 +1648,8 @@ data_stream_columns = [
|
|||
"0112",
|
||||
"Baj\xedo los V\xe1zquez",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0113", "Baj\xedo de Montoro"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0113",
|
||||
"Baj\xedo de Montoro"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1697,8 +1691,10 @@ data_stream_columns = [
|
|||
"Ca\xf1ada Honda [Estaci\xf3n]",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0127", "Los Ca\xf1os"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0128", "El Cari\xf1\xe1n"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0129", "El Carmen [Granja]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0128",
|
||||
"El Cari\xf1\xe1n"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0129",
|
||||
"El Carmen [Granja]"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1733,9 +1729,11 @@ data_stream_columns = [
|
|||
"El Colorado (El Soyatal)",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0146", "El Conejal"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0157", "Cotorina de Abajo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0157",
|
||||
"Cotorina de Abajo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0162", "Coyotes"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0166", "La Huerta (La Cruz)"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0166",
|
||||
"La Huerta (La Cruz)"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1752,17 +1750,20 @@ data_stream_columns = [
|
|||
"0171",
|
||||
"Los Cuervos (Los Ojos de Agua)",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0172", "San Jos\xe9 [Granja]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0172",
|
||||
"San Jos\xe9 [Granja]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0176", "La Chiripa"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0182", "Dolores"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0183", "Los Dolores"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0190", "El Duraznillo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0191", "Los Dur\xf3n"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0197", "La Escondida"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0201", "Brande Vin [Bodegas]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0201",
|
||||
"Brande Vin [Bodegas]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0207", "Valle Redondo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0209", "La Fortuna"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0212", "Lomas del Gachup\xedn"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0212",
|
||||
"Lomas del Gachup\xedn"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1772,22 +1773,12 @@ data_stream_columns = [
|
|||
"El Carmen (Gallinas G\xfceras) [Rancho]",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0216", "La Gloria"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0226", "Hacienda Nueva"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0226",
|
||||
"Hacienda Nueva"],
|
||||
]
|
||||
|
||||
data_stream_split_text = [
|
||||
[
|
||||
"FEB",
|
||||
"RUAR",
|
||||
"Y 2014 M27 (BUS)",
|
||||
"",
|
||||
"ALPHABETIC LISTING BY T",
|
||||
"YPE",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"ABLPDM27",
|
||||
],
|
||||
["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""],
|
||||
["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
|
||||
["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
|
||||
["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
|
||||
|
|
@ -1977,7 +1968,18 @@ data_stream_split_text = [
|
|||
"(872) 825-8309",
|
||||
"2014/04/11",
|
||||
],
|
||||
["", "", "A SENSU JAPANESE", "", "7123 SOUTH 92ND EAST", "", "", "", "", ""],
|
||||
[
|
||||
"",
|
||||
"",
|
||||
"A SENSU JAPANESE",
|
||||
"",
|
||||
"7123 SOUTH 92ND EAST",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"625422",
|
||||
"BAW",
|
||||
|
|
@ -2029,7 +2031,18 @@ data_stream_split_text = [
|
|||
"(580) 928-2700",
|
||||
"2014/09/08",
|
||||
],
|
||||
["", "", "ANDOLINI'S PIZZERIA &", "", "12140 EAST 96TH STREET", "", "", "", "", ""],
|
||||
[
|
||||
"",
|
||||
"",
|
||||
"ANDOLINI'S PIZZERIA &",
|
||||
"",
|
||||
"12140 EAST 96TH STREET",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"428377",
|
||||
"BAW",
|
||||
|
|
@ -2148,7 +2161,8 @@ data_stream_flag_size = [
|
|||
"from SBI",
|
||||
"from",
|
||||
],
|
||||
["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other", "NCDC"],
|
||||
["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other",
|
||||
"NCDC"],
|
||||
["", "", "", "", "", "& FIs", "", "", "", "Banks", ""],
|
||||
["1", "2=", "3", "4", "5", "6=", "7", "8", "9", "10", "11"],
|
||||
["", "(3 to 6)+14", "", "", "", "(7 to13)", "", "", "", "", ""],
|
||||
|
|
@ -2165,7 +2179,8 @@ data_stream_flag_size = [
|
|||
"-",
|
||||
"0.25",
|
||||
],
|
||||
["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-", "-", "-"],
|
||||
["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-",
|
||||
"-", "-"],
|
||||
[
|
||||
"Assam",
|
||||
"12.69",
|
||||
|
|
@ -2194,8 +2209,10 @@ data_stream_flag_size = [
|
|||
],
|
||||
["Chhattisgarh", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||
["Goa", "1.4", "1.02", "-", "-", "0.38", "0.31", "-", "0.07", "-", "-"],
|
||||
["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11", "-", "0.44"],
|
||||
["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64", "-", "0.49"],
|
||||
["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11",
|
||||
"-", "0.44"],
|
||||
["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64",
|
||||
"-", "0.49"],
|
||||
[
|
||||
"Himachal Pradesh",
|
||||
"8.02",
|
||||
|
|
@ -2223,7 +2240,8 @@ data_stream_flag_size = [
|
|||
"-",
|
||||
],
|
||||
["Jharkhand", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||
["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89", "-", "0.69"],
|
||||
["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89",
|
||||
"-", "0.69"],
|
||||
[
|
||||
"Kerala",
|
||||
"29.03",
|
||||
|
|
@ -2263,11 +2281,16 @@ data_stream_flag_size = [
|
|||
"0.02",
|
||||
"2.89",
|
||||
],
|
||||
["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-", "0.09"],
|
||||
["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05", "-", "0.03"],
|
||||
["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-", "-", "0.03"],
|
||||
["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-", "0.04"],
|
||||
["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66", "-", "0.2"],
|
||||
["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-",
|
||||
"0.09"],
|
||||
["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05",
|
||||
"-", "0.03"],
|
||||
["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-",
|
||||
"-", "0.03"],
|
||||
["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-",
|
||||
"0.04"],
|
||||
["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66",
|
||||
"-", "0.2"],
|
||||
[
|
||||
"Punjab",
|
||||
"19.18",
|
||||
|
|
@ -2295,8 +2318,10 @@ data_stream_flag_size = [
|
|||
"0.81",
|
||||
],
|
||||
["Sikkim", "0.16", "-", "-", "-", "0.16", "0.03", "-", "-", "-", "0.01"],
|
||||
["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-", "0.68"],
|
||||
["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-", "0.02"],
|
||||
["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-",
|
||||
"0.68"],
|
||||
["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-",
|
||||
"0.02"],
|
||||
["Uttaranchal", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||
[
|
||||
"Uttar Pradesh",
|
||||
|
|
@ -2393,11 +2418,13 @@ data_stream_edge_tol = [
|
|||
["Costs", "(0.21)"],
|
||||
["T\notal investment result per unit", "3.78"],
|
||||
[
|
||||
"1 The results cover the period from inception of the Fund at 8 April 2016 through 31 December 2016.",
|
||||
"1 The results cover the period from inception of the Fund at "
|
||||
"8 April 2016 through 31 December 2016.",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"2 The result per unit is calculated using the total number of outstanding unit as per the end of the",
|
||||
"2 The result per unit is calculated using the total number of "
|
||||
"outstanding unit as per the end of the",
|
||||
"",
|
||||
],
|
||||
["period.", ""],
|
||||
|
|
@ -2454,7 +2481,8 @@ data_lattice_table_rotated = [
|
|||
"Men",
|
||||
"Women",
|
||||
],
|
||||
["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645", "2391"],
|
||||
["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645",
|
||||
"2391"],
|
||||
[
|
||||
"Tamil Nadu",
|
||||
"7387",
|
||||
|
|
@ -2503,11 +2531,16 @@ data_lattice_table_rotated = [
|
|||
"1417",
|
||||
"1599",
|
||||
],
|
||||
["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122", "2503"],
|
||||
["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579", "1709"],
|
||||
["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093", "1628"],
|
||||
["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413", "2027"],
|
||||
["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185", "1366"],
|
||||
["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122",
|
||||
"2503"],
|
||||
["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579",
|
||||
"1709"],
|
||||
["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093",
|
||||
"1628"],
|
||||
["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413",
|
||||
"2027"],
|
||||
["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185",
|
||||
"1366"],
|
||||
[
|
||||
"Pooled",
|
||||
"38742",
|
||||
|
|
@ -2573,7 +2606,8 @@ data_lattice_two_tables_2 = [
|
|||
]
|
||||
|
||||
data_lattice_table_regions = [
|
||||
["Età dell’Assicurato \nall’epoca del decesso", "Misura % di \nmaggiorazione"],
|
||||
["Età dell’Assicurato \nall’epoca del decesso",
|
||||
"Misura % di \nmaggiorazione"],
|
||||
["18-75", "1,00%"],
|
||||
["76-80", "0,50%"],
|
||||
["81 in poi", "0,10%"],
|
||||
|
|
@ -2596,10 +2630,12 @@ data_lattice_table_areas = [
|
|||
["Kerala", "2400", "7.2", "0.5", "25.3", "20.1", "41.5", "5.5", ""],
|
||||
["Tamil Nadu", "2400", "21.4", "2.3", "8.8", "35.5", "25.8", "6.2", ""],
|
||||
["Karnataka", "2399", "37.4", "2.8", "12.5", "18.3", "23.1", "5.8", ""],
|
||||
["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9", ""],
|
||||
["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9",
|
||||
""],
|
||||
["Maharashtra", "2400", "22.0", "0.9", "17.3", "20.3", "32.6", "7.0", ""],
|
||||
["Gujarat", "2390", "28.6", "0.1", "14.4", "23.1", "26.9", "6.8", ""],
|
||||
["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6", ""],
|
||||
["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6",
|
||||
""],
|
||||
["Orissa", "2405", "33.2", "1.0", "10.4", "25.7", "21.2", "8.5", ""],
|
||||
["West Bengal", "2293", "41.7", "4.4", "13.2", "17.1", "21.2", "2.4", ""],
|
||||
["Uttar Pradesh", "2400", "35.3", "2.1", "4.5", "23.3", "27.1", "7.6", ""],
|
||||
|
|
@ -2650,7 +2686,8 @@ data_lattice_process_background = [
|
|||
"3,658",
|
||||
"3,183",
|
||||
],
|
||||
["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173", "855"],
|
||||
["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173",
|
||||
"855"],
|
||||
["Total", "", "47", "92", "11.81", "22,455", "19,584", "10,644"],
|
||||
]
|
||||
|
||||
|
|
@ -2689,7 +2726,8 @@ data_lattice_copy_text = [
|
|||
["COHS", "San Mateo", "Health Plan of San Mateo", "113,202"],
|
||||
["COHS", "Ventura", "Gold Coast Health Plan", "202,217"],
|
||||
["COHS", "Total COHS Enrollment", "", "2,176,064"],
|
||||
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "", "10,132,022"],
|
||||
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "",
|
||||
"10,132,022"],
|
||||
["PCCM", "Los Angeles", "AIDS Healthcare Foundation", "828"],
|
||||
["PCCM", "San Francisco", "Family Mosaic", "25"],
|
||||
["PCCM", "Total PHP Enrollment", "", "853"],
|
||||
|
|
@ -2721,7 +2759,8 @@ data_lattice_shift_text_left_top = [
|
|||
],
|
||||
["Blood Pressure #", "2400", "Men (≥ 18yrs)", "10%", "95%", "20%", "1728"],
|
||||
["", "", "Women (≥ 18 yrs)", "", "", "", "1728"],
|
||||
["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
||||
["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%",
|
||||
"1825"],
|
||||
["", "", "Women (≥ 18 yrs)", "", "", "", "1825"],
|
||||
[
|
||||
"Knowledge &\nPractices on HTN &\nDM",
|
||||
|
|
@ -2746,7 +2785,8 @@ data_lattice_shift_text_disable = [
|
|||
"Sample size\nper State",
|
||||
],
|
||||
["Anthropometry", "", "", "", "", "", ""],
|
||||
["Clinical Examination", "2400", "", "All the available individuals", "", "", ""],
|
||||
["Clinical Examination", "2400", "", "All the available individuals",
|
||||
"", "", ""],
|
||||
["History of morbidity", "", "", "", "", "", ""],
|
||||
[
|
||||
"Diet survey",
|
||||
|
|
@ -2758,9 +2798,11 @@ data_lattice_shift_text_disable = [
|
|||
"",
|
||||
],
|
||||
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
|
||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
|
||||
"1728"],
|
||||
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
|
||||
"1825"],
|
||||
[
|
||||
"Knowledge &\nPractices on HTN &",
|
||||
"2400",
|
||||
|
|
@ -2785,7 +2827,8 @@ data_lattice_shift_text_right_bottom = [
|
|||
],
|
||||
["Anthropometry", "", "", "", "", "", ""],
|
||||
["Clinical Examination", "", "", "", "", "", ""],
|
||||
["History of morbidity", "2400", "", "", "", "", "All the available individuals"],
|
||||
["History of morbidity", "2400", "", "", "", "",
|
||||
"All the available individuals"],
|
||||
[
|
||||
"Diet survey",
|
||||
"1200",
|
||||
|
|
@ -2796,9 +2839,11 @@ data_lattice_shift_text_right_bottom = [
|
|||
"All the individuals partaking meals in the HH",
|
||||
],
|
||||
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
|
||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
|
||||
"1728"],
|
||||
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
|
||||
"1825"],
|
||||
["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
|
||||
[
|
||||
"Knowledge &\nPractices on HTN &\nDM",
|
||||
|
|
@ -2820,7 +2865,7 @@ data_arabic = [
|
|||
]
|
||||
|
||||
data_stream_layout_kwargs = [
|
||||
["V i n s a u Ve r r e", ""],
|
||||
["V i n s a u V e r r e", ""],
|
||||
["Les Blancs", "12.5CL"],
|
||||
["A.O.P Côtes du Rhône", ""],
|
||||
["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
|
||||
|
|
|
|||
|
[Binary image diffs: seven images modified (one shrinks from 48 KiB to 33 KiB; the rest change by at most ~1 KiB) and one new 9.7 KiB image added.]
|
|
@ -19,10 +19,16 @@ def test_help_output():
|
|||
output = result.output
|
||||
|
||||
assert prog_name == "camelot"
|
||||
assert result.output.startswith("Usage: %(prog_name)s [OPTIONS] COMMAND" % locals())
|
||||
assert result.output.startswith(
|
||||
"Usage: %(prog_name)s [OPTIONS] COMMAND" %
|
||||
locals()
|
||||
)
|
||||
assert all(
|
||||
v in result.output
|
||||
for v in ["Options:", "--version", "--help", "Commands:", "lattice", "stream"]
|
||||
for v in [
|
||||
"Options:", "--version", "--help", "Commands:", "lattice",
|
||||
"stream"
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -120,21 +126,24 @@ def test_cli_output_format():
|
|||
# json
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "json", "--output", outfile.format("json"), "stream", infile],
|
||||
["--format", "json", "--output", outfile.format("json"), "stream",
|
||||
infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# excel
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile],
|
||||
["--format", "excel", "--output", outfile.format("xlsx"), "stream",
|
||||
infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# html
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
["--format", "html", "--output", outfile.format("html"), "stream", infile],
|
||||
["--format", "html", "--output", outfile.format("html"), "stream",
|
||||
infile],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
|
@ -166,6 +175,10 @@ def test_cli_quiet():
|
|||
assert "No tables found on page-1" in result.output
|
||||
|
||||
result = runner.invoke(
|
||||
cli, ["--quiet", "--format", "csv", "--output", outfile, "stream", infile]
|
||||
cli,
|
||||
[
|
||||
"--quiet", "--format", "csv", "--output", outfile, "stream",
|
||||
infile
|
||||
]
|
||||
)
|
||||
assert "No tables found on page-1" not in result.output
|
||||
|
|
|
|||
|
|
@ -11,12 +11,15 @@ from camelot.__version__ import generate_version
|
|||
|
||||
from .data import *
|
||||
|
||||
|
||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||
testdir = os.path.join(testdir, "files")
|
||||
|
||||
|
||||
def test_parsing_report():
|
||||
parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
|
||||
parsing_report = {
|
||||
"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1
|
||||
}
|
||||
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename)
|
||||
|
|
@ -28,9 +31,17 @@ def test_password():
|
|||
|
||||
filename = os.path.join(testdir, "health_protected.pdf")
|
||||
tables = camelot.read_pdf(filename, password="ownerpass", flavor="stream")
|
||||
assert len(tables) == 1
|
||||
assert_frame_equal(df, tables[0].df)
|
||||
|
||||
tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
|
||||
assert len(tables) == 1
|
||||
assert_frame_equal(df, tables[0].df)
|
||||
|
||||
|
||||
|
|
@ -229,9 +240,9 @@ def test_repr():
|
|||
tables = camelot.read_pdf(filename)
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) ==
|
||||
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
|
||||
|
||||
def test_pages():
|
||||
|
|
@ -239,22 +250,23 @@ def test_pages():
|
|||
tables = camelot.read_pdf(url)
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) ==
|
||||
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
|
||||
tables = camelot.read_pdf(url, pages="1-end")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) ==
|
||||
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
|
||||
tables = camelot.read_pdf(url, pages="all")
|
||||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
repr(tables[0].cells[0][0]) ==
|
||||
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -264,7 +276,8 @@ def test_url():
|
|||
assert repr(tables) == "<TableList n=1>"
|
||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||
assert (
|
||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
repr(tables[0].cells[0][0]) ==
|
||||
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -284,7 +297,12 @@ def test_table_order():
|
|||
return t
|
||||
|
||||
table_list = TableList(
|
||||
[_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
|
||||
[
|
||||
_make_table(2, 1),
|
||||
_make_table(1, 1),
|
||||
_make_table(3, 4),
|
||||
_make_table(1, 2)
|
||||
]
|
||||
)
|
||||
|
||||
assert [(t.page, t.order) for t in sorted(table_list)] == [
|
||||
|
|
|
|||
|
|
@ -4,13 +4,30 @@ import os
|
|||
|
||||
import pytest
|
||||
|
||||
import matplotlib
|
||||
|
||||
import camelot
|
||||
|
||||
# The version of Matplotlib has an impact on some of the tests. Unfortunately,
|
||||
# we can't enforce usage of a recent version of matplotlib without dropping
|
||||
# support for Python 3.5.
|
||||
# To check the version of matplotlib installed:
|
||||
# pip freeze | grep matplotlib
|
||||
# To force upgrade:
|
||||
# pip install --upgrade --force-reinstall matplotlib
|
||||
# To force usage of a Python 3.5-compatible version:
|
||||
# pip install "matplotlib==2.2.5"
|
||||
# This condition can be removed in favor of a version requirement bump for
|
||||
# matplotlib once support for Python 3.5 is dropped.
|
||||
|
||||
LEGACY_MATPLOTLIB = matplotlib.__version__ < "3.2.1"
|
||||
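# Note: the version check above is a plain string comparison, which is
|
||||
# lexicographic: it would misclassify e.g. "3.10.0" as older than "3.2.1".
|
||||
# A more robust sketch, assuming the stdlib LooseVersion helper is
|
||||
# acceptable here:
|
||||
#     from distutils.version import LooseVersion
|
||||
#     LEGACY_MATPLOTLIB = (LooseVersion(matplotlib.__version__)
|
||||
#                          < LooseVersion("3.2.1"))
|
||||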
|
||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||
testdir = os.path.join(testdir, "files")
|
||||
|
||||
|
||||
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||
reason="depends on a recent version of matplotlib")
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_text_plot():
|
||||
|
|
@ -26,6 +43,15 @@ def test_grid_plot():
|
|||
tables = camelot.read_pdf(filename)
|
||||
return camelot.plot(tables[0], kind='grid')
|
||||
|
||||

||||
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||
reason="depends on a recent version of matplotlib")
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_stream_grid_plot():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, flavor="stream")
|
||||
return camelot.plot(tables[0], kind='grid')
|
||||
|
||||
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
|
|
@ -35,6 +61,8 @@ def test_lattice_contour_plot():
|
|||
return camelot.plot(tables[0], kind='contour')
|
||||
|
||||
|
||||
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||
reason="depends on a recent version of matplotlib")
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_stream_contour_plot():
|
||||
|
|
@ -51,6 +79,8 @@ def test_line_plot():
|
|||
return camelot.plot(tables[0], kind='line')
|
||||
|
||||
|
||||
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||
reason="depends on a recent version of matplotlib")
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_joint_plot():
|
||||
|
|
@ -59,6 +89,8 @@ def test_joint_plot():
|
|||
return camelot.plot(tables[0], kind='joint')
|
||||
|
||||
|
||||
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||
reason="depends on a recent version of matplotlib")
|
||||
@pytest.mark.mpl_image_compare(
|
||||
baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_textedge_plot():
|
||||
|
|
|
|||