Fix unit tests, lint, drop Python 2 support
Drop EOL Python 2 support. Resolve unit test discrepancies. Update unit tests to pass in Travis across all supported Python versions. Linting.
pull/153/head
|
|
@ -0,0 +1,3 @@
|
||||||
|
[bandit]
|
||||||
|
# Ignore concerns about asserts, necessary for unit test code
|
||||||
|
skips: B101,B102
|
||||||
|
|
@ -4,6 +4,7 @@ __pycache__/
|
||||||
|
|
||||||
build/
|
build/
|
||||||
dist/
|
dist/
|
||||||
|
prof/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.eggs/
|
.eggs/
|
||||||
.coverage
|
.coverage
|
||||||
|
|
@ -17,3 +18,5 @@ htmlcov/
|
||||||
|
|
||||||
# vscode
|
# vscode
|
||||||
.vscode
|
.vscode
|
||||||
|
|
||||||
|
.DS_Store
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
sudo: true
|
|
||||||
language: python
|
language: python
|
||||||
cache: pip
|
cache: pip
|
||||||
addons:
|
addons:
|
||||||
|
|
@ -8,10 +7,6 @@ install:
|
||||||
- make install
|
- make install
|
||||||
jobs:
|
jobs:
|
||||||
include:
|
include:
|
||||||
- stage: test
|
|
||||||
script:
|
|
||||||
- make test
|
|
||||||
python: '2.7'
|
|
||||||
- stage: test
|
- stage: test
|
||||||
script:
|
script:
|
||||||
- make test
|
- make test
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ class TextEdge(object):
|
||||||
intersections: int
|
intersections: int
|
||||||
Number of intersections with horizontal text rows.
|
Number of intersections with horizontal text rows.
|
||||||
is_valid: bool
|
is_valid: bool
|
||||||
A text edge is valid if it intersections with at least
|
A text edge is valid if it intersects with at least
|
||||||
TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.
|
TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
@ -65,7 +65,8 @@ class TextEdge(object):
|
||||||
the is_valid attribute.
|
the is_valid attribute.
|
||||||
"""
|
"""
|
||||||
if np.isclose(self.y0, y0, atol=edge_tol):
|
if np.isclose(self.y0, y0, atol=edge_tol):
|
||||||
self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
|
self.x = (self.intersections * self.x + x) / \
|
||||||
|
float(self.intersections + 1)
|
||||||
self.y0 = y0
|
self.y0 = y0
|
||||||
self.intersections += 1
|
self.intersections += 1
|
||||||
# a textedge is valid only if it extends uninterrupted
|
# a textedge is valid only if it extends uninterrupted
|
||||||
|
|
@ -141,13 +142,16 @@ class TextEdges(object):
|
||||||
"""
|
"""
|
||||||
intersections_sum = {
|
intersections_sum = {
|
||||||
"left": sum(
|
"left": sum(
|
||||||
te.intersections for te in self._textedges["left"] if te.is_valid
|
te.intersections for te in self._textedges["left"]
|
||||||
|
if te.is_valid
|
||||||
),
|
),
|
||||||
"right": sum(
|
"right": sum(
|
||||||
te.intersections for te in self._textedges["right"] if te.is_valid
|
te.intersections for te in self._textedges["right"]
|
||||||
|
if te.is_valid
|
||||||
),
|
),
|
||||||
"middle": sum(
|
"middle": sum(
|
||||||
te.intersections for te in self._textedges["middle"] if te.is_valid
|
te.intersections for te in self._textedges["middle"]
|
||||||
|
if te.is_valid
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -292,7 +296,10 @@ class Cell(object):
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<Cell x1={} y1={} x2={} y2={}>".format(
|
return "<Cell x1={} y1={} x2={} y2={}>".format(
|
||||||
round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
|
round(self.x1, 2),
|
||||||
|
round(self.y1, 2),
|
||||||
|
round(self.x2, 2),
|
||||||
|
round(self.y2, 2)
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -342,7 +349,9 @@ class Table(object):
|
||||||
def __init__(self, cols, rows):
|
def __init__(self, cols, rows):
|
||||||
self.cols = cols
|
self.cols = cols
|
||||||
self.rows = rows
|
self.rows = rows
|
||||||
self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
|
self.cells = [
|
||||||
|
[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows
|
||||||
|
]
|
||||||
self.df = None
|
self.df = None
|
||||||
self.shape = (0, 0)
|
self.shape = (0, 0)
|
||||||
self.accuracy = 0
|
self.accuracy = 0
|
||||||
|
|
@ -579,7 +588,8 @@ class Table(object):
|
||||||
Output filepath.
|
Output filepath.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1}
|
kw = {"encoding": "utf-8", "index": False, "header": False,
|
||||||
|
"quoting": 1}
|
||||||
kw.update(kwargs)
|
kw.update(kwargs)
|
||||||
self.df.to_csv(path, **kw)
|
self.df.to_csv(path, **kw)
|
||||||
|
|
||||||
|
|
@ -616,6 +626,7 @@ class Table(object):
|
||||||
"encoding": "utf-8",
|
"encoding": "utf-8",
|
||||||
}
|
}
|
||||||
kw.update(kwargs)
|
kw.update(kwargs)
|
||||||
|
# pylint: disable=abstract-class-instantiated
|
||||||
writer = pd.ExcelWriter(path)
|
writer = pd.ExcelWriter(path)
|
||||||
self.df.to_excel(writer, **kw)
|
self.df.to_excel(writer, **kw)
|
||||||
writer.save()
|
writer.save()
|
||||||
|
|
@ -692,7 +703,8 @@ class TableList(object):
|
||||||
ext = kwargs.get("ext")
|
ext = kwargs.get("ext")
|
||||||
for table in self._tables:
|
for table in self._tables:
|
||||||
filename = os.path.join(
|
filename = os.path.join(
|
||||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
"{}-page-{}-table-{}{}".format(root, table.page, table.order,
|
||||||
|
ext)
|
||||||
)
|
)
|
||||||
filepath = os.path.join(dirname, filename)
|
filepath = os.path.join(dirname, filename)
|
||||||
to_format = self._format_func(table, f)
|
to_format = self._format_func(table, f)
|
||||||
|
|
@ -707,7 +719,10 @@ class TableList(object):
|
||||||
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
|
with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
|
||||||
for table in self._tables:
|
for table in self._tables:
|
||||||
filename = os.path.join(
|
filename = os.path.join(
|
||||||
"{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
|
"{}-page-{}-table-{}{}".format(root,
|
||||||
|
table.page,
|
||||||
|
table.order,
|
||||||
|
ext)
|
||||||
)
|
)
|
||||||
filepath = os.path.join(dirname, filename)
|
filepath = os.path.join(dirname, filename)
|
||||||
z.write(filepath, os.path.basename(filepath))
|
z.write(filepath, os.path.basename(filepath))
|
||||||
|
|
@ -739,10 +754,12 @@ class TableList(object):
|
||||||
self._compress_dir(**kwargs)
|
self._compress_dir(**kwargs)
|
||||||
elif f == "excel":
|
elif f == "excel":
|
||||||
filepath = os.path.join(dirname, basename)
|
filepath = os.path.join(dirname, basename)
|
||||||
|
# pylint: disable=abstract-class-instantiated
|
||||||
writer = pd.ExcelWriter(filepath)
|
writer = pd.ExcelWriter(filepath)
|
||||||
for table in self._tables:
|
for table in self._tables:
|
||||||
sheet_name = "page-{}-table-{}".format(table.page, table.order)
|
sheet_name = "page-{}-table-{}".format(table.page, table.order)
|
||||||
table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
|
table.df.to_excel(writer, sheet_name=sheet_name,
|
||||||
|
encoding="utf-8")
|
||||||
writer.save()
|
writer.save()
|
||||||
if compress:
|
if compress:
|
||||||
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
zipname = os.path.join(os.path.dirname(path), root) + ".zip"
|
||||||
|
|
|
||||||
|
|
@ -113,14 +113,20 @@ class PDFHandler(object):
|
||||||
outfile.addPage(p)
|
outfile.addPage(p)
|
||||||
with open(fpath, "wb") as f:
|
with open(fpath, "wb") as f:
|
||||||
outfile.write(f)
|
outfile.write(f)
|
||||||
layout, dim = get_page_layout(fpath)
|
layout, __ = get_page_layout(fpath)
|
||||||
# fix rotated PDF
|
# fix rotated PDF
|
||||||
chars = get_text_objects(layout, ltype="char")
|
chars = get_text_objects(layout, ltype="char")
|
||||||
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
|
horizontal_text = get_text_objects(layout, ltype="horizontal_text")
|
||||||
vertical_text = get_text_objects(layout, ltype="vertical_text")
|
vertical_text = get_text_objects(layout, ltype="vertical_text")
|
||||||
rotation = get_rotation(chars, horizontal_text, vertical_text)
|
rotation = get_rotation(chars, horizontal_text, vertical_text)
|
||||||
if rotation != "":
|
if rotation != "":
|
||||||
fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
|
fpath_new = "".join(
|
||||||
|
[
|
||||||
|
froot.replace("page", "p"),
|
||||||
|
"_rotated",
|
||||||
|
fext
|
||||||
|
]
|
||||||
|
)
|
||||||
os.rename(fpath, fpath_new)
|
os.rename(fpath, fpath_new)
|
||||||
infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
|
infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
|
||||||
if infile.isEncrypted:
|
if infile.isEncrypted:
|
||||||
|
|
@ -136,7 +142,8 @@ class PDFHandler(object):
|
||||||
outfile.write(f)
|
outfile.write(f)
|
||||||
|
|
||||||
def parse(
|
def parse(
|
||||||
self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs
|
self, flavor="lattice", suppress_stdout=False, layout_kwargs=None,
|
||||||
|
**kwargs
|
||||||
):
|
):
|
||||||
"""Extracts tables by calling parser.get_tables on all single
|
"""Extracts tables by calling parser.get_tables on all single
|
||||||
page PDFs.
|
page PDFs.
|
||||||
|
|
@ -149,7 +156,7 @@ class PDFHandler(object):
|
||||||
suppress_stdout : str (default: False)
|
suppress_stdout : str (default: False)
|
||||||
Suppress logs and warnings.
|
Suppress logs and warnings.
|
||||||
layout_kwargs : dict, optional (default: {})
|
layout_kwargs : dict, optional (default: {})
|
||||||
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
|
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
|
||||||
kwargs : dict
|
kwargs : dict
|
||||||
See camelot.read_pdf kwargs.
|
See camelot.read_pdf kwargs.
|
||||||
|
|
||||||
|
|
@ -159,17 +166,21 @@ class PDFHandler(object):
|
||||||
List of tables found in PDF.
|
List of tables found in PDF.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
layout_kwargs = layout_kwargs or {}
|
||||||
tables = []
|
tables = []
|
||||||
with TemporaryDirectory() as tempdir:
|
with TemporaryDirectory() as tempdir:
|
||||||
for p in self.pages:
|
for p in self.pages:
|
||||||
self._save_page(self.filepath, p, tempdir)
|
self._save_page(self.filepath, p, tempdir)
|
||||||
pages = [
|
pages = [
|
||||||
os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
|
os.path.join(tempdir, "page-{0}.pdf".format(p))
|
||||||
|
for p in self.pages
|
||||||
]
|
]
|
||||||
parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
|
parser = Lattice(**kwargs) \
|
||||||
|
if flavor == "lattice" else Stream(**kwargs)
|
||||||
for p in pages:
|
for p in pages:
|
||||||
t = parser.extract_tables(
|
t = parser.extract_tables(
|
||||||
p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
|
p, suppress_stdout=suppress_stdout,
|
||||||
|
layout_kwargs=layout_kwargs
|
||||||
)
|
)
|
||||||
tables.extend(t)
|
tables.extend(t)
|
||||||
return TableList(sorted(tables))
|
return TableList(sorted(tables))
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ def read_pdf(
|
||||||
password=None,
|
password=None,
|
||||||
flavor="lattice",
|
flavor="lattice",
|
||||||
suppress_stdout=False,
|
suppress_stdout=False,
|
||||||
layout_kwargs={},
|
layout_kwargs=None,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
"""Read PDF and return extracted tables.
|
"""Read PDF and return extracted tables.
|
||||||
|
|
@ -80,16 +80,16 @@ def read_pdf(
|
||||||
Size of a pixel neighborhood that is used to calculate a
|
Size of a pixel neighborhood that is used to calculate a
|
||||||
threshold value for the pixel: 3, 5, 7, and so on.
|
threshold value for the pixel: 3, 5, 7, and so on.
|
||||||
|
|
||||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||||
threshold_constant* : int, optional (default: -2)
|
threshold_constant* : int, optional (default: -2)
|
||||||
Constant subtracted from the mean or weighted mean.
|
Constant subtracted from the mean or weighted mean.
|
||||||
Normally, it is positive but may be zero or negative as well.
|
Normally, it is positive but may be zero or negative as well.
|
||||||
|
|
||||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||||
iterations* : int, optional (default: 0)
|
iterations* : int, optional (default: 0)
|
||||||
Number of times for erosion/dilation is applied.
|
Number of times for erosion/dilation is applied.
|
||||||
|
|
||||||
For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
|
For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_. # noqa
|
||||||
resolution* : int, optional (default: 300)
|
resolution* : int, optional (default: 300)
|
||||||
Resolution used for PDF to PNG conversion.
|
Resolution used for PDF to PNG conversion.
|
||||||
|
|
||||||
|
|
@ -98,6 +98,7 @@ def read_pdf(
|
||||||
tables : camelot.core.TableList
|
tables : camelot.core.TableList
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
layout_kwargs = layout_kwargs or {}
|
||||||
if flavor not in ["lattice", "stream"]:
|
if flavor not in ["lattice", "stream"]:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Unknown flavor specified." " Use either 'lattice' or 'stream'"
|
"Unknown flavor specified." " Use either 'lattice' or 'stream'"
|
||||||
|
|
|
||||||
|
|
@ -12,9 +12,18 @@ class BaseParser(object):
|
||||||
def _generate_layout(self, filename, layout_kwargs):
|
def _generate_layout(self, filename, layout_kwargs):
|
||||||
self.filename = filename
|
self.filename = filename
|
||||||
self.layout_kwargs = layout_kwargs
|
self.layout_kwargs = layout_kwargs
|
||||||
self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs)
|
self.layout, self.dimensions = get_page_layout(
|
||||||
|
filename,
|
||||||
|
**layout_kwargs
|
||||||
|
)
|
||||||
self.images = get_text_objects(self.layout, ltype="image")
|
self.images = get_text_objects(self.layout, ltype="image")
|
||||||
self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text")
|
self.horizontal_text = get_text_objects(
|
||||||
self.vertical_text = get_text_objects(self.layout, ltype="vertical_text")
|
self.layout,
|
||||||
|
ltype="horizontal_text"
|
||||||
|
)
|
||||||
|
self.vertical_text = get_text_objects(
|
||||||
|
self.layout,
|
||||||
|
ltype="vertical_text"
|
||||||
|
)
|
||||||
self.pdf_width, self.pdf_height = self.dimensions
|
self.pdf_width, self.pdf_height = self.dimensions
|
||||||
self.rootname, __ = os.path.splitext(self.filename)
|
self.rootname, __ = os.path.splitext(self.filename)
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,10 @@
|
||||||
|
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import copy
|
import copy
|
||||||
import locale
|
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
import subprocess
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from .base import BaseParser
|
from .base import BaseParser
|
||||||
|
|
@ -80,7 +76,7 @@ class Lattice(BaseParser):
|
||||||
Size of a pixel neighborhood that is used to calculate a
|
Size of a pixel neighborhood that is used to calculate a
|
||||||
threshold value for the pixel: 3, 5, 7, and so on.
|
threshold value for the pixel: 3, 5, 7, and so on.
|
||||||
|
|
||||||
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
|
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
|
||||||
threshold_constant : int, optional (default: -2)
|
threshold_constant : int, optional (default: -2)
|
||||||
Constant subtracted from the mean or weighted mean.
|
Constant subtracted from the mean or weighted mean.
|
||||||
Normally, it is positive but may be zero or negative as well.
|
Normally, it is positive but may be zero or negative as well.
|
||||||
|
|
@ -102,7 +98,7 @@ class Lattice(BaseParser):
|
||||||
process_background=False,
|
process_background=False,
|
||||||
line_scale=15,
|
line_scale=15,
|
||||||
copy_text=None,
|
copy_text=None,
|
||||||
shift_text=["l", "t"],
|
shift_text=None,
|
||||||
split_text=False,
|
split_text=False,
|
||||||
flag_size=False,
|
flag_size=False,
|
||||||
strip_text="",
|
strip_text="",
|
||||||
|
|
@ -114,6 +110,7 @@ class Lattice(BaseParser):
|
||||||
resolution=300,
|
resolution=300,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
|
shift_text = shift_text or ["l", "t"]
|
||||||
self.table_regions = table_regions
|
self.table_regions = table_regions
|
||||||
self.table_areas = table_areas
|
self.table_areas = table_areas
|
||||||
self.process_background = process_background
|
self.process_background = process_background
|
||||||
|
|
@ -217,8 +214,7 @@ class Lattice(BaseParser):
|
||||||
)
|
)
|
||||||
gs_call = gs_call.encode().split()
|
gs_call = gs_call.encode().split()
|
||||||
null = open(os.devnull, "wb")
|
null = open(os.devnull, "wb")
|
||||||
with Ghostscript(*gs_call, stdout=null) as gs:
|
Ghostscript(*gs_call, stdout=null)
|
||||||
pass
|
|
||||||
null.close()
|
null.close()
|
||||||
|
|
||||||
def _generate_table_bbox(self):
|
def _generate_table_bbox(self):
|
||||||
|
|
@ -247,7 +243,8 @@ class Lattice(BaseParser):
|
||||||
image_height_scaler = image_height / float(self.pdf_height)
|
image_height_scaler = image_height / float(self.pdf_height)
|
||||||
pdf_width_scaler = self.pdf_width / float(image_width)
|
pdf_width_scaler = self.pdf_width / float(image_width)
|
||||||
pdf_height_scaler = self.pdf_height / float(image_height)
|
pdf_height_scaler = self.pdf_height / float(image_height)
|
||||||
image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
|
image_scalers = (image_width_scaler,
|
||||||
|
image_height_scaler, self.pdf_height)
|
||||||
pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
|
pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
|
||||||
|
|
||||||
if self.table_areas is None:
|
if self.table_areas is None:
|
||||||
|
|
@ -291,7 +288,11 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
self.table_bbox_unscaled = copy.deepcopy(table_bbox)
|
self.table_bbox_unscaled = copy.deepcopy(table_bbox)
|
||||||
|
|
||||||
self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image(
|
[
|
||||||
|
self.table_bbox,
|
||||||
|
self.vertical_segments,
|
||||||
|
self.horizontal_segments
|
||||||
|
] = scale_image(
|
||||||
table_bbox, vertical_segments, horizontal_segments, pdf_scalers
|
table_bbox, vertical_segments, horizontal_segments, pdf_scalers
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -315,7 +316,10 @@ class Lattice(BaseParser):
|
||||||
rows.extend([tk[1], tk[3]])
|
rows.extend([tk[1], tk[3]])
|
||||||
# sort horizontal and vertical segments
|
# sort horizontal and vertical segments
|
||||||
cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
|
cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
|
||||||
rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
|
rows = merge_close_lines(
|
||||||
|
sorted(rows, reverse=True),
|
||||||
|
line_tol=self.line_tol
|
||||||
|
)
|
||||||
# make grid using x and y coord of shortlisted rows and cols
|
# make grid using x and y coord of shortlisted rows and cols
|
||||||
cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
|
cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
|
||||||
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
||||||
|
|
@ -359,7 +363,10 @@ class Lattice(BaseParser):
|
||||||
accuracy = compute_accuracy([[100, pos_errors]])
|
accuracy = compute_accuracy([[100, pos_errors]])
|
||||||
|
|
||||||
if self.copy_text is not None:
|
if self.copy_text is not None:
|
||||||
table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
|
table = Lattice._copy_spanning_text(
|
||||||
|
table,
|
||||||
|
copy_text=self.copy_text
|
||||||
|
)
|
||||||
|
|
||||||
data = table.data
|
data = table.data
|
||||||
table.df = pd.DataFrame(data)
|
table.df = pd.DataFrame(data)
|
||||||
|
|
@ -383,20 +390,28 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
|
def extract_tables(
|
||||||
|
self,
|
||||||
|
filename,
|
||||||
|
suppress_stdout=False,
|
||||||
|
layout_kwargs=None
|
||||||
|
):
|
||||||
|
layout_kwargs = layout_kwargs or {}
|
||||||
self._generate_layout(filename, layout_kwargs)
|
self._generate_layout(filename, layout_kwargs)
|
||||||
|
rootname = os.path.basename(self.rootname)
|
||||||
if not suppress_stdout:
|
if not suppress_stdout:
|
||||||
logger.info("Processing {}".format(os.path.basename(self.rootname)))
|
logger.info("Processing {rootname}".format(rootname=rootname))
|
||||||
|
|
||||||
if not self.horizontal_text:
|
if not self.horizontal_text:
|
||||||
if self.images:
|
if self.images:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"{} is image-based, camelot only works on"
|
"{rootname} is image-based, "
|
||||||
" text-based pages.".format(os.path.basename(self.rootname))
|
"camelot only works on text-based pages."
|
||||||
|
.format(rootname=rootname)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"No tables found on {}".format(os.path.basename(self.rootname))
|
"No tables found on {rootname}".format(rootname=rootname)
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
@ -408,8 +423,10 @@ class Lattice(BaseParser):
|
||||||
for table_idx, tk in enumerate(
|
for table_idx, tk in enumerate(
|
||||||
sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
|
sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
|
||||||
):
|
):
|
||||||
cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
|
cols, rows, v_s, h_s = self._generate_columns_and_rows(
|
||||||
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
table_idx, tk)
|
||||||
|
table = self._generate_table(
|
||||||
|
table_idx, cols, rows, v_s=v_s, h_s=h_s)
|
||||||
table._bbox = tk
|
table._bbox = tk
|
||||||
_tables.append(table)
|
_tables.append(table)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,8 @@ import pandas as pd
|
||||||
|
|
||||||
from .base import BaseParser
|
from .base import BaseParser
|
||||||
from ..core import TextEdges, Table
|
from ..core import TextEdges, Table
|
||||||
from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace
|
from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
||||||
|
compute_whitespace)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger("camelot")
|
logger = logging.getLogger("camelot")
|
||||||
|
|
@ -70,6 +71,9 @@ class Stream(BaseParser):
|
||||||
):
|
):
|
||||||
self.table_regions = table_regions
|
self.table_regions = table_regions
|
||||||
self.table_areas = table_areas
|
self.table_areas = table_areas
|
||||||
|
self.table_bbox = None
|
||||||
|
self.t_bbox = None
|
||||||
|
self.textedges = []
|
||||||
self.columns = columns
|
self.columns = columns
|
||||||
self._validate_columns()
|
self._validate_columns()
|
||||||
self.split_text = split_text
|
self.split_text = split_text
|
||||||
|
|
@ -95,10 +99,10 @@ class Stream(BaseParser):
|
||||||
Tuple (x0, y0, x1, y1) in pdf coordinate space.
|
Tuple (x0, y0, x1, y1) in pdf coordinate space.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]])
|
xmin = min(t.x0 for direction in t_bbox for t in t_bbox[direction])
|
||||||
ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]])
|
ymin = min(t.y0 for direction in t_bbox for t in t_bbox[direction])
|
||||||
xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]])
|
xmax = max(t.x1 for direction in t_bbox for t in t_bbox[direction])
|
||||||
ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]])
|
ymax = max(t.y1 for direction in t_bbox for t in t_bbox[direction])
|
||||||
text_bbox = (xmin, ymin, xmax, ymax)
|
text_bbox = (xmin, ymin, xmax, ymax)
|
||||||
return text_bbox
|
return text_bbox
|
||||||
|
|
||||||
|
|
@ -119,21 +123,25 @@ class Stream(BaseParser):
|
||||||
Two-dimensional list of text objects grouped into rows.
|
Two-dimensional list of text objects grouped into rows.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
row_y = 0
|
row_y = None
|
||||||
rows = []
|
rows = []
|
||||||
temp = []
|
temp = []
|
||||||
for t in text:
|
non_empty_text = [t for t in text if t.get_text().strip()]
|
||||||
|
for t in non_empty_text:
|
||||||
# is checking for upright necessary?
|
# is checking for upright necessary?
|
||||||
# if t.get_text().strip() and all([obj.upright for obj in t._objs if
|
# if t.get_text().strip() and all([obj.upright \
|
||||||
# type(obj) is LTChar]):
|
# for obj in t._objs
|
||||||
if t.get_text().strip():
|
# if type(obj) is LTChar]):
|
||||||
if not np.isclose(row_y, t.y0, atol=row_tol):
|
if row_y is None:
|
||||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
row_y = t.y0
|
||||||
temp = []
|
elif not np.isclose(row_y, t.y0, atol=row_tol):
|
||||||
row_y = t.y0
|
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||||
temp.append(t)
|
temp = []
|
||||||
|
# We update the row's bottom as we go, to be forgiving if there
|
||||||
|
# is a gradual change across multiple columns.
|
||||||
|
row_y = t.y0
|
||||||
|
temp.append(t)
|
||||||
rows.append(sorted(temp, key=lambda t: t.x0))
|
rows.append(sorted(temp, key=lambda t: t.x0))
|
||||||
__ = rows.pop(0) # TODO: hacky
|
|
||||||
return rows
|
return rows
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -170,7 +178,8 @@ class Stream(BaseParser):
|
||||||
merged.append(higher)
|
merged.append(higher)
|
||||||
elif column_tol < 0:
|
elif column_tol < 0:
|
||||||
if higher[0] <= lower[1]:
|
if higher[0] <= lower[1]:
|
||||||
if np.isclose(higher[0], lower[1], atol=abs(column_tol)):
|
if np.isclose(higher[0], lower[1],
|
||||||
|
atol=abs(column_tol)):
|
||||||
merged.append(higher)
|
merged.append(higher)
|
||||||
else:
|
else:
|
||||||
upper_bound = max(lower[1], higher[1])
|
upper_bound = max(lower[1], higher[1])
|
||||||
|
|
@ -198,10 +207,13 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
row_mids = [
|
row_mids = [
|
||||||
sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0
|
sum((t.y0 + t.y1) / 2 for t in r) / len(r) if len(r) > 0 else 0
|
||||||
for r in rows_grouped
|
for r in rows_grouped
|
||||||
]
|
]
|
||||||
rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
|
rows = [
|
||||||
|
(row_mids[i] + row_mids[i - 1]) / 2
|
||||||
|
for i in range(1, len(row_mids))
|
||||||
|
]
|
||||||
rows.insert(0, text_y_max)
|
rows.insert(0, text_y_max)
|
||||||
rows.append(text_y_min)
|
rows.append(text_y_min)
|
||||||
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]
|
||||||
|
|
@ -230,7 +242,9 @@ class Stream(BaseParser):
|
||||||
text = Stream._group_rows(text, row_tol=row_tol)
|
text = Stream._group_rows(text, row_tol=row_tol)
|
||||||
elements = [len(r) for r in text]
|
elements = [len(r) for r in text]
|
||||||
new_cols = [
|
new_cols = [
|
||||||
(t.x0, t.x1) for r in text if len(r) == max(elements) for t in r
|
(t.x0, t.x1)
|
||||||
|
for r in text if len(r) == max(elements)
|
||||||
|
for t in r
|
||||||
]
|
]
|
||||||
cols.extend(Stream._merge_columns(sorted(new_cols)))
|
cols.extend(Stream._merge_columns(sorted(new_cols)))
|
||||||
return cols
|
return cols
|
||||||
|
|
@ -262,12 +276,13 @@ class Stream(BaseParser):
|
||||||
def _validate_columns(self):
|
def _validate_columns(self):
|
||||||
if self.table_areas is not None and self.columns is not None:
|
if self.table_areas is not None and self.columns is not None:
|
||||||
if len(self.table_areas) != len(self.columns):
|
if len(self.table_areas) != len(self.columns):
|
||||||
raise ValueError("Length of table_areas and columns" " should be equal")
|
raise ValueError("Length of table_areas and columns"
|
||||||
|
" should be equal")
|
||||||
|
|
||||||
def _nurminen_table_detection(self, textlines):
|
def _nurminen_table_detection(self, textlines):
|
||||||
"""A general implementation of the table detection algorithm
|
"""A general implementation of the table detection algorithm
|
||||||
described by Anssi Nurminen's master's thesis.
|
described by Anssi Nurminen's master's thesis.
|
||||||
Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3
|
Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 # noqa
|
||||||
|
|
||||||
Assumes that tables are situated relatively far apart
|
Assumes that tables are situated relatively far apart
|
||||||
vertically.
|
vertically.
|
||||||
|
|
@ -284,7 +299,7 @@ class Stream(BaseParser):
|
||||||
# guess table areas using textlines and relevant edges
|
# guess table areas using textlines and relevant edges
|
||||||
table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
|
table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
|
||||||
# treat whole page as table area if no table areas found
|
# treat whole page as table area if no table areas found
|
||||||
if not len(table_bbox):
|
if not table_bbox:
|
||||||
table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
|
table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
|
||||||
|
|
||||||
return table_bbox
|
return table_bbox
|
||||||
|
|
@ -302,7 +317,8 @@ class Stream(BaseParser):
|
||||||
y1 = float(y1)
|
y1 = float(y1)
|
||||||
x2 = float(x2)
|
x2 = float(x2)
|
||||||
y2 = float(y2)
|
y2 = float(y2)
|
||||||
region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text)
|
region_text = text_in_bbox(
|
||||||
|
(x1, y2, x2, y1), self.horizontal_text)
|
||||||
hor_text.extend(region_text)
|
hor_text.extend(region_text)
|
||||||
# find tables based on nurminen's detection algorithm
|
# find tables based on nurminen's detection algorithm
|
||||||
table_bbox = self._nurminen_table_detection(hor_text)
|
table_bbox = self._nurminen_table_detection(hor_text)
|
||||||
|
|
@ -328,8 +344,10 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
self.t_bbox = t_bbox
|
self.t_bbox = t_bbox
|
||||||
|
|
||||||
text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
|
text_x_min, text_y_min, text_x_max, text_y_max = \
|
||||||
rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
|
self._text_bbox(self.t_bbox)
|
||||||
|
rows_grouped = self._group_rows(
|
||||||
|
self.t_bbox["horizontal"], row_tol=self.row_tol)
|
||||||
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
|
||||||
elements = [len(r) for r in rows_grouped]
|
elements = [len(r) for r in rows_grouped]
|
||||||
|
|
||||||
|
|
@ -354,14 +372,23 @@ class Stream(BaseParser):
|
||||||
# see if the list contains elements, if yes, then use
|
# see if the list contains elements, if yes, then use
|
||||||
# the mode after removing 1s
|
# the mode after removing 1s
|
||||||
elements = list(filter(lambda x: x != 1, elements))
|
elements = list(filter(lambda x: x != 1, elements))
|
||||||
if len(elements):
|
if elements:
|
||||||
ncols = max(set(elements), key=elements.count)
|
ncols = max(set(elements), key=elements.count)
|
||||||
else:
|
else:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"No tables found in table area {}".format(table_idx + 1)
|
"No tables found in table area {}"
|
||||||
|
.format(table_idx + 1)
|
||||||
)
|
)
|
||||||
cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
|
cols = [
|
||||||
cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
|
(t.x0, t.x1)
|
||||||
|
for r in rows_grouped
|
||||||
|
if len(r) == ncols
|
||||||
|
for t in r
|
||||||
|
]
|
||||||
|
cols = self._merge_columns(
|
||||||
|
sorted(cols),
|
||||||
|
column_tol=self.column_tol
|
||||||
|
)
|
||||||
inner_text = []
|
inner_text = []
|
||||||
for i in range(1, len(cols)):
|
for i in range(1, len(cols)):
|
||||||
left = cols[i - 1][1]
|
left = cols[i - 1][1]
|
||||||
|
|
@ -431,23 +458,30 @@ class Stream(BaseParser):
|
||||||
|
|
||||||
return table
|
return table
|
||||||
|
|
||||||
def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
|
def extract_tables(self, filename, suppress_stdout=False,
|
||||||
|
layout_kwargs=None):
|
||||||
|
layout_kwargs = layout_kwargs or {}
|
||||||
self._generate_layout(filename, layout_kwargs)
|
self._generate_layout(filename, layout_kwargs)
|
||||||
if not suppress_stdout:
|
if not suppress_stdout:
|
||||||
logger.info("Processing {}".format(os.path.basename(self.rootname)))
|
logger.info("Processing {}".format(
|
||||||
|
os.path.basename(self.rootname)))
|
||||||
|
|
||||||
if not self.horizontal_text:
|
if not self.horizontal_text:
|
||||||
if self.images:
|
if self.images:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"{} is image-based, camelot only works on"
|
"{} is image-based, camelot only works on"
|
||||||
" text-based pages.".format(os.path.basename(self.rootname))
|
" text-based pages.".format(
|
||||||
|
os.path.basename(self.rootname))
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"No tables found on {}".format(os.path.basename(self.rootname))
|
"No tables found on {}".format(
|
||||||
|
os.path.basename(self.rootname))
|
||||||
)
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Identify plausible areas within the doc where tables lie,
|
||||||
|
# populate table_bbox keys with these areas.
|
||||||
self._generate_table_bbox()
|
self._generate_table_bbox()
|
||||||
|
|
||||||
_tables = []
|
_tables = []
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ class PlotMethods(object):
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Lattice flavor does not support kind='{}'".format(kind)
|
"Lattice flavor does not support kind='{}'".format(kind)
|
||||||
)
|
)
|
||||||
elif table.flavor == "stream" and kind in ["joint", "line"]:
|
elif table.flavor == "stream" and kind in ["line"]:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Stream flavor does not support kind='{}'".format(kind)
|
"Stream flavor does not support kind='{}'".format(kind)
|
||||||
)
|
)
|
||||||
|
|
@ -64,7 +64,13 @@ class PlotMethods(object):
|
||||||
for t in table._text:
|
for t in table._text:
|
||||||
xs.extend([t[0], t[2]])
|
xs.extend([t[0], t[2]])
|
||||||
ys.extend([t[1], t[3]])
|
ys.extend([t[1], t[3]])
|
||||||
ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
|
ax.add_patch(
|
||||||
|
patches.Rectangle(
|
||||||
|
(t[0], t[1]),
|
||||||
|
t[2] - t[0],
|
||||||
|
t[3] - t[1]
|
||||||
|
)
|
||||||
|
)
|
||||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||||
return fig
|
return fig
|
||||||
|
|
@ -132,7 +138,8 @@ class PlotMethods(object):
|
||||||
for t in table_bbox.keys():
|
for t in table_bbox.keys():
|
||||||
ax.add_patch(
|
ax.add_patch(
|
||||||
patches.Rectangle(
|
patches.Rectangle(
|
||||||
(t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red"
|
(t[0], t[1]), t[2] - t[0], t[3] - t[1],
|
||||||
|
fill=False, color="red"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if not _FOR_LATTICE:
|
if not _FOR_LATTICE:
|
||||||
|
|
@ -164,7 +171,10 @@ class PlotMethods(object):
|
||||||
xs.extend([t[0], t[2]])
|
xs.extend([t[0], t[2]])
|
||||||
ys.extend([t[1], t[3]])
|
ys.extend([t[1], t[3]])
|
||||||
ax.add_patch(
|
ax.add_patch(
|
||||||
patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue")
|
patches.Rectangle(
|
||||||
|
(t[0], t[1]), t[2] - t[0], t[3] - t[1],
|
||||||
|
color="blue"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
ax.set_xlim(min(xs) - 10, max(xs) + 10)
|
||||||
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
ax.set_ylim(min(ys) - 10, max(ys) + 10)
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,9 @@ from pdfminer.layout import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# pylint: disable=import-error
|
||||||
|
# PyLint will evaluate both branches, and will necessarily complain about one
|
||||||
|
# of them.
|
||||||
PY3 = sys.version_info[0] >= 3
|
PY3 = sys.version_info[0] >= 3
|
||||||
if PY3:
|
if PY3:
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
|
@ -310,7 +313,8 @@ def get_rotation(chars, horizontal_text, vertical_text):
|
||||||
if hlen < vlen:
|
if hlen < vlen:
|
||||||
clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
|
clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
|
||||||
anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
|
anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
|
||||||
rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise"
|
rotation = "anticlockwise" if clockwise < anticlockwise \
|
||||||
|
else "clockwise"
|
||||||
return rotation
|
return rotation
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -341,12 +345,16 @@ def segments_in_bbox(bbox, v_segments, h_segments):
|
||||||
v_s = [
|
v_s = [
|
||||||
v
|
v
|
||||||
for v in v_segments
|
for v in v_segments
|
||||||
if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2
|
if v[1] > lb[1] - 2 and
|
||||||
|
v[3] < rt[1] + 2 and
|
||||||
|
lb[0] - 2 <= v[0] <= rt[0] + 2
|
||||||
]
|
]
|
||||||
h_s = [
|
h_s = [
|
||||||
h
|
h
|
||||||
for h in h_segments
|
for h in h_segments
|
||||||
if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2
|
if h[0] > lb[0] - 2 and
|
||||||
|
h[2] < rt[0] + 2 and
|
||||||
|
lb[1] - 2 <= h[1] <= rt[1] + 2
|
||||||
]
|
]
|
||||||
return v_s, h_s
|
return v_s, h_s
|
||||||
|
|
||||||
|
|
@ -464,10 +472,10 @@ def flag_font_size(textline, direction, strip_text=""):
|
||||||
for t in textline
|
for t in textline
|
||||||
if not isinstance(t, LTAnno)
|
if not isinstance(t, LTAnno)
|
||||||
]
|
]
|
||||||
l = [np.round(size, decimals=6) for text, size in d]
|
text_sizes = [np.round(size, decimals=6) for text, size in d]
|
||||||
if len(set(l)) > 1:
|
if len(set(text_sizes)) > 1:
|
||||||
flist = []
|
flist = []
|
||||||
min_size = min(l)
|
min_size = min(text_sizes)
|
||||||
for key, chars in groupby(d, itemgetter(1)):
|
for key, chars in groupby(d, itemgetter(1)):
|
||||||
if key == min_size:
|
if key == min_size:
|
||||||
fchars = [t[0] for t in chars]
|
fchars = [t[0] for t in chars]
|
||||||
|
|
@ -511,7 +519,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
||||||
of row/column and text is the an lttextline substring.
|
of row/column and text is the an lttextline substring.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
idx = 0
|
|
||||||
cut_text = []
|
cut_text = []
|
||||||
bbox = textline.bbox
|
bbox = textline.bbox
|
||||||
try:
|
try:
|
||||||
|
|
@ -528,7 +535,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
||||||
]
|
]
|
||||||
r = r_idx[0]
|
r = r_idx[0]
|
||||||
x_cuts = [
|
x_cuts = [
|
||||||
(c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right
|
(c, table.cells[r][c].x2)
|
||||||
|
for c in x_overlap
|
||||||
|
if table.cells[r][c].right
|
||||||
]
|
]
|
||||||
if not x_cuts:
|
if not x_cuts:
|
||||||
x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
|
x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]
|
||||||
|
|
@ -561,7 +570,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
|
||||||
]
|
]
|
||||||
c = c_idx[0]
|
c = c_idx[0]
|
||||||
y_cuts = [
|
y_cuts = [
|
||||||
(r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom
|
(r, table.cells[r][c].y1)
|
||||||
|
for r in y_overlap
|
||||||
|
if table.cells[r][c].bottom
|
||||||
]
|
]
|
||||||
if not y_cuts:
|
if not y_cuts:
|
||||||
y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
|
y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]
|
||||||
|
|
@ -644,9 +655,8 @@ def get_table_index(
|
||||||
"""
|
"""
|
||||||
r_idx, c_idx = [-1] * 2
|
r_idx, c_idx = [-1] * 2
|
||||||
for r in range(len(table.rows)):
|
for r in range(len(table.rows)):
|
||||||
if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[
|
if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and \
|
||||||
r
|
(t.y0 + t.y1) / 2.0 > table.rows[r][1]:
|
||||||
][1]:
|
|
||||||
lt_col_overlap = []
|
lt_col_overlap = []
|
||||||
for c in table.cols:
|
for c in table.cols:
|
||||||
if c[0] <= t.x1 and c[1] >= t.x0:
|
if c[0] <= t.x1 and c[1] >= t.x0:
|
||||||
|
|
@ -681,7 +691,9 @@ def get_table_index(
|
||||||
X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1)
|
X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1)
|
||||||
Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1)
|
Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1)
|
||||||
charea = X * Y
|
charea = X * Y
|
||||||
error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
|
error = (
|
||||||
|
(X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))
|
||||||
|
) / charea
|
||||||
|
|
||||||
if split_text:
|
if split_text:
|
||||||
return (
|
return (
|
||||||
|
|
@ -697,13 +709,16 @@ def get_table_index(
|
||||||
(
|
(
|
||||||
r_idx,
|
r_idx,
|
||||||
c_idx,
|
c_idx,
|
||||||
flag_font_size(t._objs, direction, strip_text=strip_text),
|
flag_font_size(t._objs,
|
||||||
|
direction,
|
||||||
|
strip_text=strip_text),
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
error,
|
error,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], error
|
return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], \
|
||||||
|
error
|
||||||
|
|
||||||
|
|
||||||
def compute_accuracy(error_weights):
|
def compute_accuracy(error_weights):
|
||||||
|
|
@ -751,7 +766,6 @@ def compute_whitespace(d):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
whitespace = 0
|
whitespace = 0
|
||||||
r_nempty_cells, c_nempty_cells = [], []
|
|
||||||
for i in d:
|
for i in d:
|
||||||
for j in i:
|
for j in i:
|
||||||
if j.strip() == "":
|
if j.strip() == "":
|
||||||
|
|
@ -811,6 +825,7 @@ def get_page_layout(
|
||||||
width = layout.bbox[2]
|
width = layout.bbox[2]
|
||||||
height = layout.bbox[3]
|
height = layout.bbox[3]
|
||||||
dim = (width, height)
|
dim = (width, height)
|
||||||
|
break # we assume a single page pdf
|
||||||
return layout, dim
|
return layout, dim
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ The easiest way to install Camelot is to install it with `conda`_, which is a pa
|
||||||
|
|
||||||
$ conda install -c conda-forge camelot-py
|
$ conda install -c conda-forge camelot-py
|
||||||
|
|
||||||
.. note:: Camelot is available for Python 2.7, 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
|
.. note:: Camelot is available for Python 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
|
||||||
|
|
||||||
.. _conda: https://conda.io/docs/
|
.. _conda: https://conda.io/docs/
|
||||||
.. _Anaconda: http://docs.continuum.io/anaconda/
|
.. _Anaconda: http://docs.continuum.io/anaconda/
|
||||||
|
|
|
||||||
|
|
@ -4,5 +4,5 @@ numpy>=1.13.3
|
||||||
opencv-python>=3.4.2.17
|
opencv-python>=3.4.2.17
|
||||||
openpyxl>=2.5.8
|
openpyxl>=2.5.8
|
||||||
pandas>=0.23.4
|
pandas>=0.23.4
|
||||||
pdfminer.six>=20170720
|
pdfminer.six>=20200402
|
||||||
PyPDF2>=1.26.0
|
PyPDF2>=1.26.0
|
||||||
|
|
|
||||||
5
setup.py
|
|
@ -19,7 +19,7 @@ requires = [
|
||||||
'numpy>=1.13.3',
|
'numpy>=1.13.3',
|
||||||
'openpyxl>=2.5.8',
|
'openpyxl>=2.5.8',
|
||||||
'pandas>=0.23.4',
|
'pandas>=0.23.4',
|
||||||
'pdfminer.six>=20170720',
|
'pdfminer.six>=20200402',
|
||||||
'PyPDF2>=1.26.0'
|
'PyPDF2>=1.26.0'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -69,9 +69,8 @@ def setup_package():
|
||||||
},
|
},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
# Trove classifiers
|
# Trove classifiers
|
||||||
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
|
# Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers # noqa
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
'Programming Language :: Python :: 2.7',
|
|
||||||
'Programming Language :: Python :: 3.5',
|
'Programming Language :: Python :: 3.5',
|
||||||
'Programming Language :: Python :: 3.6',
|
'Programming Language :: Python :: 3.6',
|
||||||
'Programming Language :: Python :: 3.7'
|
'Programming Language :: Python :: 3.7'
|
||||||
|
|
|
||||||
343
tests/data.py
|
|
@ -4,16 +4,6 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
data_stream = [
|
data_stream = [
|
||||||
[
|
|
||||||
"",
|
|
||||||
"Table: 5 Public Health Outlay 2012-13 (Budget Estimates) (Rs. in 000)",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
],
|
|
||||||
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
|
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
|
||||||
["", "", "", "", "", "Revenue &", "", ""],
|
["", "", "", "", "", "Revenue &", "", ""],
|
||||||
["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
|
["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
|
||||||
|
|
@ -80,7 +70,8 @@ data_stream = [
|
||||||
"5,000",
|
"5,000",
|
||||||
"33,051,480",
|
"33,051,480",
|
||||||
],
|
],
|
||||||
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560", "4,508,180"],
|
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560",
|
||||||
|
"4,508,180"],
|
||||||
[
|
[
|
||||||
"Gujarat",
|
"Gujarat",
|
||||||
"26,328,400",
|
"26,328,400",
|
||||||
|
|
@ -171,7 +162,8 @@ data_stream = [
|
||||||
"313,762",
|
"313,762",
|
||||||
"67,044,159",
|
"67,044,159",
|
||||||
],
|
],
|
||||||
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700", "0", "3,579,700"],
|
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700",
|
||||||
|
"0", "3,579,700"],
|
||||||
[
|
[
|
||||||
"Meghalaya",
|
"Meghalaya",
|
||||||
"2,894,093",
|
"2,894,093",
|
||||||
|
|
@ -236,7 +228,8 @@ data_stream = [
|
||||||
|
|
||||||
data_stream_table_rotated = [
|
data_stream_table_rotated = [
|
||||||
[
|
[
|
||||||
"Table 21 Current use of contraception by background characteristics\u2014Continued",
|
"Table 21 Current use of contraception by background characteristics"
|
||||||
|
"\u2014Continued",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -330,7 +323,8 @@ data_stream_table_rotated = [
|
||||||
"Total",
|
"Total",
|
||||||
"women",
|
"women",
|
||||||
],
|
],
|
||||||
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||||
|
"", ""],
|
||||||
[
|
[
|
||||||
"Scheduled caste",
|
"Scheduled caste",
|
||||||
"74.8",
|
"74.8",
|
||||||
|
|
@ -407,7 +401,8 @@ data_stream_table_rotated = [
|
||||||
"100.0",
|
"100.0",
|
||||||
"3,319",
|
"3,319",
|
||||||
],
|
],
|
||||||
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||||
|
"", "", "", ""],
|
||||||
[
|
[
|
||||||
"Lowest",
|
"Lowest",
|
||||||
"64.5",
|
"64.5",
|
||||||
|
|
@ -830,7 +825,8 @@ data_stream_table_rotated = [
|
||||||
|
|
||||||
data_stream_two_tables_1 = [
|
data_stream_two_tables_1 = [
|
||||||
[
|
[
|
||||||
"[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)",
|
"Program. Represents arrests reported (not charged) by 12,910 "
|
||||||
|
"agencies with a total population of 247,526,916 as estimated",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -842,7 +838,8 @@ data_stream_two_tables_1 = [
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
|
"by the FBI. Some persons may be arrested more than once during a "
|
||||||
|
"year, therefore, the data in this table, in some cases,",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -854,19 +851,8 @@ data_stream_two_tables_1 = [
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,",
|
"could represent multiple arrests of the same person. See text, "
|
||||||
"",
|
"this section and source]",
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"could represent multiple arrests of the same person. See text, this section and source]",
|
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -903,7 +889,8 @@ data_stream_two_tables_1 = [
|
||||||
"and over",
|
"and over",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . .\n . .\n . .\n . . .",
|
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . "
|
||||||
|
".\n . .\n . .\n . . .",
|
||||||
"11,062 .6",
|
"11,062 .6",
|
||||||
"1,540 .0",
|
"1,540 .0",
|
||||||
"9,522 .6",
|
"9,522 .6",
|
||||||
|
|
@ -915,7 +902,8 @@ data_stream_two_tables_1 = [
|
||||||
"2,330 .9",
|
"2,330 .9",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n . .",
|
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||||
|
".\n . .",
|
||||||
"467 .9",
|
"467 .9",
|
||||||
"69 .1",
|
"69 .1",
|
||||||
"398 .8",
|
"398 .8",
|
||||||
|
|
@ -976,7 +964,8 @@ data_stream_two_tables_1 = [
|
||||||
"64.5",
|
"64.5",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . . .",
|
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . "
|
||||||
|
". .",
|
||||||
"1,396 .4",
|
"1,396 .4",
|
||||||
"338 .7",
|
"338 .7",
|
||||||
"1,057 .7",
|
"1,057 .7",
|
||||||
|
|
@ -1060,7 +1049,8 @@ data_stream_two_tables_1 = [
|
||||||
"25.5",
|
"25.5",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n.",
|
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||||
|
".\n.\n.\n.",
|
||||||
"173.7",
|
"173.7",
|
||||||
"5.1",
|
"5.1",
|
||||||
"168.5",
|
"168.5",
|
||||||
|
|
@ -1290,19 +1280,8 @@ data_stream_two_tables_1 = [
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"",
|
"",
|
||||||
"– Represents zero. X Not applicable. 1 Buying, receiving, possessing stolen property. 2 Except forcible rape and prostitution.",
|
"– Represents zero. X Not applicable. 1 Buying, receiving, "
|
||||||
"",
|
"possessing stolen property. 2 Except forcible rape and prostitution.",
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"",
|
|
||||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -1315,17 +1294,10 @@ data_stream_two_tables_1 = [
|
||||||
]
|
]
|
||||||
|
|
||||||
data_stream_two_tables_2 = [
|
data_stream_two_tables_2 = [
|
||||||
[
|
|
||||||
"",
|
|
||||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
],
|
|
||||||
["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
|
["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
|
||||||
[
|
[
|
||||||
"[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
|
"[Based on Uniform Crime Reporting (UCR) Program. Represents "
|
||||||
|
"arrests reported (not charged) by 12,371 agencies",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -1333,7 +1305,8 @@ data_stream_two_tables_2 = [
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]",
|
"with a total population of 239,839,971 as estimated by the FBI. "
|
||||||
|
"See headnote, Table 324]",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
|
|
@ -1344,7 +1317,8 @@ data_stream_two_tables_2 = [
|
||||||
["Offense charged", "", "", "", "Indian/Alaskan", "Asian Pacific"],
|
["Offense charged", "", "", "", "Indian/Alaskan", "Asian Pacific"],
|
||||||
["", "Total", "White", "Black", "Native", "Islander"],
|
["", "Total", "White", "Black", "Native", "Islander"],
|
||||||
[
|
[
|
||||||
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n .\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n "
|
||||||
|
".\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
||||||
"10,690,561",
|
"10,690,561",
|
||||||
"7,389,208",
|
"7,389,208",
|
||||||
"3,027,153",
|
"3,027,153",
|
||||||
|
|
@ -1352,7 +1326,8 @@ data_stream_two_tables_2 = [
|
||||||
"123,656",
|
"123,656",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||||
|
".\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
||||||
"456,965",
|
"456,965",
|
||||||
"268,346",
|
"268,346",
|
||||||
"177,766",
|
"177,766",
|
||||||
|
|
@ -1368,7 +1343,8 @@ data_stream_two_tables_2 = [
|
||||||
"97",
|
"97",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.\n. .",
|
||||||
"16,362",
|
"16,362",
|
||||||
"10,644",
|
"10,644",
|
||||||
"5,319",
|
"5,319",
|
||||||
|
|
@ -1376,7 +1352,8 @@ data_stream_two_tables_2 = [
|
||||||
"230",
|
"230",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||||
|
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||||
"100,496",
|
"100,496",
|
||||||
"43,039",
|
"43,039",
|
||||||
"55,742",
|
"55,742",
|
||||||
|
|
@ -1384,7 +1361,8 @@ data_stream_two_tables_2 = [
|
||||||
"989",
|
"989",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. .\n.\n.\n.",
|
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.",
|
||||||
"330,368",
|
"330,368",
|
||||||
"209,922",
|
"209,922",
|
||||||
"111,904",
|
"111,904",
|
||||||
|
|
@ -1392,7 +1370,8 @@ data_stream_two_tables_2 = [
|
||||||
"3,929",
|
"3,929",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n . .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n "
|
||||||
|
". .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
||||||
"1,364,409",
|
"1,364,409",
|
||||||
"922,139",
|
"922,139",
|
||||||
"406,382",
|
"406,382",
|
||||||
|
|
@ -1400,7 +1379,8 @@ data_stream_two_tables_2 = [
|
||||||
"18,289",
|
"18,289",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n. . . .",
|
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||||
|
".\n.\n.\n. .\n.\n. . . .",
|
||||||
"234,551",
|
"234,551",
|
||||||
"155,994",
|
"155,994",
|
||||||
"74,419",
|
"74,419",
|
||||||
|
|
@ -1408,7 +1388,8 @@ data_stream_two_tables_2 = [
|
||||||
"2,117",
|
"2,117",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.\n. .",
|
||||||
"1,056,473",
|
"1,056,473",
|
||||||
"719,983",
|
"719,983",
|
||||||
"306,625",
|
"306,625",
|
||||||
|
|
@ -1416,7 +1397,8 @@ data_stream_two_tables_2 = [
|
||||||
"15,219",
|
"15,219",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. "
|
||||||
|
".\n.\n. .\n.",
|
||||||
"63,919",
|
"63,919",
|
||||||
"39,077",
|
"39,077",
|
||||||
"23,184",
|
"23,184",
|
||||||
|
|
@ -1424,7 +1406,8 @@ data_stream_two_tables_2 = [
|
||||||
"841",
|
"841",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . . . .",
|
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||||
|
".\n.\n.\n. .\n.\n.\n. . . . . .",
|
||||||
"9,466",
|
"9,466",
|
||||||
"7,085",
|
"7,085",
|
||||||
"2,154",
|
"2,154",
|
||||||
|
|
@ -1432,7 +1415,8 @@ data_stream_two_tables_2 = [
|
||||||
"112",
|
"112",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. "
|
||||||
|
".\n.\n.\n.\n. .\n.\n. .\n.",
|
||||||
"1,032,502",
|
"1,032,502",
|
||||||
"672,865",
|
"672,865",
|
||||||
"332,435",
|
"332,435",
|
||||||
|
|
@ -1440,7 +1424,8 @@ data_stream_two_tables_2 = [
|
||||||
"12,075",
|
"12,075",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. .\n. .\n.",
|
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. "
|
||||||
|
".\n. .\n.",
|
||||||
"67,054",
|
"67,054",
|
||||||
"44,730",
|
"44,730",
|
||||||
"21,251",
|
"21,251",
|
||||||
|
|
@ -1448,7 +1433,8 @@ data_stream_two_tables_2 = [
|
||||||
"728",
|
"728",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. . . . . . .",
|
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||||
|
".\n.\n.\n. . . . . . .",
|
||||||
"161,233",
|
"161,233",
|
||||||
"108,032",
|
"108,032",
|
||||||
"50,367",
|
"50,367",
|
||||||
|
|
@ -1456,7 +1442,8 @@ data_stream_two_tables_2 = [
|
||||||
"1,519",
|
"1,519",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n. .\n.\n.\n.\n.",
|
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||||
|
".\n.\n. .\n.\n.\n.\n.",
|
||||||
"13,960",
|
"13,960",
|
||||||
"9,208",
|
"9,208",
|
||||||
"4,429",
|
"4,429",
|
||||||
|
|
@ -1472,7 +1459,8 @@ data_stream_two_tables_2 = [
|
||||||
"742",
|
"742",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||||
|
".\n. .\n.\n.\n.\n. .",
|
||||||
"212,173",
|
"212,173",
|
||||||
"157,723",
|
"157,723",
|
||||||
"48,746",
|
"48,746",
|
||||||
|
|
@ -1496,7 +1484,8 @@ data_stream_two_tables_2 = [
|
||||||
"1,413",
|
"1,413",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.\n. .",
|
||||||
"60,175",
|
"60,175",
|
||||||
"44,240",
|
"44,240",
|
||||||
"14,347",
|
"14,347",
|
||||||
|
|
@ -1504,7 +1493,8 @@ data_stream_two_tables_2 = [
|
||||||
"873",
|
"873",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.",
|
||||||
"1,301,629",
|
"1,301,629",
|
||||||
"845,974",
|
"845,974",
|
||||||
"437,623",
|
"437,623",
|
||||||
|
|
@ -1512,7 +1502,8 @@ data_stream_two_tables_2 = [
|
||||||
"9,444",
|
"9,444",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . .\n.\n.\n.\n.\n. .\n. .",
|
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . "
|
||||||
|
".\n.\n.\n.\n.\n. .\n. .",
|
||||||
"8,046",
|
"8,046",
|
||||||
"2,290",
|
"2,290",
|
||||||
"5,518",
|
"5,518",
|
||||||
|
|
@ -1528,7 +1519,8 @@ data_stream_two_tables_2 = [
|
||||||
"624",
|
"624",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Driving under the influence . . . . . . .\n. .\n.\n. .\n.\n.\n.\n.\n. .",
|
"Driving under the influence . . . . . . .\n. .\n.\n. "
|
||||||
|
".\n.\n.\n.\n.\n. .",
|
||||||
"1,105,401",
|
"1,105,401",
|
||||||
"954,444",
|
"954,444",
|
||||||
"121,594",
|
"121,594",
|
||||||
|
|
@ -1536,7 +1528,8 @@ data_stream_two_tables_2 = [
|
||||||
"14,460",
|
"14,460",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . "
|
||||||
|
".\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||||
"444,087",
|
"444,087",
|
||||||
"373,189",
|
"373,189",
|
||||||
"50,431",
|
"50,431",
|
||||||
|
|
@ -1544,7 +1537,8 @@ data_stream_two_tables_2 = [
|
||||||
"5,591",
|
"5,591",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . .\n.\n.\n.\n.\n.\n.",
|
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . "
|
||||||
|
".\n.\n.\n.\n.\n.\n.",
|
||||||
"469,958",
|
"469,958",
|
||||||
"387,542",
|
"387,542",
|
||||||
"71,020",
|
"71,020",
|
||||||
|
|
@ -1552,7 +1546,8 @@ data_stream_two_tables_2 = [
|
||||||
"2,844",
|
"2,844",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.",
|
||||||
"515,689",
|
"515,689",
|
||||||
"326,563",
|
"326,563",
|
||||||
"176,169",
|
"176,169",
|
||||||
|
|
@ -1560,7 +1555,8 @@ data_stream_two_tables_2 = [
|
||||||
"4,174",
|
"4,174",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||||
|
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||||
"26,347",
|
"26,347",
|
||||||
"14,581",
|
"14,581",
|
||||||
"11,031",
|
"11,031",
|
||||||
|
|
@ -1568,7 +1564,8 @@ data_stream_two_tables_2 = [
|
||||||
"192",
|
"192",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. .\n.",
|
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. "
|
||||||
|
".\n.",
|
||||||
"2,929,217",
|
"2,929,217",
|
||||||
"1,937,221",
|
"1,937,221",
|
||||||
"911,670",
|
"911,670",
|
||||||
|
|
@ -1576,7 +1573,8 @@ data_stream_two_tables_2 = [
|
||||||
"36,446",
|
"36,446",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n.\n.\n. .\n. . . .",
|
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||||
|
".\n.\n.\n.\n.\n. .\n. . . .",
|
||||||
"1,513",
|
"1,513",
|
||||||
"677",
|
"677",
|
||||||
"828",
|
"828",
|
||||||
|
|
@ -1592,7 +1590,8 @@ data_stream_two_tables_2 = [
|
||||||
"1,060",
|
"1,060",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||||
|
".\n. .\n.\n.\n.\n. .",
|
||||||
"73,616",
|
"73,616",
|
||||||
"48,343",
|
"48,343",
|
||||||
"19,670",
|
"19,670",
|
||||||
|
|
@ -1600,14 +1599,6 @@ data_stream_two_tables_2 = [
|
||||||
"3,950",
|
"3,950",
|
||||||
],
|
],
|
||||||
["1 Except forcible rape and prostitution.", "", "", "", "", ""],
|
["1 Except forcible rape and prostitution.", "", "", "", "", ""],
|
||||||
[
|
|
||||||
"",
|
|
||||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
],
|
|
||||||
]
|
]
|
||||||
|
|
||||||
data_stream_table_areas = [
|
data_stream_table_areas = [
|
||||||
|
|
@ -1634,10 +1625,12 @@ data_stream_columns = [
|
||||||
"Nombre Localidad",
|
"Nombre Localidad",
|
||||||
],
|
],
|
||||||
["Entidad", "", "Municipio", "", "Localidad", ""],
|
["Entidad", "", "Municipio", "", "Localidad", ""],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0094", "Granja Adelita"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0094",
|
||||||
|
"Granja Adelita"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0096", "Agua Azul"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0096", "Agua Azul"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0100", "Rancho Alegre"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0100", "Rancho Alegre"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0102", "Los Arbolitos [Rancho]"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0102",
|
||||||
|
"Los Arbolitos [Rancho]"],
|
||||||
[
|
[
|
||||||
"01",
|
"01",
|
||||||
"Aguascalientes",
|
"Aguascalientes",
|
||||||
|
|
@ -1655,7 +1648,8 @@ data_stream_columns = [
|
||||||
"0112",
|
"0112",
|
||||||
"Baj\xedo los V\xe1zquez",
|
"Baj\xedo los V\xe1zquez",
|
||||||
],
|
],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0113", "Baj\xedo de Montoro"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0113",
|
||||||
|
"Baj\xedo de Montoro"],
|
||||||
[
|
[
|
||||||
"01",
|
"01",
|
||||||
"Aguascalientes",
|
"Aguascalientes",
|
||||||
|
|
@ -1697,8 +1691,10 @@ data_stream_columns = [
|
||||||
"Ca\xf1ada Honda [Estaci\xf3n]",
|
"Ca\xf1ada Honda [Estaci\xf3n]",
|
||||||
],
|
],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0127", "Los Ca\xf1os"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0127", "Los Ca\xf1os"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0128", "El Cari\xf1\xe1n"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0128",
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0129", "El Carmen [Granja]"],
|
"El Cari\xf1\xe1n"],
|
||||||
|
["01", "Aguascalientes", "001", "Aguascalientes", "0129",
|
||||||
|
"El Carmen [Granja]"],
|
||||||
[
|
[
|
||||||
"01",
|
"01",
|
||||||
"Aguascalientes",
|
"Aguascalientes",
|
||||||
|
|
@ -1733,9 +1729,11 @@ data_stream_columns = [
|
||||||
"El Colorado (El Soyatal)",
|
"El Colorado (El Soyatal)",
|
||||||
],
|
],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0146", "El Conejal"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0146", "El Conejal"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0157", "Cotorina de Abajo"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0157",
|
||||||
|
"Cotorina de Abajo"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0162", "Coyotes"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0162", "Coyotes"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0166", "La Huerta (La Cruz)"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0166",
|
||||||
|
"La Huerta (La Cruz)"],
|
||||||
[
|
[
|
||||||
"01",
|
"01",
|
||||||
"Aguascalientes",
|
"Aguascalientes",
|
||||||
|
|
@ -1752,17 +1750,20 @@ data_stream_columns = [
|
||||||
"0171",
|
"0171",
|
||||||
"Los Cuervos (Los Ojos de Agua)",
|
"Los Cuervos (Los Ojos de Agua)",
|
||||||
],
|
],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0172", "San Jos\xe9 [Granja]"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0172",
|
||||||
|
"San Jos\xe9 [Granja]"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0176", "La Chiripa"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0176", "La Chiripa"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0182", "Dolores"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0182", "Dolores"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0183", "Los Dolores"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0183", "Los Dolores"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0190", "El Duraznillo"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0190", "El Duraznillo"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0191", "Los Dur\xf3n"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0191", "Los Dur\xf3n"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0197", "La Escondida"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0197", "La Escondida"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0201", "Brande Vin [Bodegas]"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0201",
|
||||||
|
"Brande Vin [Bodegas]"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0207", "Valle Redondo"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0207", "Valle Redondo"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0209", "La Fortuna"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0209", "La Fortuna"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0212", "Lomas del Gachup\xedn"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0212",
|
||||||
|
"Lomas del Gachup\xedn"],
|
||||||
[
|
[
|
||||||
"01",
|
"01",
|
||||||
"Aguascalientes",
|
"Aguascalientes",
|
||||||
|
|
@ -1772,22 +1773,12 @@ data_stream_columns = [
|
||||||
"El Carmen (Gallinas G\xfceras) [Rancho]",
|
"El Carmen (Gallinas G\xfceras) [Rancho]",
|
||||||
],
|
],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0216", "La Gloria"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0216", "La Gloria"],
|
||||||
["01", "Aguascalientes", "001", "Aguascalientes", "0226", "Hacienda Nueva"],
|
["01", "Aguascalientes", "001", "Aguascalientes", "0226",
|
||||||
|
"Hacienda Nueva"],
|
||||||
]
|
]
|
||||||
|
|
||||||
data_stream_split_text = [
|
data_stream_split_text = [
|
||||||
[
|
["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""],
|
||||||
"FEB",
|
|
||||||
"RUAR",
|
|
||||||
"Y 2014 M27 (BUS)",
|
|
||||||
"",
|
|
||||||
"ALPHABETIC LISTING BY T",
|
|
||||||
"YPE",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"ABLPDM27",
|
|
||||||
],
|
|
||||||
["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
|
["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
|
||||||
["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
|
["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
|
||||||
["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
|
["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
|
||||||
|
|
@ -1977,7 +1968,18 @@ data_stream_split_text = [
|
||||||
"(872) 825-8309",
|
"(872) 825-8309",
|
||||||
"2014/04/11",
|
"2014/04/11",
|
||||||
],
|
],
|
||||||
["", "", "A SENSU JAPANESE", "", "7123 SOUTH 92ND EAST", "", "", "", "", ""],
|
[
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"A SENSU JAPANESE",
|
||||||
|
"",
|
||||||
|
"7123 SOUTH 92ND EAST",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"625422",
|
"625422",
|
||||||
"BAW",
|
"BAW",
|
||||||
|
|
@ -2029,7 +2031,18 @@ data_stream_split_text = [
|
||||||
"(580) 928-2700",
|
"(580) 928-2700",
|
||||||
"2014/09/08",
|
"2014/09/08",
|
||||||
],
|
],
|
||||||
["", "", "ANDOLINI'S PIZZERIA &", "", "12140 EAST 96TH STREET", "", "", "", "", ""],
|
[
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"ANDOLINI'S PIZZERIA &",
|
||||||
|
"",
|
||||||
|
"12140 EAST 96TH STREET",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
],
|
||||||
[
|
[
|
||||||
"428377",
|
"428377",
|
||||||
"BAW",
|
"BAW",
|
||||||
|
|
@ -2148,7 +2161,8 @@ data_stream_flag_size = [
|
||||||
"from SBI",
|
"from SBI",
|
||||||
"from",
|
"from",
|
||||||
],
|
],
|
||||||
["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other", "NCDC"],
|
["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other",
|
||||||
|
"NCDC"],
|
||||||
["", "", "", "", "", "& FIs", "", "", "", "Banks", ""],
|
["", "", "", "", "", "& FIs", "", "", "", "Banks", ""],
|
||||||
["1", "2=", "3", "4", "5", "6=", "7", "8", "9", "10", "11"],
|
["1", "2=", "3", "4", "5", "6=", "7", "8", "9", "10", "11"],
|
||||||
["", "(3 to 6)+14", "", "", "", "(7 to13)", "", "", "", "", ""],
|
["", "(3 to 6)+14", "", "", "", "(7 to13)", "", "", "", "", ""],
|
||||||
|
|
@ -2165,7 +2179,8 @@ data_stream_flag_size = [
|
||||||
"-",
|
"-",
|
||||||
"0.25",
|
"0.25",
|
||||||
],
|
],
|
||||||
["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-", "-", "-"],
|
["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-",
|
||||||
|
"-", "-"],
|
||||||
[
|
[
|
||||||
"Assam",
|
"Assam",
|
||||||
"12.69",
|
"12.69",
|
||||||
|
|
@ -2194,8 +2209,10 @@ data_stream_flag_size = [
|
||||||
],
|
],
|
||||||
["Chhattisgarh", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
["Chhattisgarh", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||||
["Goa", "1.4", "1.02", "-", "-", "0.38", "0.31", "-", "0.07", "-", "-"],
|
["Goa", "1.4", "1.02", "-", "-", "0.38", "0.31", "-", "0.07", "-", "-"],
|
||||||
["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11", "-", "0.44"],
|
["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11",
|
||||||
["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64", "-", "0.49"],
|
"-", "0.44"],
|
||||||
|
["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64",
|
||||||
|
"-", "0.49"],
|
||||||
[
|
[
|
||||||
"Himachal Pradesh",
|
"Himachal Pradesh",
|
||||||
"8.02",
|
"8.02",
|
||||||
|
|
@ -2223,7 +2240,8 @@ data_stream_flag_size = [
|
||||||
"-",
|
"-",
|
||||||
],
|
],
|
||||||
["Jharkhand", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
["Jharkhand", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||||
["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89", "-", "0.69"],
|
["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89",
|
||||||
|
"-", "0.69"],
|
||||||
[
|
[
|
||||||
"Kerala",
|
"Kerala",
|
||||||
"29.03",
|
"29.03",
|
||||||
|
|
@ -2263,11 +2281,16 @@ data_stream_flag_size = [
|
||||||
"0.02",
|
"0.02",
|
||||||
"2.89",
|
"2.89",
|
||||||
],
|
],
|
||||||
["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-", "0.09"],
|
["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-",
|
||||||
["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05", "-", "0.03"],
|
"0.09"],
|
||||||
["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-", "-", "0.03"],
|
["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05",
|
||||||
["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-", "0.04"],
|
"-", "0.03"],
|
||||||
["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66", "-", "0.2"],
|
["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-",
|
||||||
|
"-", "0.03"],
|
||||||
|
["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-",
|
||||||
|
"0.04"],
|
||||||
|
["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66",
|
||||||
|
"-", "0.2"],
|
||||||
[
|
[
|
||||||
"Punjab",
|
"Punjab",
|
||||||
"19.18",
|
"19.18",
|
||||||
|
|
@ -2295,8 +2318,10 @@ data_stream_flag_size = [
|
||||||
"0.81",
|
"0.81",
|
||||||
],
|
],
|
||||||
["Sikkim", "0.16", "-", "-", "-", "0.16", "0.03", "-", "-", "-", "0.01"],
|
["Sikkim", "0.16", "-", "-", "-", "0.16", "0.03", "-", "-", "-", "0.01"],
|
||||||
["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-", "0.68"],
|
["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-",
|
||||||
["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-", "0.02"],
|
"0.68"],
|
||||||
|
["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-",
|
||||||
|
"0.02"],
|
||||||
["Uttaranchal", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
["Uttaranchal", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
|
||||||
[
|
[
|
||||||
"Uttar Pradesh",
|
"Uttar Pradesh",
|
||||||
|
|
@ -2393,11 +2418,13 @@ data_stream_edge_tol = [
|
||||||
["Costs", "(0.21)"],
|
["Costs", "(0.21)"],
|
||||||
["T\notal investment result per unit", "3.78"],
|
["T\notal investment result per unit", "3.78"],
|
||||||
[
|
[
|
||||||
"1 The results cover the period from inception of the Fund at 8 April 2016 through 31 December 2016.",
|
"1 The results cover the period from inception of the Fund at "
|
||||||
|
"8 April 2016 through 31 December 2016.",
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
"2 The result per unit is calculated using the total number of outstanding unit as per the end of the",
|
"2 The result per unit is calculated using the total number of "
|
||||||
|
"outstanding unit as per the end of the",
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
["period.", ""],
|
["period.", ""],
|
||||||
|
|
@ -2454,7 +2481,8 @@ data_lattice_table_rotated = [
|
||||||
"Men",
|
"Men",
|
||||||
"Women",
|
"Women",
|
||||||
],
|
],
|
||||||
["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645", "2391"],
|
["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645",
|
||||||
|
"2391"],
|
||||||
[
|
[
|
||||||
"Tamil Nadu",
|
"Tamil Nadu",
|
||||||
"7387",
|
"7387",
|
||||||
|
|
@ -2503,11 +2531,16 @@ data_lattice_table_rotated = [
|
||||||
"1417",
|
"1417",
|
||||||
"1599",
|
"1599",
|
||||||
],
|
],
|
||||||
["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122", "2503"],
|
["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122",
|
||||||
["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579", "1709"],
|
"2503"],
|
||||||
["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093", "1628"],
|
["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579",
|
||||||
["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413", "2027"],
|
"1709"],
|
||||||
["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185", "1366"],
|
["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093",
|
||||||
|
"1628"],
|
||||||
|
["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413",
|
||||||
|
"2027"],
|
||||||
|
["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185",
|
||||||
|
"1366"],
|
||||||
[
|
[
|
||||||
"Pooled",
|
"Pooled",
|
||||||
"38742",
|
"38742",
|
||||||
|
|
@ -2573,7 +2606,8 @@ data_lattice_two_tables_2 = [
|
||||||
]
|
]
|
||||||
|
|
||||||
data_lattice_table_regions = [
|
data_lattice_table_regions = [
|
||||||
["Età dell’Assicurato \nall’epoca del decesso", "Misura % di \nmaggiorazione"],
|
["Età dell’Assicurato \nall’epoca del decesso",
|
||||||
|
"Misura % di \nmaggiorazione"],
|
||||||
["18-75", "1,00%"],
|
["18-75", "1,00%"],
|
||||||
["76-80", "0,50%"],
|
["76-80", "0,50%"],
|
||||||
["81 in poi", "0,10%"],
|
["81 in poi", "0,10%"],
|
||||||
|
|
@ -2596,10 +2630,12 @@ data_lattice_table_areas = [
|
||||||
["Kerala", "2400", "7.2", "0.5", "25.3", "20.1", "41.5", "5.5", ""],
|
["Kerala", "2400", "7.2", "0.5", "25.3", "20.1", "41.5", "5.5", ""],
|
||||||
["Tamil Nadu", "2400", "21.4", "2.3", "8.8", "35.5", "25.8", "6.2", ""],
|
["Tamil Nadu", "2400", "21.4", "2.3", "8.8", "35.5", "25.8", "6.2", ""],
|
||||||
["Karnataka", "2399", "37.4", "2.8", "12.5", "18.3", "23.1", "5.8", ""],
|
["Karnataka", "2399", "37.4", "2.8", "12.5", "18.3", "23.1", "5.8", ""],
|
||||||
["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9", ""],
|
["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9",
|
||||||
|
""],
|
||||||
["Maharashtra", "2400", "22.0", "0.9", "17.3", "20.3", "32.6", "7.0", ""],
|
["Maharashtra", "2400", "22.0", "0.9", "17.3", "20.3", "32.6", "7.0", ""],
|
||||||
["Gujarat", "2390", "28.6", "0.1", "14.4", "23.1", "26.9", "6.8", ""],
|
["Gujarat", "2390", "28.6", "0.1", "14.4", "23.1", "26.9", "6.8", ""],
|
||||||
["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6", ""],
|
["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6",
|
||||||
|
""],
|
||||||
["Orissa", "2405", "33.2", "1.0", "10.4", "25.7", "21.2", "8.5", ""],
|
["Orissa", "2405", "33.2", "1.0", "10.4", "25.7", "21.2", "8.5", ""],
|
||||||
["West Bengal", "2293", "41.7", "4.4", "13.2", "17.1", "21.2", "2.4", ""],
|
["West Bengal", "2293", "41.7", "4.4", "13.2", "17.1", "21.2", "2.4", ""],
|
||||||
["Uttar Pradesh", "2400", "35.3", "2.1", "4.5", "23.3", "27.1", "7.6", ""],
|
["Uttar Pradesh", "2400", "35.3", "2.1", "4.5", "23.3", "27.1", "7.6", ""],
|
||||||
|
|
@ -2650,7 +2686,8 @@ data_lattice_process_background = [
|
||||||
"3,658",
|
"3,658",
|
||||||
"3,183",
|
"3,183",
|
||||||
],
|
],
|
||||||
["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173", "855"],
|
["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173",
|
||||||
|
"855"],
|
||||||
["Total", "", "47", "92", "11.81", "22,455", "19,584", "10,644"],
|
["Total", "", "47", "92", "11.81", "22,455", "19,584", "10,644"],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -2689,7 +2726,8 @@ data_lattice_copy_text = [
|
||||||
["COHS", "San Mateo", "Health Plan of San Mateo", "113,202"],
|
["COHS", "San Mateo", "Health Plan of San Mateo", "113,202"],
|
||||||
["COHS", "Ventura", "Gold Coast Health Plan", "202,217"],
|
["COHS", "Ventura", "Gold Coast Health Plan", "202,217"],
|
||||||
["COHS", "Total COHS Enrollment", "", "2,176,064"],
|
["COHS", "Total COHS Enrollment", "", "2,176,064"],
|
||||||
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "", "10,132,022"],
|
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "",
|
||||||
|
"10,132,022"],
|
||||||
["PCCM", "Los Angeles", "AIDS Healthcare Foundation", "828"],
|
["PCCM", "Los Angeles", "AIDS Healthcare Foundation", "828"],
|
||||||
["PCCM", "San Francisco", "Family Mosaic", "25"],
|
["PCCM", "San Francisco", "Family Mosaic", "25"],
|
||||||
["PCCM", "Total PHP Enrollment", "", "853"],
|
["PCCM", "Total PHP Enrollment", "", "853"],
|
||||||
|
|
@ -2721,7 +2759,8 @@ data_lattice_shift_text_left_top = [
|
||||||
],
|
],
|
||||||
["Blood Pressure #", "2400", "Men (≥ 18yrs)", "10%", "95%", "20%", "1728"],
|
["Blood Pressure #", "2400", "Men (≥ 18yrs)", "10%", "95%", "20%", "1728"],
|
||||||
["", "", "Women (≥ 18 yrs)", "", "", "", "1728"],
|
["", "", "Women (≥ 18 yrs)", "", "", "", "1728"],
|
||||||
["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%",
|
||||||
|
"1825"],
|
||||||
["", "", "Women (≥ 18 yrs)", "", "", "", "1825"],
|
["", "", "Women (≥ 18 yrs)", "", "", "", "1825"],
|
||||||
[
|
[
|
||||||
"Knowledge &\nPractices on HTN &\nDM",
|
"Knowledge &\nPractices on HTN &\nDM",
|
||||||
|
|
@ -2746,7 +2785,8 @@ data_lattice_shift_text_disable = [
|
||||||
"Sample size\nper State",
|
"Sample size\nper State",
|
||||||
],
|
],
|
||||||
["Anthropometry", "", "", "", "", "", ""],
|
["Anthropometry", "", "", "", "", "", ""],
|
||||||
["Clinical Examination", "2400", "", "All the available individuals", "", "", ""],
|
["Clinical Examination", "2400", "", "All the available individuals",
|
||||||
|
"", "", ""],
|
||||||
["History of morbidity", "", "", "", "", "", ""],
|
["History of morbidity", "", "", "", "", "", ""],
|
||||||
[
|
[
|
||||||
"Diet survey",
|
"Diet survey",
|
||||||
|
|
@ -2758,9 +2798,11 @@ data_lattice_shift_text_disable = [
|
||||||
"",
|
"",
|
||||||
],
|
],
|
||||||
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
||||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
|
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
|
||||||
|
"1728"],
|
||||||
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
||||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
|
||||||
|
"1825"],
|
||||||
[
|
[
|
||||||
"Knowledge &\nPractices on HTN &",
|
"Knowledge &\nPractices on HTN &",
|
||||||
"2400",
|
"2400",
|
||||||
|
|
@ -2785,7 +2827,8 @@ data_lattice_shift_text_right_bottom = [
|
||||||
],
|
],
|
||||||
["Anthropometry", "", "", "", "", "", ""],
|
["Anthropometry", "", "", "", "", "", ""],
|
||||||
["Clinical Examination", "", "", "", "", "", ""],
|
["Clinical Examination", "", "", "", "", "", ""],
|
||||||
["History of morbidity", "2400", "", "", "", "", "All the available individuals"],
|
["History of morbidity", "2400", "", "", "", "",
|
||||||
|
"All the available individuals"],
|
||||||
[
|
[
|
||||||
"Diet survey",
|
"Diet survey",
|
||||||
"1200",
|
"1200",
|
||||||
|
|
@ -2796,9 +2839,11 @@ data_lattice_shift_text_right_bottom = [
|
||||||
"All the individuals partaking meals in the HH",
|
"All the individuals partaking meals in the HH",
|
||||||
],
|
],
|
||||||
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
|
||||||
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
|
["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
|
||||||
|
"1728"],
|
||||||
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
|
||||||
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
|
["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
|
||||||
|
"1825"],
|
||||||
["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
|
["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
|
||||||
[
|
[
|
||||||
"Knowledge &\nPractices on HTN &\nDM",
|
"Knowledge &\nPractices on HTN &\nDM",
|
||||||
|
|
@ -2820,7 +2865,7 @@ data_arabic = [
|
||||||
]
|
]
|
||||||
|
|
||||||
data_stream_layout_kwargs = [
|
data_stream_layout_kwargs = [
|
||||||
["V i n s a u Ve r r e", ""],
|
["V i n s a u V e r r e", ""],
|
||||||
["Les Blancs", "12.5CL"],
|
["Les Blancs", "12.5CL"],
|
||||||
["A.O.P Côtes du Rhône", ""],
|
["A.O.P Côtes du Rhône", ""],
|
||||||
["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
|
["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
|
||||||
|
|
|
||||||
|
Before Width: | Height: | Size: 8.2 KiB After Width: | Height: | Size: 8.2 KiB |
|
Before Width: | Height: | Size: 48 KiB After Width: | Height: | Size: 33 KiB |
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 6.7 KiB After Width: | Height: | Size: 6.7 KiB |
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 14 KiB |
|
After Width: | Height: | Size: 9.7 KiB |
|
Before Width: | Height: | Size: 8.8 KiB After Width: | Height: | Size: 8.9 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 19 KiB |
|
|
@ -19,10 +19,16 @@ def test_help_output():
|
||||||
output = result.output
|
output = result.output
|
||||||
|
|
||||||
assert prog_name == "camelot"
|
assert prog_name == "camelot"
|
||||||
assert result.output.startswith("Usage: %(prog_name)s [OPTIONS] COMMAND" % locals())
|
assert result.output.startswith(
|
||||||
|
"Usage: %(prog_name)s [OPTIONS] COMMAND" %
|
||||||
|
locals()
|
||||||
|
)
|
||||||
assert all(
|
assert all(
|
||||||
v in result.output
|
v in result.output
|
||||||
for v in ["Options:", "--version", "--help", "Commands:", "lattice", "stream"]
|
for v in [
|
||||||
|
"Options:", "--version", "--help", "Commands:", "lattice",
|
||||||
|
"stream"
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -120,21 +126,24 @@ def test_cli_output_format():
|
||||||
# json
|
# json
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
cli,
|
cli,
|
||||||
["--format", "json", "--output", outfile.format("json"), "stream", infile],
|
["--format", "json", "--output", outfile.format("json"), "stream",
|
||||||
|
infile],
|
||||||
)
|
)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
# excel
|
# excel
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
cli,
|
cli,
|
||||||
["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile],
|
["--format", "excel", "--output", outfile.format("xlsx"), "stream",
|
||||||
|
infile],
|
||||||
)
|
)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
# html
|
# html
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
cli,
|
cli,
|
||||||
["--format", "html", "--output", outfile.format("html"), "stream", infile],
|
["--format", "html", "--output", outfile.format("html"), "stream",
|
||||||
|
infile],
|
||||||
)
|
)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
|
@ -166,6 +175,10 @@ def test_cli_quiet():
|
||||||
assert "No tables found on page-1" in result.output
|
assert "No tables found on page-1" in result.output
|
||||||
|
|
||||||
result = runner.invoke(
|
result = runner.invoke(
|
||||||
cli, ["--quiet", "--format", "csv", "--output", outfile, "stream", infile]
|
cli,
|
||||||
|
[
|
||||||
|
"--quiet", "--format", "csv", "--output", outfile, "stream",
|
||||||
|
infile
|
||||||
|
]
|
||||||
)
|
)
|
||||||
assert "No tables found on page-1" not in result.output
|
assert "No tables found on page-1" not in result.output
|
||||||
|
|
|
||||||
|
|
@ -11,12 +11,15 @@ from camelot.__version__ import generate_version
|
||||||
|
|
||||||
from .data import *
|
from .data import *
|
||||||
|
|
||||||
|
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
|
||||||
def test_parsing_report():
|
def test_parsing_report():
|
||||||
parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
|
parsing_report = {
|
||||||
|
"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1
|
||||||
|
}
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
|
|
@ -28,9 +31,17 @@ def test_password():
|
||||||
|
|
||||||
filename = os.path.join(testdir, "health_protected.pdf")
|
filename = os.path.join(testdir, "health_protected.pdf")
|
||||||
tables = camelot.read_pdf(filename, password="ownerpass", flavor="stream")
|
tables = camelot.read_pdf(filename, password="ownerpass", flavor="stream")
|
||||||
|
<<<<<<< HEAD
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
|
tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
|
||||||
|
=======
|
||||||
|
assert len(tables) == 1
|
||||||
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
|
||||||
|
assert len(tables) == 1
|
||||||
|
>>>>>>> Fix unit tests, lint, drop Python 2 support
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -229,9 +240,9 @@ def test_repr():
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert \
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
repr(tables[0].cells[0][0]) == \
|
||||||
)
|
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||||
|
|
||||||
|
|
||||||
def test_pages():
|
def test_pages():
|
||||||
|
|
@ -239,22 +250,23 @@ def test_pages():
|
||||||
tables = camelot.read_pdf(url)
|
tables = camelot.read_pdf(url)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert \
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
repr(tables[0].cells[0][0]) == \
|
||||||
)
|
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="1-end")
|
tables = camelot.read_pdf(url, pages="1-end")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert \
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
repr(tables[0].cells[0][0]) == \
|
||||||
)
|
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="all")
|
tables = camelot.read_pdf(url, pages="all")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert (
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
repr(tables[0].cells[0][0]) ==
|
||||||
|
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -264,7 +276,8 @@ def test_url():
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert (
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
repr(tables[0].cells[0][0]) ==
|
||||||
|
"<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -284,7 +297,12 @@ def test_table_order():
|
||||||
return t
|
return t
|
||||||
|
|
||||||
table_list = TableList(
|
table_list = TableList(
|
||||||
[_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
|
[
|
||||||
|
_make_table(2, 1),
|
||||||
|
_make_table(1, 1),
|
||||||
|
_make_table(3, 4),
|
||||||
|
_make_table(1, 2)
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
assert [(t.page, t.order) for t in sorted(table_list)] == [
|
assert [(t.page, t.order) for t in sorted(table_list)] == [
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,30 @@ import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
|
||||||
import camelot
|
import camelot
|
||||||
|
|
||||||
|
# The version of Matplotlib has an impact on some of the tests. Unfortunately,
|
||||||
|
# we can't enforce usage of a recent version of Matplotlib without dropping
|
||||||
|
# support for Python 3.6.
|
||||||
|
# To check the version of matplotlib installed:
|
||||||
|
# pip freeze | grep matplotlib
|
||||||
|
# To force upgrade:
|
||||||
|
# pip install --upgrade --force-reinstall matplotlib
|
||||||
|
# To force usage of a Python 3.6 compatible version:
|
||||||
|
# pip install "matplotlib==2.2.5"
|
||||||
|
# This condition can be removed in favor of a version requirement bump for
|
||||||
|
# matplotlib once support for Python 3.5 is dropped.
|
||||||
|
|
||||||
|
# True when the installed Matplotlib is older than 3.2.1.
# The version is compared as a tuple of integers rather than as a string:
# a lexicographic comparison would wrongly report e.g. "3.10.0" as older
# than "3.2.1" and skip the plot tests on modern releases. Only the first
# three dot-separated components are considered; a component that is not
# purely numeric (e.g. the "0rc1" of a pre-release) is treated as 0.
LEGACY_MATPLOTLIB = tuple(
    int(part) if part.isdigit() else 0
    for part in matplotlib.__version__.split(".")[:3]
) < (3, 2, 1)
|
||||||
|
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||||
|
reason="depends on a recent version of MatPlotLib")
|
||||||
@pytest.mark.mpl_image_compare(
|
@pytest.mark.mpl_image_compare(
|
||||||
baseline_dir="files/baseline_plots", remove_text=True)
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_text_plot():
|
def test_text_plot():
|
||||||
|
|
@ -26,6 +43,15 @@ def test_grid_plot():
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
return camelot.plot(tables[0], kind='grid')
|
return camelot.plot(tables[0], kind='grid')
|
||||||
|
|
||||||
|
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||||
|
reason="depends on a recent version of MatPlotLib")
|
||||||
|
@pytest.mark.mpl_image_compare(
|
||||||
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
|
def test_stream_grid_plot():
|
||||||
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
|
tables = camelot.read_pdf(filename, flavor="stream")
|
||||||
|
return camelot.plot(tables[0], kind='grid')
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.mpl_image_compare(
|
@pytest.mark.mpl_image_compare(
|
||||||
baseline_dir="files/baseline_plots", remove_text=True)
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
|
|
@ -35,6 +61,8 @@ def test_lattice_contour_plot():
|
||||||
return camelot.plot(tables[0], kind='contour')
|
return camelot.plot(tables[0], kind='contour')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||||
|
reason="depends on a recent version of MatPlotLib")
|
||||||
@pytest.mark.mpl_image_compare(
|
@pytest.mark.mpl_image_compare(
|
||||||
baseline_dir="files/baseline_plots", remove_text=True)
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_stream_contour_plot():
|
def test_stream_contour_plot():
|
||||||
|
|
@ -51,6 +79,8 @@ def test_line_plot():
|
||||||
return camelot.plot(tables[0], kind='line')
|
return camelot.plot(tables[0], kind='line')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||||
|
reason="depends on a recent version of MatPlotLib")
|
||||||
@pytest.mark.mpl_image_compare(
|
@pytest.mark.mpl_image_compare(
|
||||||
baseline_dir="files/baseline_plots", remove_text=True)
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_joint_plot():
|
def test_joint_plot():
|
||||||
|
|
@ -59,6 +89,8 @@ def test_joint_plot():
|
||||||
return camelot.plot(tables[0], kind='joint')
|
return camelot.plot(tables[0], kind='joint')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
|
||||||
|
reason="depends on a recent version of MatPlotLib")
|
||||||
@pytest.mark.mpl_image_compare(
|
@pytest.mark.mpl_image_compare(
|
||||||
baseline_dir="files/baseline_plots", remove_text=True)
|
baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_textedge_plot():
|
def test_textedge_plot():
|
||||||
|
|
|
||||||