Fix unit tests, lint, drop Python 2 support
Drop EOL Python 2 support. Resolve unit test discrepancies. Update unit tests to pass in Travis across all supported Python versions. Linting.
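A recurring fix throughout this diff replaces mutable default arguments such as `layout_kwargs={}` with `layout_kwargs=None` plus a `layout_kwargs = layout_kwargs or {}` guard. A minimal standalone sketch of why (illustrative function names, not camelot code): Python evaluates a default value once, at function definition time, so a default `{}` is shared across every call.

```python
def parse(layout_kwargs={}):              # one dict shared by every call
    layout_kwargs.setdefault("seen", 0)
    layout_kwargs["seen"] += 1
    return layout_kwargs["seen"]

print(parse(), parse())                   # 1 2 -- state leaks between calls


def parse_fixed(layout_kwargs=None):
    layout_kwargs = layout_kwargs or {}   # fresh dict on each call
    layout_kwargs.setdefault("seen", 0)
    layout_kwargs["seen"] += 1
    return layout_kwargs["seen"]

print(parse_fixed(), parse_fixed())       # 1 1
```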
.bandit

@@ -0,0 +1,3 @@
+[bandit]
+# Ignore concerns about asserts, necessary for unit test code
+skips: B101,B102
.deepsource.toml

@@ -0,0 +1,12 @@
+version = 1
+
+exclude_patterns = [
+  "camelot/ext/**"
+]
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+[analyzers.meta]
+runtime_version = "3.x.x"
.gitignore

@@ -4,6 +4,7 @@ __pycache__/
 
 build/
 dist/
 prof/
 *.egg-info/
 .eggs/
+.coverage

@@ -17,3 +18,5 @@ htmlcov/
 
 # vscode
 .vscode
+
+.DS_Store
.travis.yml

@@ -1,4 +1,3 @@
-sudo: true
 language: python
 cache: pip
 addons:

@@ -8,10 +7,6 @@ install:
   - make install
 jobs:
   include:
-    - stage: test
-      script:
-        - make test
-      python: '2.7'
    - stage: test
      script:
        - make test
README.md

@@ -7,7 +7,7 @@
 [](https://travis-ci.org/camelot-dev/camelot) [](https://camelot-py.readthedocs.io/en/master/)
 [](https://codecov.io/github/camelot-dev/camelot?branch=master)
 [](https://pypi.org/project/camelot-py/) [](https://pypi.org/project/camelot-py/) [](https://pypi.org/project/camelot-py/) [](https://gitter.im/camelot-dev/Lobby)
-[](https://github.com/ambv/black)
+[](https://github.com/ambv/black) [](https://deepsource.io/gh/camelot-dev/camelot/?ref=repository-badge)
 
 **Camelot** is a Python library that makes it easy for *anyone* to extract tables from PDF files!

@@ -71,7 +71,7 @@ $ conda install -c conda-forge camelot-py
 ### Using pip
 
-After [installing the dependencies](https://camelot-py.readthedocs.io/en/master/user/install-deps.html) ([tk](https://packages.ubuntu.com/trusty/python-tk) and [ghostscript](https://www.ghostscript.com/)), you can simply use pip to install Camelot:
+After [installing the dependencies](https://camelot-py.readthedocs.io/en/master/user/install-deps.html) ([tk](https://packages.ubuntu.com/bionic/python/python-tk) and [ghostscript](https://www.ghostscript.com/)), you can simply use pip to install Camelot:
 
 <pre>
 $ pip install camelot-py[cv]

@@ -124,6 +124,10 @@ After installation, you can run tests using:
 $ python setup.py test
 </pre>
 
+## Wrappers
+
+- [camelot-php](https://github.com/randomstate/camelot-php) provides a [PHP](https://www.php.net/) wrapper on Camelot.
+
 ## Versioning
 
 Camelot uses [Semantic Versioning](https://semver.org/). For the available versions, see the tags on this repository. For the changelog, you can check out [HISTORY.md](https://github.com/camelot-dev/camelot/blob/master/HISTORY.md).

@@ -132,4 +136,8 @@ Camelot uses [Semantic Versioning](https://semver.org/). For the available versi
 
 This project is licensed under the MIT License, see the [LICENSE](https://github.com/camelot-dev/camelot/blob/master/LICENSE) file for details.
 
-<img src="http://i65.tinypic.com/9h4ajs.png" align="centre" />
+## Support the development
+
+You can support our work on Camelot with a one-time or monthly donation [on OpenCollective](https://opencollective.com/camelot). Organizations who use camelot can also sponsor the project for an acknowledgement on [our documentation site](https://camelot-py.readthedocs.io/en/master/) and this README.
+
+Special thanks to all the users, organizations and contributors that support Camelot!
camelot/core.py

@@ -38,7 +38,7 @@ class TextEdge(object):
     intersections: int
         Number of intersections with horizontal text rows.
     is_valid: bool
-        A text edge is valid if it intersections with at least
+        A text edge is valid if it intersects with at least
         TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.
 
     """

@@ -65,7 +65,8 @@ class TextEdge(object):
         the is_valid attribute.
         """
         if np.isclose(self.y0, y0, atol=edge_tol):
-            self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
+            self.x = (self.intersections * self.x + x) / \
+                float(self.intersections + 1)
             self.y0 = y0
             self.intersections += 1
             # a textedge is valid only if it extends uninterrupted

@@ -141,13 +142,16 @@ class TextEdges(object):
         """
         intersections_sum = {
             "left": sum(
-                te.intersections for te in self._textedges["left"] if te.is_valid
+                te.intersections for te in self._textedges["left"]
+                if te.is_valid
             ),
             "right": sum(
-                te.intersections for te in self._textedges["right"] if te.is_valid
+                te.intersections for te in self._textedges["right"]
+                if te.is_valid
             ),
             "middle": sum(
-                te.intersections for te in self._textedges["middle"] if te.is_valid
+                te.intersections for te in self._textedges["middle"]
+                if te.is_valid
             ),
         }

@@ -292,7 +296,10 @@ class Cell(object):
 
     def __repr__(self):
         return "<Cell x1={} y1={} x2={} y2={}>".format(
-            round(self.x1, 2), round(self.y1, 2), round(self.x2, 2), round(self.y2, 2)
+            round(self.x1, 2),
+            round(self.y1, 2),
+            round(self.x2, 2),
+            round(self.y2, 2)
         )
 
     @property

@@ -342,7 +349,9 @@ class Table(object):
     def __init__(self, cols, rows):
         self.cols = cols
         self.rows = rows
-        self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows]
+        self.cells = [
+            [Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows
+        ]
         self.df = None
         self.shape = (0, 0)
         self.accuracy = 0

@@ -579,7 +588,8 @@ class Table(object):
             Output filepath.
 
         """
-        kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1}
+        kw = {"encoding": "utf-8", "index": False, "header": False,
+              "quoting": 1}
         kw.update(kwargs)
         self.df.to_csv(path, **kw)

@@ -616,6 +626,7 @@ class Table(object):
             "encoding": "utf-8",
         }
         kw.update(kwargs)
+        # pylint: disable=abstract-class-instantiated
         writer = pd.ExcelWriter(path)
         self.df.to_excel(writer, **kw)
         writer.save()

@@ -692,7 +703,8 @@ class TableList(object):
         ext = kwargs.get("ext")
         for table in self._tables:
             filename = os.path.join(
-                "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
+                "{}-page-{}-table-{}{}".format(root, table.page, table.order,
+                                               ext)
             )
             filepath = os.path.join(dirname, filename)
             to_format = self._format_func(table, f)

@@ -707,7 +719,10 @@ class TableList(object):
         with zipfile.ZipFile(zipname, "w", allowZip64=True) as z:
             for table in self._tables:
                 filename = os.path.join(
-                    "{}-page-{}-table-{}{}".format(root, table.page, table.order, ext)
+                    "{}-page-{}-table-{}{}".format(root,
+                                                   table.page,
+                                                   table.order,
+                                                   ext)
                 )
                 filepath = os.path.join(dirname, filename)
                 z.write(filepath, os.path.basename(filepath))

@@ -739,10 +754,12 @@ class TableList(object):
             self._compress_dir(**kwargs)
         elif f == "excel":
             filepath = os.path.join(dirname, basename)
+            # pylint: disable=abstract-class-instantiated
             writer = pd.ExcelWriter(filepath)
             for table in self._tables:
                 sheet_name = "page-{}-table-{}".format(table.page, table.order)
-                table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8")
+                table.df.to_excel(writer, sheet_name=sheet_name,
+                                  encoding="utf-8")
             writer.save()
             if compress:
                 zipname = os.path.join(os.path.dirname(path), root) + ".zip"
camelot/handlers.py

@@ -113,14 +113,20 @@ class PDFHandler(object):
                 outfile.addPage(p)
             with open(fpath, "wb") as f:
                 outfile.write(f)
-            layout, dim = get_page_layout(fpath)
+            layout, __ = get_page_layout(fpath)
             # fix rotated PDF
             chars = get_text_objects(layout, ltype="char")
             horizontal_text = get_text_objects(layout, ltype="horizontal_text")
             vertical_text = get_text_objects(layout, ltype="vertical_text")
             rotation = get_rotation(chars, horizontal_text, vertical_text)
             if rotation != "":
-                fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext])
+                fpath_new = "".join(
+                    [
+                        froot.replace("page", "p"),
+                        "_rotated",
+                        fext
+                    ]
+                )
                 os.rename(fpath, fpath_new)
                 infile = PdfFileReader(open(fpath_new, "rb"), strict=False)
                 if infile.isEncrypted:

@@ -136,7 +142,8 @@ class PDFHandler(object):
                     outfile.write(f)
 
     def parse(
-        self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs
+        self, flavor="lattice", suppress_stdout=False, layout_kwargs=None,
+        **kwargs
     ):
         """Extracts tables by calling parser.get_tables on all single
         page PDFs.

@@ -149,7 +156,7 @@ class PDFHandler(object):
         suppress_stdout : str (default: False)
             Suppress logs and warnings.
         layout_kwargs : dict, optional (default: {})
-            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
+            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
         kwargs : dict
             See camelot.read_pdf kwargs.

@@ -159,17 +166,21 @@ class PDFHandler(object):
             List of tables found in PDF.
 
         """
+        layout_kwargs = layout_kwargs or {}
         tables = []
         with TemporaryDirectory() as tempdir:
             for p in self.pages:
                 self._save_page(self.filepath, p, tempdir)
             pages = [
-                os.path.join(tempdir, "page-{0}.pdf".format(p)) for p in self.pages
+                os.path.join(tempdir, "page-{0}.pdf".format(p))
+                for p in self.pages
             ]
-            parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
+            parser = Lattice(**kwargs) \
+                if flavor == "lattice" else Stream(**kwargs)
             for p in pages:
                 t = parser.extract_tables(
-                    p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
+                    p, suppress_stdout=suppress_stdout,
+                    layout_kwargs=layout_kwargs
                 )
                 tables.extend(t)
         return TableList(sorted(tables))
camelot/io.py

@@ -12,7 +12,7 @@ def read_pdf(
     password=None,
     flavor="lattice",
     suppress_stdout=False,
-    layout_kwargs={},
+    layout_kwargs=None,
     **kwargs
 ):
     """Read PDF and return extracted tables.

@@ -80,16 +80,16 @@ def read_pdf(
         Size of a pixel neighborhood that is used to calculate a
         threshold value for the pixel: 3, 5, 7, and so on.
 
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
     threshold_constant* : int, optional (default: -2)
         Constant subtracted from the mean or weighted mean.
         Normally, it is positive but may be zero or negative as well.
 
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
     iterations* : int, optional (default: 0)
         Number of times for erosion/dilation is applied.
 
-        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
+        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_. # noqa
     resolution* : int, optional (default: 300)
         Resolution used for PDF to PNG conversion.

@@ -98,6 +98,7 @@ def read_pdf(
     tables : camelot.core.TableList
 
     """
+    layout_kwargs = layout_kwargs or {}
     if flavor not in ["lattice", "stream"]:
         raise NotImplementedError(
             "Unknown flavor specified." " Use either 'lattice' or 'stream'"
camelot/parsers/base.py

@@ -12,9 +12,18 @@ class BaseParser(object):
     def _generate_layout(self, filename, layout_kwargs):
         self.filename = filename
         self.layout_kwargs = layout_kwargs
-        self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs)
+        self.layout, self.dimensions = get_page_layout(
+            filename,
+            **layout_kwargs
+        )
         self.images = get_text_objects(self.layout, ltype="image")
-        self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text")
-        self.vertical_text = get_text_objects(self.layout, ltype="vertical_text")
+        self.horizontal_text = get_text_objects(
+            self.layout,
+            ltype="horizontal_text"
+        )
+        self.vertical_text = get_text_objects(
+            self.layout,
+            ltype="vertical_text"
+        )
         self.pdf_width, self.pdf_height = self.dimensions
         self.rootname, __ = os.path.splitext(self.filename)
camelot/parsers/lattice.py

@@ -2,14 +2,10 @@
 
-from __future__ import division
 import os
-import sys
 import copy
-import locale
 import logging
 import warnings
-import subprocess
 
 import numpy as np
 import pandas as pd
 
 from .base import BaseParser

@@ -80,7 +76,7 @@ class Lattice(BaseParser):
         Size of a pixel neighborhood that is used to calculate a
         threshold value for the pixel: 3, 5, 7, and so on.
 
-        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
+        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_. # noqa
     threshold_constant : int, optional (default: -2)
         Constant subtracted from the mean or weighted mean.
         Normally, it is positive but may be zero or negative as well.

@@ -102,7 +98,7 @@ class Lattice(BaseParser):
         process_background=False,
         line_scale=15,
         copy_text=None,
-        shift_text=["l", "t"],
+        shift_text=None,
         split_text=False,
         flag_size=False,
         strip_text="",

@@ -114,6 +110,7 @@ class Lattice(BaseParser):
         resolution=300,
         **kwargs
     ):
+        shift_text = shift_text or ["l", "t"]
         self.table_regions = table_regions
         self.table_areas = table_areas
         self.process_background = process_background

@@ -217,8 +214,7 @@ class Lattice(BaseParser):
         )
         gs_call = gs_call.encode().split()
         null = open(os.devnull, "wb")
-        with Ghostscript(*gs_call, stdout=null) as gs:
-            pass
+        Ghostscript(*gs_call, stdout=null)
         null.close()
 
     def _generate_table_bbox(self):

@@ -247,7 +243,8 @@ class Lattice(BaseParser):
         image_height_scaler = image_height / float(self.pdf_height)
         pdf_width_scaler = self.pdf_width / float(image_width)
         pdf_height_scaler = self.pdf_height / float(image_height)
-        image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height)
+        image_scalers = (image_width_scaler,
+                         image_height_scaler, self.pdf_height)
         pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height)
 
         if self.table_areas is None:

@@ -291,7 +288,11 @@ class Lattice(BaseParser):
         self.table_bbox_unscaled = copy.deepcopy(table_bbox)
 
-        self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image(
+        [
+            self.table_bbox,
+            self.vertical_segments,
+            self.horizontal_segments
+        ] = scale_image(
             table_bbox, vertical_segments, horizontal_segments, pdf_scalers
         )

@@ -315,7 +316,10 @@ class Lattice(BaseParser):
             rows.extend([tk[1], tk[3]])
         # sort horizontal and vertical segments
         cols = merge_close_lines(sorted(cols), line_tol=self.line_tol)
-        rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol)
+        rows = merge_close_lines(
+            sorted(rows, reverse=True),
+            line_tol=self.line_tol
+        )
         # make grid using x and y coord of shortlisted rows and cols
         cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)]
         rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]

@@ -359,7 +363,10 @@ class Lattice(BaseParser):
         accuracy = compute_accuracy([[100, pos_errors]])
 
         if self.copy_text is not None:
-            table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
+            table = Lattice._copy_spanning_text(
+                table,
+                copy_text=self.copy_text
+            )
 
         data = table.data
         table.df = pd.DataFrame(data)

@@ -383,20 +390,28 @@ class Lattice(BaseParser):
 
         return table
 
-    def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
+    def extract_tables(
+            self,
+            filename,
+            suppress_stdout=False,
+            layout_kwargs=None
+    ):
+        layout_kwargs = layout_kwargs or {}
         self._generate_layout(filename, layout_kwargs)
+        rootname = os.path.basename(self.rootname)
         if not suppress_stdout:
-            logger.info("Processing {}".format(os.path.basename(self.rootname)))
+            logger.info("Processing {rootname}".format(rootname=rootname))
 
         if not self.horizontal_text:
             if self.images:
                 warnings.warn(
-                    "{} is image-based, camelot only works on"
-                    " text-based pages.".format(os.path.basename(self.rootname))
+                    "{rootname} is image-based, "
+                    "camelot only works on text-based pages."
+                    .format(rootname=rootname)
                 )
             else:
                 warnings.warn(
-                    "No tables found on {}".format(os.path.basename(self.rootname))
+                    "No tables found on {rootname}".format(rootname=rootname)
                 )
             return []

@@ -408,8 +423,10 @@ class Lattice(BaseParser):
         for table_idx, tk in enumerate(
             sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True)
         ):
-            cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk)
-            table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
+            cols, rows, v_s, h_s = self._generate_columns_and_rows(
+                table_idx, tk)
+            table = self._generate_table(
+                table_idx, cols, rows, v_s=v_s, h_s=h_s)
             table._bbox = tk
             _tables.append(table)
camelot/parsers/stream.py

@@ -10,7 +10,8 @@ import pandas as pd
 
 from .base import BaseParser
 from ..core import TextEdges, Table
-from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace
+from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
+                     compute_whitespace)
 
 
 logger = logging.getLogger("camelot")

@@ -70,6 +71,9 @@ class Stream(BaseParser):
     ):
         self.table_regions = table_regions
         self.table_areas = table_areas
+        self.table_bbox = None
+        self.t_bbox = None
+        self.textedges = []
         self.columns = columns
         self._validate_columns()
         self.split_text = split_text

@@ -95,10 +99,10 @@ class Stream(BaseParser):
             Tuple (x0, y0, x1, y1) in pdf coordinate space.
 
         """
-        xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]])
-        ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]])
-        xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]])
-        ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]])
+        xmin = min(t.x0 for direction in t_bbox for t in t_bbox[direction])
+        ymin = min(t.y0 for direction in t_bbox for t in t_bbox[direction])
+        xmax = max(t.x1 for direction in t_bbox for t in t_bbox[direction])
+        ymax = max(t.y1 for direction in t_bbox for t in t_bbox[direction])
         text_bbox = (xmin, ymin, xmax, ymax)
         return text_bbox

@@ -119,21 +123,25 @@ class Stream(BaseParser):
             Two-dimensional list of text objects grouped into rows.
 
         """
-        row_y = 0
+        row_y = None
         rows = []
         temp = []
-        for t in text:
+        non_empty_text = [t for t in text if t.get_text().strip()]
+        for t in non_empty_text:
             # is checking for upright necessary?
-            # if t.get_text().strip() and all([obj.upright for obj in t._objs if
-            # type(obj) is LTChar]):
-            if t.get_text().strip():
-                if not np.isclose(row_y, t.y0, atol=row_tol):
-                    rows.append(sorted(temp, key=lambda t: t.x0))
-                    temp = []
-                    row_y = t.y0
-                temp.append(t)
+            # if t.get_text().strip() and all([obj.upright \
+            #    for obj in t._objs
+            #    if type(obj) is LTChar]):
+            if row_y is None:
+                row_y = t.y0
+            elif not np.isclose(row_y, t.y0, atol=row_tol):
+                rows.append(sorted(temp, key=lambda t: t.x0))
+                temp = []
+                # We update the row's bottom as we go, to be forgiving if there
+                # is a gradual change across multiple columns.
+                row_y = t.y0
+            temp.append(t)
         rows.append(sorted(temp, key=lambda t: t.x0))
-        __ = rows.pop(0)  # TODO: hacky
         return rows
 
     @staticmethod

@@ -170,7 +178,8 @@ class Stream(BaseParser):
             merged.append(higher)
         elif column_tol < 0:
             if higher[0] <= lower[1]:
-                if np.isclose(higher[0], lower[1], atol=abs(column_tol)):
+                if np.isclose(higher[0], lower[1],
+                              atol=abs(column_tol)):
                     merged.append(higher)
                 else:
                     upper_bound = max(lower[1], higher[1])

@@ -198,10 +207,13 @@ class Stream(BaseParser):
         """
         row_mids = [
-            sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0
+            sum((t.y0 + t.y1) / 2 for t in r) / len(r) if len(r) > 0 else 0
             for r in rows_grouped
         ]
-        rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))]
+        rows = [
+            (row_mids[i] + row_mids[i - 1]) / 2
+            for i in range(1, len(row_mids))
+        ]
         rows.insert(0, text_y_max)
         rows.append(text_y_min)
         rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)]

@@ -230,7 +242,9 @@ class Stream(BaseParser):
             text = Stream._group_rows(text, row_tol=row_tol)
             elements = [len(r) for r in text]
             new_cols = [
-                (t.x0, t.x1) for r in text if len(r) == max(elements) for t in r
+                (t.x0, t.x1)
+                for r in text if len(r) == max(elements)
+                for t in r
             ]
             cols.extend(Stream._merge_columns(sorted(new_cols)))
         return cols

@@ -262,12 +276,13 @@ class Stream(BaseParser):
     def _validate_columns(self):
         if self.table_areas is not None and self.columns is not None:
             if len(self.table_areas) != len(self.columns):
-                raise ValueError("Length of table_areas and columns" " should be equal")
+                raise ValueError("Length of table_areas and columns"
+                                 " should be equal")
 
     def _nurminen_table_detection(self, textlines):
         """A general implementation of the table detection algorithm
         described by Anssi Nurminen's master's thesis.
-        Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3
+        Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 # noqa
 
         Assumes that tables are situated relatively far apart
         vertically.

@@ -284,7 +299,7 @@ class Stream(BaseParser):
         # guess table areas using textlines and relevant edges
         table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
         # treat whole page as table area if no table areas found
-        if not len(table_bbox):
+        if not table_bbox:
             table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}
 
         return table_bbox

@@ -302,7 +317,8 @@ class Stream(BaseParser):
                 y1 = float(y1)
                 x2 = float(x2)
                 y2 = float(y2)
-                region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text)
+                region_text = text_in_bbox(
+                    (x1, y2, x2, y1), self.horizontal_text)
                 hor_text.extend(region_text)
             # find tables based on nurminen's detection algorithm
             table_bbox = self._nurminen_table_detection(hor_text)

@@ -328,8 +344,10 @@ class Stream(BaseParser):
 
         self.t_bbox = t_bbox
 
-        text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox)
-        rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol)
+        text_x_min, text_y_min, text_x_max, text_y_max = \
+            self._text_bbox(self.t_bbox)
+        rows_grouped = self._group_rows(
+            self.t_bbox["horizontal"], row_tol=self.row_tol)
         rows = self._join_rows(rows_grouped, text_y_max, text_y_min)
         elements = [len(r) for r in rows_grouped]

@@ -354,14 +372,23 @@ class Stream(BaseParser):
                 # see if the list contains elements, if yes, then use
                 # the mode after removing 1s
                 elements = list(filter(lambda x: x != 1, elements))
-                if len(elements):
+                if elements:
                     ncols = max(set(elements), key=elements.count)
                 else:
                     warnings.warn(
-                        "No tables found in table area {}".format(table_idx + 1)
+                        "No tables found in table area {}"
+                        .format(table_idx + 1)
                    )
-                cols = [(t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r]
-                cols = self._merge_columns(sorted(cols), column_tol=self.column_tol)
+                cols = [
+                    (t.x0, t.x1)
+                    for r in rows_grouped
+                    if len(r) == ncols
+                    for t in r
+                ]
+                cols = self._merge_columns(
+                    sorted(cols),
+                    column_tol=self.column_tol
+                )
                 inner_text = []
                 for i in range(1, len(cols)):
                     left = cols[i - 1][1]

@@ -431,23 +458,30 @@ class Stream(BaseParser):
 
         return table
 
-    def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}):
+    def extract_tables(self, filename, suppress_stdout=False,
+                       layout_kwargs=None):
+        layout_kwargs = layout_kwargs or {}
         self._generate_layout(filename, layout_kwargs)
         if not suppress_stdout:
-            logger.info("Processing {}".format(os.path.basename(self.rootname)))
+            logger.info("Processing {}".format(
+                os.path.basename(self.rootname)))
 
         if not self.horizontal_text:
             if self.images:
                 warnings.warn(
                     "{} is image-based, camelot only works on"
-                    " text-based pages.".format(os.path.basename(self.rootname))
+                    " text-based pages.".format(
+                        os.path.basename(self.rootname))
                 )
             else:
                 warnings.warn(
-                    "No tables found on {}".format(os.path.basename(self.rootname))
+                    "No tables found on {}".format(
+                        os.path.basename(self.rootname))
                )
             return []
 
+        # Identify plausible areas within the doc where tables lie,
+        # populate table_bbox keys with these areas.
         self._generate_table_bbox()
 
         _tables = []
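The `_group_rows` rewrite above seeds `row_y` with `None` instead of `0`; the old seed always mismatched the first textline's y-coordinate, appended an empty first row, and forced the `rows.pop(0)` hack. A standalone sketch of the grouping logic, simplified to bare y-coordinates rather than camelot's pdfminer text objects:

```python
import numpy as np

def group_rows(y_coords, row_tol=2):
    row_y = None         # the old code used row_y = 0, so the first y never
    rows, temp = [], []  # matched and an empty first row had to be popped
    for y in y_coords:
        if row_y is None:
            row_y = y
        elif not np.isclose(row_y, y, atol=row_tol):
            rows.append(temp)
            temp = []
            row_y = y    # track the latest row's bottom, as in the diff
        temp.append(y)
    rows.append(temp)
    return rows

print(group_rows([700.1, 700.3, 680.0, 679.5]))
# [[700.1, 700.3], [680.0, 679.5]]
```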
camelot/plotting.py

@@ -37,7 +37,7 @@ class PlotMethods(object):
             raise NotImplementedError(
                 "Lattice flavor does not support kind='{}'".format(kind)
             )
-        elif table.flavor == "stream" and kind in ["joint", "line"]:
+        elif table.flavor == "stream" and kind in ["line"]:
             raise NotImplementedError(
                 "Stream flavor does not support kind='{}'".format(kind)
             )

@@ -64,7 +64,13 @@ class PlotMethods(object):
         for t in table._text:
             xs.extend([t[0], t[2]])
             ys.extend([t[1], t[3]])
-            ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1]))
+            ax.add_patch(
+                patches.Rectangle(
+                    (t[0], t[1]),
+                    t[2] - t[0],
+                    t[3] - t[1]
+                )
+            )
         ax.set_xlim(min(xs) - 10, max(xs) + 10)
         ax.set_ylim(min(ys) - 10, max(ys) + 10)
         return fig

@@ -132,7 +138,8 @@ class PlotMethods(object):
         for t in table_bbox.keys():
             ax.add_patch(
                 patches.Rectangle(
-                    (t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red"
+                    (t[0], t[1]), t[2] - t[0], t[3] - t[1],
+                    fill=False, color="red"
                 )
             )
         if not _FOR_LATTICE:

@@ -164,7 +171,10 @@ class PlotMethods(object):
             xs.extend([t[0], t[2]])
             ys.extend([t[1], t[3]])
             ax.add_patch(
-                patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue")
+                patches.Rectangle(
+                    (t[0], t[1]), t[2] - t[0], t[3] - t[1],
+                    color="blue"
+                )
             )
         ax.set_xlim(min(xs) - 10, max(xs) + 10)
         ax.set_ylim(min(ys) - 10, max(ys) + 10)
camelot/utils.py

@@ -30,6 +30,9 @@ from pdfminer.layout import (
 )
 
 
+# pylint: disable=import-error
+# PyLint will evaluate both branches, and will necessarily complain about one
+# of them.
 PY3 = sys.version_info[0] >= 3
 if PY3:
     from urllib.request import urlopen

@@ -310,7 +313,8 @@ def get_rotation(chars, horizontal_text, vertical_text):
     if hlen < vlen:
         clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars)
         anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars)
-        rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise"
+        rotation = "anticlockwise" if clockwise < anticlockwise \
+            else "clockwise"
     return rotation

@@ -341,12 +345,16 @@ def segments_in_bbox(bbox, v_segments, h_segments):
     v_s = [
         v
         for v in v_segments
-        if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2
+        if v[1] > lb[1] - 2 and
+        v[3] < rt[1] + 2 and
+        lb[0] - 2 <= v[0] <= rt[0] + 2
     ]
     h_s = [
         h
         for h in h_segments
-        if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2
+        if h[0] > lb[0] - 2 and
+        h[2] < rt[0] + 2 and
+        lb[1] - 2 <= h[1] <= rt[1] + 2
     ]
     return v_s, h_s

@@ -464,10 +472,10 @@ def flag_font_size(textline, direction, strip_text=""):
         for t in textline
         if not isinstance(t, LTAnno)
     ]
-    l = [np.round(size, decimals=6) for text, size in d]
-    if len(set(l)) > 1:
+    text_sizes = [np.round(size, decimals=6) for text, size in d]
+    if len(set(text_sizes)) > 1:
         flist = []
-        min_size = min(l)
+        min_size = min(text_sizes)
         for key, chars in groupby(d, itemgetter(1)):
             if key == min_size:
                 fchars = [t[0] for t in chars]

@@ -511,7 +519,6 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
         of row/column and text is the an lttextline substring.
 
     """
-    idx = 0
     cut_text = []
     bbox = textline.bbox
     try:

@@ -528,7 +535,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
             ]
             r = r_idx[0]
             x_cuts = [
-                (c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right
+                (c, table.cells[r][c].x2)
+                for c in x_overlap
+                if table.cells[r][c].right
             ]
             if not x_cuts:
                 x_cuts = [(x_overlap[0], table.cells[r][-1].x2)]

@@ -561,7 +570,9 @@ def split_textline(table, textline, direction, flag_size=False, strip_text=""):
             ]
             c = c_idx[0]
             y_cuts = [
-                (r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom
+                (r, table.cells[r][c].y1)
+                for r in y_overlap
+                if table.cells[r][c].bottom
             ]
             if not y_cuts:
                 y_cuts = [(y_overlap[0], table.cells[-1][c].y1)]

@@ -644,9 +655,8 @@ def get_table_index(
     """
     r_idx, c_idx = [-1] * 2
     for r in range(len(table.rows)):
-        if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[
-            r
-        ][1]:
+        if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and \
+                (t.y0 + t.y1) / 2.0 > table.rows[r][1]:
             lt_col_overlap = []
             for c in table.cols:
                 if c[0] <= t.x1 and c[1] >= t.x0:

@@ -681,7 +691,9 @@ get_table_index(
     X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1)
     Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1)
     charea = X * Y
-    error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
+    error = (
+        (X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))
+    ) / charea
 
     if split_text:
         return (

@@ -697,13 +709,16 @@ def get_table_index(
                 (
                     r_idx,
                     c_idx,
-                    flag_font_size(t._objs, direction, strip_text=strip_text),
+                    flag_font_size(t._objs,
+                                   direction,
+                                   strip_text=strip_text),
                 )
             ],
             error,
         )
     else:
-        return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], error
+        return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], \
+            error
 
 
 def compute_accuracy(error_weights):

@@ -751,7 +766,6 @@ def compute_whitespace(d):
 
     """
     whitespace = 0
-    r_nempty_cells, c_nempty_cells = [], []
     for i in d:
         for j in i:
             if j.strip() == "":

@@ -811,6 +825,7 @@ def get_page_layout(
         width = layout.bbox[2]
         height = layout.bbox[3]
         dim = (width, height)
+        break  # we assume a single page pdf
     return layout, dim
docs/index.rst

@@ -33,6 +33,9 @@ Release v\ |version|. (:ref:`Installation <install>`)
 .. image:: https://img.shields.io/badge/code%20style-black-000000.svg
     :target: https://github.com/ambv/black
 
+.. image:: https://img.shields.io/badge/continous%20quality-deepsource-lightgrey
+    :target: https://deepsource.io/gh/camelot-dev/camelot/?ref=repository-badge
+
 **Camelot** is a Python library that makes it easy for *anyone* to extract tables from PDF files!
 
 .. note:: You can also check out `Excalibur`_, which is a web interface for Camelot!

@@ -86,6 +89,13 @@ See `comparison with other PDF table extraction libraries and tools`_.
 .. _ETL and data analysis workflows: https://gist.github.com/vinayak-mehta/e5949f7c2410a0e12f25d3682dc9e873
 .. _comparison with other PDF table extraction libraries and tools: https://github.com/camelot-dev/camelot/wiki/Comparison-with-other-PDF-Table-Extraction-libraries-and-tools
 
+Support us on OpenCollective
+----------------------------
+
+If Camelot helped you extract tables from PDFs, please consider supporting its development by `becoming a backer or a sponsor on OpenCollective`_!
+
+.. _becoming a backer or a sponsor on OpenCollective: https://opencollective.com/camelot
+
 The User Guide
 --------------
docs/user/how-it-works.rst

@@ -20,7 +20,7 @@ Stream can be used to parse tables that have whitespaces between cells to simula
 3. The number of columns inside each table area are then guessed. This is done by calculating the mode of number of words in each text row. Based on this mode, words in each text row are chosen to calculate a list of column *x* ranges.
 
-4. Words that lie inside/outside the current column *x* ranges are then used to extend extend the current list of columns.
+4. Words that lie inside/outside the current column *x* ranges are then used to extend the current list of columns.
 
 5. Finally, a table is formed using the text rows' *y* ranges and column *x* ranges and words found on the page are assigned to the table's cells based on their *x* and *y* coordinates.
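Step 3 above guesses the column count from the mode of words-per-row. A rough standalone illustration of that idea (hypothetical code, not camelot's implementation):

```python
from collections import Counter

# each inner list is one detected text row; a misread row has the wrong length
rows = [["Name", "Qty", "Price"],
        ["Apples", "4", "1.20"],
        ["Pears whole row misread as one word"],
        ["Plums", "7", "0.95"]]

words_per_row = [len(r) for r in rows]
ncols = Counter(words_per_row).most_common(1)[0][0]  # mode of row lengths
print(ncols)  # 3 -- the outlier row does not skew the guess
```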
docs/user/install.rst

@@ -13,7 +13,7 @@ The easiest way to install Camelot is to install it with `conda`_, which is a pa
     $ conda install -c conda-forge camelot-py
 
-.. note:: Camelot is available for Python 2.7, 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
+.. note:: Camelot is available for Python 3.5, 3.6 and 3.7 on Linux, macOS and Windows. For Windows, you will need to install ghostscript which you can get from their `downloads page`_.
 
 .. _conda: https://conda.io/docs/
 .. _Anaconda: http://docs.continuum.io/anaconda/
requirements.txt

@@ -4,5 +4,5 @@ numpy>=1.13.3
 opencv-python>=3.4.2.17
 openpyxl>=2.5.8
 pandas>=0.23.4
-pdfminer.six>=20170720
+pdfminer.six>=20200402
 PyPDF2>=1.26.0
setup.py

@@ -19,7 +19,7 @@ requires = [
     'numpy>=1.13.3',
     'openpyxl>=2.5.8',
     'pandas>=0.23.4',
-    'pdfminer.six>=20170720',
+    'pdfminer.six>=20200402',
     'PyPDF2>=1.26.0'
 ]

@@ -69,9 +69,8 @@ def setup_package():
         },
         classifiers=[
             # Trove classifiers
-            # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
+            # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers  # noqa
             'License :: OSI Approved :: MIT License',
-            'Programming Language :: Python :: 2.7',
             'Programming Language :: Python :: 3.5',
             'Programming Language :: Python :: 3.6',
             'Programming Language :: Python :: 3.7'
tests/data.py
|
|
@ -4,16 +4,6 @@ from __future__ import unicode_literals
|
|||
|
||||
|
||||
data_stream = [
|
||||
[
|
||||
"",
|
||||
"Table: 5 Public Health Outlay 2012-13 (Budget Estimates) (Rs. in 000)",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"],
|
||||
["", "", "", "", "", "Revenue &", "", ""],
|
||||
["", "Medical &", "Family", "Medical &", "Family", "", "", ""],
|
||||
|
|
@ -80,7 +70,8 @@ data_stream = [
|
|||
"5,000",
|
||||
"33,051,480",
|
||||
],
|
||||
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560", "4,508,180"],
|
||||
["Goa", "4,055,567", "110,000", "330,053", "0", "4,495,620", "12,560",
|
||||
"4,508,180"],
|
||||
[
|
||||
"Gujarat",
|
||||
"26,328,400",
|
||||
|
|
@ -171,7 +162,8 @@ data_stream = [
|
|||
"313,762",
|
||||
"67,044,159",
|
||||
],
|
||||
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700", "0", "3,579,700"],
|
||||
["Manipur", "2,494,600", "187,700", "897,400", "0", "3,579,700",
|
||||
"0", "3,579,700"],
|
||||
[
|
||||
"Meghalaya",
|
||||
"2,894,093",
|
||||
|
|
@ -236,7 +228,8 @@ data_stream = [
|
|||
|
||||
data_stream_table_rotated = [
|
||||
[
|
||||
"Table 21 Current use of contraception by background characteristics\u2014Continued",
|
||||
"Table 21 Current use of contraception by background characteristics"
|
||||
"\u2014Continued",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -330,7 +323,8 @@ data_stream_table_rotated = [
|
|||
"Total",
|
||||
"women",
|
||||
],
|
||||
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
||||
["Caste/tribe", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||
"", ""],
|
||||
[
|
||||
"Scheduled caste",
|
||||
"74.8",
|
||||
|
|
@ -407,7 +401,8 @@ data_stream_table_rotated = [
|
|||
"100.0",
|
||||
"3,319",
|
||||
],
|
||||
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
|
||||
["Wealth index", "", "", "", "", "", "", "", "", "", "", "", "",
|
||||
"", "", "", ""],
|
||||
[
|
||||
"Lowest",
|
||||
"64.5",
|
||||
|
|
@ -830,7 +825,8 @@ data_stream_table_rotated = [
|
|||
|
||||
data_stream_two_tables_1 = [
|
||||
[
|
||||
"[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)",
|
||||
"Program. Represents arrests reported (not charged) by 12,910 "
|
||||
"agencies with a total population of 247,526,916 as estimated",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -842,7 +838,8 @@ data_stream_two_tables_1 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated",
|
||||
"by the FBI. Some persons may be arrested more than once during a "
|
||||
"year, therefore, the data in this table, in some cases,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -854,19 +851,8 @@ data_stream_two_tables_1 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"by the FBI. Some persons may be arrested more than once during a year, therefore, the data in this table, in some cases,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"could represent multiple arrests of the same person. See text, this section and source]",
|
||||
"could represent multiple arrests of the same person. See text, "
|
||||
"this section and source]",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -903,7 +889,8 @@ data_stream_two_tables_1 = [
|
|||
"and over",
|
||||
],
|
||||
[
|
||||
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . .\n . .\n . .\n . . .",
|
||||
"Total .\n .\n . . . . . .\n . .\n . .\n . .\n . .\n . "
|
||||
".\n . .\n . .\n . . .",
|
||||
"11,062 .6",
|
||||
"1,540 .0",
|
||||
"9,522 .6",
|
||||
|
|
@ -915,7 +902,8 @@ data_stream_two_tables_1 = [
|
|||
"2,330 .9",
|
||||
],
|
||||
[
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n . .",
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||
".\n . .",
|
||||
"467 .9",
|
||||
"69 .1",
|
||||
"398 .8",
|
||||
|
|
@ -976,7 +964,8 @@ data_stream_two_tables_1 = [
|
|||
"64.5",
|
||||
],
|
||||
[
|
||||
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . . .",
|
||||
"Property crime . . . .\n . .\n . . .\n . . .\n .\n . . "
|
||||
". .",
|
||||
"1,396 .4",
|
||||
"338 .7",
|
||||
"1,057 .7",
|
||||
|
|
@ -1060,7 +1049,8 @@ data_stream_two_tables_1 = [
|
|||
"25.5",
|
||||
],
|
||||
[
|
||||
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n.",
|
||||
"Fraud .\n.\n.\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n.",
|
||||
"173.7",
|
||||
"5.1",
|
||||
"168.5",
|
||||
|
|
@ -1290,19 +1280,8 @@ data_stream_two_tables_1 = [
|
|||
],
|
||||
[
|
||||
"",
|
||||
"– Represents zero. X Not applicable. 1 Buying, receiving, possessing stolen property. 2 Except forcible rape and prostitution.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
||||
"– Represents zero. X Not applicable. 1 Buying, receiving, "
|
||||
"possessing stolen property. 2 Except forcible rape and prostitution.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1315,17 +1294,10 @@ data_stream_two_tables_1 = [
|
|||
]
|
||||
|
||||
data_stream_two_tables_2 = [
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
|
||||
[
|
||||
"[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies",
|
||||
"[Based on Uniform Crime Reporting (UCR) Program. Represents "
|
||||
"arrests reported (not charged) by 12,371 agencies",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1333,7 +1305,8 @@ data_stream_two_tables_2 = [
|
|||
"",
|
||||
],
|
||||
[
|
||||
"with a total population of 239,839,971 as estimated by the FBI. See headnote, Table 324]",
|
||||
"with a total population of 239,839,971 as estimated by the FBI. "
|
||||
"See headnote, Table 324]",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
|
|
@ -1344,7 +1317,8 @@ data_stream_two_tables_2 = [
|
|||
["Offense charged", "", "", "", "Indian/Alaskan", "Asian Pacific"],
|
||||
["", "Total", "White", "Black", "Native", "Islander"],
|
||||
[
|
||||
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n .\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
||||
"Total .\n .\n .\n .\n . .\n . . .\n . . .\n .\n . . .\n "
|
||||
".\n . . .\n . .\n .\n . . .\n .\n .\n .\n . .\n . .\n . .",
|
||||
"10,690,561",
|
||||
"7,389,208",
|
||||
"3,027,153",
|
||||
|
|
@ -1352,7 +1326,8 @@ data_stream_two_tables_2 = [
|
|||
"123,656",
|
||||
],
|
||||
[
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
||||
"Violent crime . . . . . . . .\n . .\n . .\n . .\n . "
|
||||
".\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .",
|
||||
"456,965",
|
||||
"268,346",
|
||||
"177,766",
|
||||
|
|
@ -1368,7 +1343,8 @@ data_stream_two_tables_2 = [
|
|||
"97",
|
||||
],
|
||||
[
|
||||
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Forcible rape . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"16,362",
|
||||
"10,644",
|
||||
"5,319",
|
||||
|
|
@ -1376,7 +1352,8 @@ data_stream_two_tables_2 = [
|
|||
"230",
|
||||
],
|
||||
[
|
||||
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"Robbery . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"100,496",
|
||||
"43,039",
|
||||
"55,742",
|
||||
|
|
@ -1384,7 +1361,8 @@ data_stream_two_tables_2 = [
|
|||
"989",
|
||||
],
|
||||
[
|
||||
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. .\n.\n.\n.",
|
||||
"Aggravated assault . . . . . . . .\n. .\n. .\n.\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.",
|
||||
"330,368",
|
||||
"209,922",
|
||||
"111,904",
|
||||
|
|
@ -1392,7 +1370,8 @@ data_stream_two_tables_2 = [
|
|||
"3,929",
|
||||
],
|
||||
[
|
||||
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n . .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
||||
"Property crime . . . . .\n . . . . .\n .\n . . .\n .\n "
|
||||
". .\n .\n .\n .\n . .\n .\n . .\n .\n .",
|
||||
"1,364,409",
|
||||
"922,139",
|
||||
"406,382",
|
||||
|
|
@ -1400,7 +1379,8 @@ data_stream_two_tables_2 = [
|
|||
"18,289",
|
||||
],
|
||||
[
|
||||
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n. . . .",
|
||||
"Burglary . . .\n. . . . .\n. . .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n. . . .",
|
||||
"234,551",
|
||||
"155,994",
|
||||
"74,419",
|
||||
|
|
@ -1408,7 +1388,8 @@ data_stream_two_tables_2 = [
|
|||
"2,117",
|
||||
],
|
||||
[
|
||||
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Larceny-theft . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"1,056,473",
|
||||
"719,983",
|
||||
"306,625",
|
||||
|
|
@ -1416,7 +1397,8 @@ data_stream_two_tables_2 = [
|
|||
"15,219",
|
||||
],
|
||||
[
|
||||
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"Motor vehicle theft . . . . . .\n. .\n.\n. . .\n.\n. .\n.\n.\n.\n. "
|
||||
".\n.\n. .\n.",
|
||||
"63,919",
|
||||
"39,077",
|
||||
"23,184",
|
||||
|
|
@ -1424,7 +1406,8 @@ data_stream_two_tables_2 = [
|
|||
"841",
|
||||
],
|
||||
[
|
||||
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . . . .",
|
||||
"Arson .\n. . . .\n. .\n. .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . . . .",
|
||||
"9,466",
|
||||
"7,085",
|
||||
"2,154",
|
||||
|
|
@ -1432,7 +1415,8 @@ data_stream_two_tables_2 = [
|
|||
"112",
|
||||
],
|
||||
[
|
||||
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. .\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"Other assaults .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n. "
|
||||
".\n.\n.\n.\n. .\n.\n. .\n.",
|
||||
"1,032,502",
|
||||
"672,865",
|
||||
"332,435",
|
||||
|
|
@ -1440,7 +1424,8 @@ data_stream_two_tables_2 = [
|
|||
"12,075",
|
||||
],
|
||||
[
|
||||
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. .\n. .\n.",
|
||||
"Forgery and counterfeiting .\n. . . . . . .\n.\n. .\n.\n.\n.\n. "
|
||||
".\n. .\n.",
|
||||
"67,054",
|
||||
"44,730",
|
||||
"21,251",
|
||||
|
|
@ -1448,7 +1433,8 @@ data_stream_two_tables_2 = [
|
|||
"728",
|
||||
],
|
||||
[
|
||||
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. . . . . . .",
|
||||
"Fraud .\n.\n. . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||
".\n.\n.\n. . . . . . .",
|
||||
"161,233",
|
||||
"108,032",
|
||||
"50,367",
|
||||
|
|
@ -1456,7 +1442,8 @@ data_stream_two_tables_2 = [
|
|||
"1,519",
|
||||
],
|
||||
[
|
||||
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. .\n.\n. .\n.\n.\n.\n.",
|
||||
"Embezzlement . . . .\n. . . . .\n.\n. . .\n.\n. . .\n.\n.\n. "
|
||||
".\n.\n. .\n.\n.\n.\n.",
|
||||
"13,960",
|
||||
"9,208",
|
||||
"4,429",
|
||||
|
|
@ -1472,7 +1459,8 @@ data_stream_two_tables_2 = [
|
|||
"742",
|
||||
],
|
||||
[
|
||||
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
||||
"Vandalism . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||
".\n. .\n.\n.\n.\n. .",
|
||||
"212,173",
|
||||
"157,723",
|
||||
"48,746",
|
||||
|
|
@ -1496,7 +1484,8 @@ data_stream_two_tables_2 = [
|
|||
"1,413",
|
||||
],
|
||||
[
|
||||
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n.\n. .",
|
||||
"Sex offenses 1 . . . . . . . .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"60,175",
|
||||
"44,240",
|
||||
"14,347",
|
||||
|
|
@ -1504,7 +1493,8 @@ data_stream_two_tables_2 = [
|
|||
"873",
|
||||
],
|
||||
[
|
||||
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Drug abuse violations . . . . . . . .\n. . .\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.",
|
||||
"1,301,629",
|
||||
"845,974",
|
||||
"437,623",
|
||||
|
|
@ -1512,7 +1502,8 @@ data_stream_two_tables_2 = [
|
|||
"9,444",
|
||||
],
|
||||
[
|
||||
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . .\n.\n.\n.\n.\n. .\n. .",
|
||||
"Gambling . . . . .\n. . . . .\n.\n. . .\n.\n. . .\n. .\n.\n. . "
|
||||
".\n.\n.\n.\n.\n. .\n. .",
|
||||
"8,046",
|
||||
"2,290",
|
||||
"5,518",
|
||||
|
|
@ -1528,7 +1519,8 @@ data_stream_two_tables_2 = [
|
|||
"624",
|
||||
],
|
||||
[
|
||||
"Driving under the influence . . . . . . .\n. .\n.\n. .\n.\n.\n.\n.\n. .",
|
||||
"Driving under the influence . . . . . . .\n. .\n.\n. "
|
||||
".\n.\n.\n.\n.\n. .",
|
||||
"1,105,401",
|
||||
"954,444",
|
||||
"121,594",
|
||||
|
|
@ -1536,7 +1528,8 @@ data_stream_two_tables_2 = [
|
|||
"14,460",
|
||||
],
|
||||
[
|
||||
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Liquor laws . . . . . . . .\n. .\n. .\n. .\n. .\n. . "
|
||||
".\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"444,087",
|
||||
"373,189",
|
||||
"50,431",
|
||||
|
|
@ -1544,7 +1537,8 @@ data_stream_two_tables_2 = [
|
|||
"5,591",
|
||||
],
|
||||
[
|
||||
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . .\n.\n.\n.\n.\n.\n.",
|
||||
"Drunkenness . .\n. . . . . . .\n.\n. . .\n.\n. . .\n.\n.\n.\n. . "
|
||||
".\n.\n.\n.\n.\n.\n.",
|
||||
"469,958",
|
||||
"387,542",
|
||||
"71,020",
|
||||
|
|
@ -1552,7 +1546,8 @@ data_stream_two_tables_2 = [
|
|||
"2,844",
|
||||
],
|
||||
[
|
||||
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. .\n.\n.\n.\n.",
|
||||
"Disorderly conduct . . .\n. . . . . .\n. .\n. . .\n.\n.\n.\n. .\n. "
|
||||
".\n.\n.\n.\n.",
|
||||
"515,689",
|
||||
"326,563",
|
||||
"176,169",
|
||||
|
|
@ -1560,7 +1555,8 @@ data_stream_two_tables_2 = [
|
|||
"4,174",
|
||||
],
|
||||
[
|
||||
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"Vagrancy . . .\n. .\n. . . .\n. .\n.\n. .\n.\n.\n. .\n.\n.\n. "
|
||||
".\n.\n.\n. .\n.\n.\n. . . .",
|
||||
"26,347",
|
||||
"14,581",
|
||||
"11,031",
|
||||
|
|
@ -1568,7 +1564,8 @@ data_stream_two_tables_2 = [
|
|||
"192",
|
||||
],
|
||||
[
|
||||
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. .\n.",
|
||||
"All other offenses (except traffic) . .\n. .\n. .\n. .\n.\n.\n.\n. "
|
||||
".\n.",
|
||||
"2,929,217",
|
||||
"1,937,221",
|
||||
"911,670",
|
||||
|
|
@ -1576,7 +1573,8 @@ data_stream_two_tables_2 = [
|
|||
"36,446",
|
||||
],
|
||||
[
|
||||
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n.\n.\n. .\n. . . .",
|
||||
"Suspicion . . .\n. . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n. "
|
||||
".\n.\n.\n.\n.\n. .\n. . . .",
|
||||
"1,513",
|
||||
"677",
|
||||
"828",
|
||||
|
|
@ -1592,7 +1590,8 @@ data_stream_two_tables_2 = [
|
|||
"1,060",
|
||||
],
|
||||
[
|
||||
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. .\n. .\n.\n.\n.\n. .",
|
||||
"Runaways . . . . . . . .\n. .\n. .\n. .\n. .\n. .\n. .\n.\n.\n. "
|
||||
".\n. .\n.\n.\n.\n. .",
|
||||
"73,616",
|
||||
"48,343",
|
||||
"19,670",
|
||||
|
|
@ -1600,14 +1599,6 @@ data_stream_two_tables_2 = [
|
|||
"3,950",
|
||||
],
|
||||
["1 Except forcible rape and prostitution.", "", "", "", "", ""],
|
||||
[
|
||||
"",
|
||||
"Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
],
|
||||
]
|
||||
|
||||
data_stream_table_areas = [
|
||||
|
|
@ -1634,10 +1625,12 @@ data_stream_columns = [
|
|||
"Nombre Localidad",
|
||||
],
|
||||
["Entidad", "", "Municipio", "", "Localidad", ""],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0094", "Granja Adelita"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0094",
|
||||
"Granja Adelita"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0096", "Agua Azul"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0100", "Rancho Alegre"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0102", "Los Arbolitos [Rancho]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0102",
|
||||
"Los Arbolitos [Rancho]"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1655,7 +1648,8 @@ data_stream_columns = [
|
|||
"0112",
|
||||
"Baj\xedo los V\xe1zquez",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0113", "Baj\xedo de Montoro"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0113",
|
||||
"Baj\xedo de Montoro"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1697,8 +1691,10 @@ data_stream_columns = [
|
|||
"Ca\xf1ada Honda [Estaci\xf3n]",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0127", "Los Ca\xf1os"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0128", "El Cari\xf1\xe1n"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0129", "El Carmen [Granja]"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0128",
|
||||
"El Cari\xf1\xe1n"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0129",
|
||||
"El Carmen [Granja]"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@ -1733,9 +1729,11 @@ data_stream_columns = [
|
|||
"El Colorado (El Soyatal)",
|
||||
],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0146", "El Conejal"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0157", "Cotorina de Abajo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0157",
|
||||
"Cotorina de Abajo"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0162", "Coyotes"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0166", "La Huerta (La Cruz)"],
|
||||
["01", "Aguascalientes", "001", "Aguascalientes", "0166",
|
||||
"La Huerta (La Cruz)"],
|
||||
[
|
||||
"01",
|
||||
"Aguascalientes",
|
||||
|
|
@@ -1752,17 +1750,20 @@ data_stream_columns = [
         "0171",
         "Los Cuervos (Los Ojos de Agua)",
     ],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0172", "San Jos\xe9 [Granja]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0172",
+     "San Jos\xe9 [Granja]"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0176", "La Chiripa"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0182", "Dolores"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0183", "Los Dolores"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0190", "El Duraznillo"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0191", "Los Dur\xf3n"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0197", "La Escondida"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0201", "Brande Vin [Bodegas]"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0201",
+     "Brande Vin [Bodegas]"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0207", "Valle Redondo"],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0209", "La Fortuna"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0212", "Lomas del Gachup\xedn"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0212",
+     "Lomas del Gachup\xedn"],
     [
         "01",
         "Aguascalientes",
@@ -1772,22 +1773,12 @@ data_stream_columns = [
         "El Carmen (Gallinas G\xfceras) [Rancho]",
     ],
     ["01", "Aguascalientes", "001", "Aguascalientes", "0216", "La Gloria"],
-    ["01", "Aguascalientes", "001", "Aguascalientes", "0226", "Hacienda Nueva"],
+    ["01", "Aguascalientes", "001", "Aguascalientes", "0226",
+     "Hacienda Nueva"],
 ]

 data_stream_split_text = [
-    [
-        "FEB",
-        "RUAR",
-        "Y 2014 M27 (BUS)",
-        "",
-        "ALPHABETIC LISTING BY T",
-        "YPE",
-        "",
-        "",
-        "",
-        "ABLPDM27",
-    ],
+    ["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""],
     ["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"],
     ["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""],
     ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""],
@@ -1977,7 +1968,18 @@ data_stream_split_text = [
         "(872) 825-8309",
         "2014/04/11",
     ],
-    ["", "", "A SENSU JAPANESE", "", "7123 SOUTH 92ND EAST", "", "", "", "", ""],
+    [
+        "",
+        "",
+        "A SENSU JAPANESE",
+        "",
+        "7123 SOUTH 92ND EAST",
+        "",
+        "",
+        "",
+        "",
+        "",
+    ],
     [
         "625422",
         "BAW",
@@ -2029,7 +2031,18 @@ data_stream_split_text = [
         "(580) 928-2700",
         "2014/09/08",
     ],
-    ["", "", "ANDOLINI'S PIZZERIA &", "", "12140 EAST 96TH STREET", "", "", "", "", ""],
+    [
+        "",
+        "",
+        "ANDOLINI'S PIZZERIA &",
+        "",
+        "12140 EAST 96TH STREET",
+        "",
+        "",
+        "",
+        "",
+        "",
+    ],
     [
         "428377",
         "BAW",
@@ -2148,7 +2161,8 @@ data_stream_flag_size = [
         "from SBI",
         "from",
     ],
-    ["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other", "NCDC"],
+    ["", "Debt", "", "", "RBI", "Banks", "LIC", "GIC", "NABARD", "& Other",
+     "NCDC"],
     ["", "", "", "", "", "& FIs", "", "", "", "Banks", ""],
     ["1", "2=", "3", "4", "5", "6=", "7", "8", "9", "10", "11"],
     ["", "(3 to 6)+14", "", "", "", "(7 to13)", "", "", "", "", ""],
@@ -2165,7 +2179,8 @@ data_stream_flag_size = [
         "-",
         "0.25",
     ],
-    ["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-", "-", "-"],
+    ["Arunachal Pradesh", "1.23", "1.1", "-", "-", "0.13", "-", "-", "-",
+     "-", "-"],
     [
         "Assam",
         "12.69",
@@ -2194,8 +2209,10 @@ data_stream_flag_size = [
     ],
     ["Chhattisgarh", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
     ["Goa", "1.4", "1.02", "-", "-", "0.38", "0.31", "-", "0.07", "-", "-"],
-    ["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11", "-", "0.44"],
-    ["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64", "-", "0.49"],
+    ["Gujarat", "19.75", "17.1", "-", "-", "2.64", "1.17", "-", "1.11",
+     "-", "0.44"],
+    ["Haryana", "11.53", "9.67", "-", "0.06", "1.8", "0.55", "-", "0.64",
+     "-", "0.49"],
     [
         "Himachal Pradesh",
         "8.02",
@@ -2223,7 +2240,8 @@ data_stream_flag_size = [
         "-",
     ],
     ["Jharkhand", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
-    ["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89", "-", "0.69"],
+    ["Karnataka", "22.44", "19.59", "-", "-", "2.86", "1.22", "-", "0.89",
+     "-", "0.69"],
     [
         "Kerala",
         "29.03",
@@ -2263,11 +2281,16 @@ data_stream_flag_size = [
         "0.02",
         "2.89",
     ],
-    ["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-", "0.09"],
-    ["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05", "-", "0.03"],
-    ["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-", "-", "0.03"],
-    ["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-", "0.04"],
-    ["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66", "-", "0.2"],
+    ["Manipur", "2.17", "1.61", "-", "0.26", "0.29", "0.08", "-", "-", "-",
+     "0.09"],
+    ["Meghalaya", "1.36", "1.38", "-", "-", "-0.02", "0.04", "-", "-0.05",
+     "-", "0.03"],
+    ["Mizoram", "1.17", "0.46", "-", "0.27", "0.43", "0.11", "-", "-",
+     "-", "0.03"],
+    ["Nagaland", "2.99", "2.6", "-", "-", "0.39", "0.24", "-", "-", "-",
+     "0.04"],
+    ["Odisha", "34.04", "27.58", "-", "4.4", "2.06", "0.56", "-", "0.66",
+     "-", "0.2"],
     [
         "Punjab",
         "19.18",
@@ -2295,8 +2318,10 @@ data_stream_flag_size = [
         "0.81",
     ],
     ["Sikkim", "0.16", "-", "-", "-", "0.16", "0.03", "-", "-", "-", "0.01"],
-    ["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-", "0.68"],
-    ["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-", "0.02"],
+    ["Tamil Nadu", "34.11", "31.41", "-", "-", "2.7", "1.3", "-", "0.6", "-",
+     "0.68"],
+    ["Tripura", "2.3", "1.89", "-", "-", "0.41", "0.41", "-", "-0.05", "-",
+     "0.02"],
     ["Uttaranchal", "-", "-", "-", "-", "-", "-", "-", "-", "-", "-"],
     [
         "Uttar Pradesh",
@@ -2393,11 +2418,13 @@ data_stream_edge_tol = [
     ["Costs", "(0.21)"],
     ["T\notal investment result per unit", "3.78"],
     [
-        "1 The results cover the period from inception of the Fund at 8 April 2016 through 31 December 2016.",
+        "1 The results cover the period from inception of the Fund at "
+        "8 April 2016 through 31 December 2016.",
         "",
     ],
     [
-        "2 The result per unit is calculated using the total number of outstanding unit as per the end of the",
+        "2 The result per unit is calculated using the total number of "
+        "outstanding unit as per the end of the",
         "",
     ],
     ["period.", ""],
@@ -2454,7 +2481,8 @@ data_lattice_table_rotated = [
         "Men",
         "Women",
     ],
-    ["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645", "2391"],
+    ["Kerala", "5738", "6633", "8864", "8297", "245", "2161", "3195", "1645",
+     "2391"],
     [
         "Tamil Nadu",
         "7387",
@@ -2503,11 +2531,16 @@ data_lattice_table_rotated = [
         "1417",
         "1599",
     ],
-    ["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122", "2503"],
-    ["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579", "1709"],
-    ["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093", "1628"],
-    ["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413", "2027"],
-    ["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185", "1366"],
+    ["Gujarat", "4403", "5374", "4866", "9645", "477", "2687", "3021", "2122",
+     "2503"],
+    ["Madhya Pradesh", "*", "*", "*", "7942", "470", "1965", "2150", "1579",
+     "1709"],
+    ["Orissa", "3756", "5540", "12024", "8473", "398", "2040", "2624", "1093",
+     "1628"],
+    ["West Bengal", "*", "*", "*", "8047", "423", "2058", "2743", "1413",
+     "2027"],
+    ["Uttar Pradesh", "*", "*", "*", "9860", "581", "2139", "2415", "1185",
+     "1366"],
     [
         "Pooled",
         "38742",
@@ -2573,7 +2606,8 @@ data_lattice_two_tables_2 = [
 ]

 data_lattice_table_regions = [
-    ["Età dell’Assicurato \nall’epoca del decesso", "Misura % di \nmaggiorazione"],
+    ["Età dell’Assicurato \nall’epoca del decesso",
+     "Misura % di \nmaggiorazione"],
     ["18-75", "1,00%"],
     ["76-80", "0,50%"],
     ["81 in poi", "0,10%"],
@@ -2596,10 +2630,12 @@ data_lattice_table_areas = [
     ["Kerala", "2400", "7.2", "0.5", "25.3", "20.1", "41.5", "5.5", ""],
     ["Tamil Nadu", "2400", "21.4", "2.3", "8.8", "35.5", "25.8", "6.2", ""],
     ["Karnataka", "2399", "37.4", "2.8", "12.5", "18.3", "23.1", "5.8", ""],
-    ["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9", ""],
+    ["Andhra Pradesh", "2400", "54.0", "1.7", "8.4", "13.2", "18.8", "3.9",
+     ""],
     ["Maharashtra", "2400", "22.0", "0.9", "17.3", "20.3", "32.6", "7.0", ""],
     ["Gujarat", "2390", "28.6", "0.1", "14.4", "23.1", "26.9", "6.8", ""],
-    ["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6", ""],
+    ["Madhya Pradesh", "2402", "29.1", "3.4", "8.5", "35.1", "13.3", "10.6",
+     ""],
     ["Orissa", "2405", "33.2", "1.0", "10.4", "25.7", "21.2", "8.5", ""],
     ["West Bengal", "2293", "41.7", "4.4", "13.2", "17.1", "21.2", "2.4", ""],
     ["Uttar Pradesh", "2400", "35.3", "2.1", "4.5", "23.3", "27.1", "7.6", ""],
@@ -2650,7 +2686,8 @@ data_lattice_process_background = [
         "3,658",
         "3,183",
     ],
-    ["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173", "855"],
+    ["Kerala", "23.2.2010 to \n11.3.2010", "9", "17", "1.42", "3,559", "2,173",
+     "855"],
     ["Total", "", "47", "92", "11.81", "22,455", "19,584", "10,644"],
 ]

|
|||
["COHS", "San Mateo", "Health Plan of San Mateo", "113,202"],
|
||||
["COHS", "Ventura", "Gold Coast Health Plan", "202,217"],
|
||||
["COHS", "Total COHS Enrollment", "", "2,176,064"],
|
||||
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "", "10,132,022"],
|
||||
["Subtotal for Two-Plan, Regional Model, GMC and COHS", "", "",
|
||||
"10,132,022"],
|
||||
["PCCM", "Los Angeles", "AIDS Healthcare Foundation", "828"],
|
||||
["PCCM", "San Francisco", "Family Mosaic", "25"],
|
||||
["PCCM", "Total PHP Enrollment", "", "853"],
|
||||
|
|
@@ -2721,7 +2759,8 @@ data_lattice_shift_text_left_top = [
     ],
     ["Blood Pressure #", "2400", "Men (≥ 18yrs)", "10%", "95%", "20%", "1728"],
     ["", "", "Women (≥ 18 yrs)", "", "", "", "1728"],
-    ["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Men (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
     ["", "", "Women (≥ 18 yrs)", "", "", "", "1825"],
     [
         "Knowledge &\nPractices on HTN &\nDM",
@@ -2746,7 +2785,8 @@ data_lattice_shift_text_disable = [
         "Sample size\nper State",
     ],
     ["Anthropometry", "", "", "", "", "", ""],
-    ["Clinical Examination", "2400", "", "All the available individuals", "", "", ""],
+    ["Clinical Examination", "2400", "", "All the available individuals",
+     "", "", ""],
     ["History of morbidity", "", "", "", "", "", ""],
     [
         "Diet survey",
@@ -2758,9 +2798,11 @@ data_lattice_shift_text_disable = [
         "",
     ],
     ["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
-    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
+    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
+     "1728"],
     ["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
-    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
     [
         "Knowledge &\nPractices on HTN &",
         "2400",
@@ -2785,7 +2827,8 @@ data_lattice_shift_text_right_bottom = [
     ],
     ["Anthropometry", "", "", "", "", "", ""],
     ["Clinical Examination", "", "", "", "", "", ""],
-    ["History of morbidity", "2400", "", "", "", "", "All the available individuals"],
+    ["History of morbidity", "2400", "", "", "", "",
+     "All the available individuals"],
     [
         "Diet survey",
         "1200",
@@ -2796,9 +2839,11 @@ data_lattice_shift_text_right_bottom = [
         "All the individuals partaking meals in the HH",
     ],
     ["", "", "Men (≥ 18yrs)", "", "", "", "1728"],
-    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%", "1728"],
+    ["Blood Pressure #", "2400", "Women (≥ 18 yrs)", "10%", "95%", "20%",
+     "1728"],
     ["", "", "Men (≥ 18 yrs)", "", "", "", "1825"],
-    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%", "1825"],
+    ["Fasting blood glucose", "2400", "Women (≥ 18 yrs)", "5%", "95%", "20%",
+     "1825"],
     ["", "2400", "Men (≥ 18 yrs)", "-", "-", "-", "1728"],
     [
         "Knowledge &\nPractices on HTN &\nDM",
@@ -2820,7 +2865,7 @@ data_arabic = [
 ]

 data_stream_layout_kwargs = [
-    ["V i n s a u Ve r r e", ""],
+    ["V i n s a u V e r r e", ""],
     ["Les Blancs", "12.5CL"],
     ["A.O.P Côtes du Rhône", ""],
     ["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"],
[Binary image diffs: eight baseline-plot PNGs updated. Sizes before → after:
8.2 KiB → 8.2 KiB, 48 KiB → 33 KiB, 46 KiB → 46 KiB, 6.7 KiB → 6.7 KiB,
13 KiB → 14 KiB, (new file) 9.7 KiB, 8.8 KiB → 8.9 KiB, 18 KiB → 19 KiB.]
@@ -19,10 +19,16 @@ def test_help_output():
     output = result.output

     assert prog_name == "camelot"
-    assert result.output.startswith("Usage: %(prog_name)s [OPTIONS] COMMAND" % locals())
+    assert result.output.startswith(
+        "Usage: %(prog_name)s [OPTIONS] COMMAND" %
+        locals()
+    )
     assert all(
         v in result.output
-        for v in ["Options:", "--version", "--help", "Commands:", "lattice", "stream"]
+        for v in [
+            "Options:", "--version", "--help", "Commands:", "lattice",
+            "stream"
+        ]
     )

@@ -120,21 +126,24 @@ def test_cli_output_format():
     # json
     result = runner.invoke(
         cli,
-        ["--format", "json", "--output", outfile.format("json"), "stream", infile],
+        ["--format", "json", "--output", outfile.format("json"), "stream",
+         infile],
     )
     assert result.exit_code == 0

     # excel
     result = runner.invoke(
         cli,
-        ["--format", "excel", "--output", outfile.format("xlsx"), "stream", infile],
+        ["--format", "excel", "--output", outfile.format("xlsx"), "stream",
+         infile],
     )
     assert result.exit_code == 0

     # html
     result = runner.invoke(
         cli,
-        ["--format", "html", "--output", outfile.format("html"), "stream", infile],
+        ["--format", "html", "--output", outfile.format("html"), "stream",
+         infile],
     )
     assert result.exit_code == 0
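These CLI tests drive camelot's click entry point in-process. A minimal sketch of the pattern, assuming the cli group lives at camelot.cli (which is what the package's console script points at); the runner, cli, infile, and outfile fixtures themselves are defined earlier in the test module, and "input.pdf"/"out.csv" below are placeholders:

    from click.testing import CliRunner

    from camelot.cli import cli

    runner = CliRunner()
    result = runner.invoke(
        cli, ["--format", "csv", "--output", "out.csv", "stream", "input.pdf"]
    )
    # Holds only if input.pdf exists and contains a parseable table.
    assert result.exit_code == 0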
@@ -166,6 +175,10 @@ def test_cli_quiet():
    assert "No tables found on page-1" in result.output

    result = runner.invoke(
-        cli, ["--quiet", "--format", "csv", "--output", outfile, "stream", infile]
+        cli,
+        [
+            "--quiet", "--format", "csv", "--output", outfile, "stream",
+            infile
+        ]
    )
    assert "No tables found on page-1" not in result.output
@@ -3,18 +3,23 @@
 import os

 import pandas as pd
+from pandas.testing import assert_frame_equal

 import camelot
 from camelot.core import Table, TableList
+from camelot.__version__ import generate_version

 from .data import *


 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")


 def test_parsing_report():
-    parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
+    parsing_report = {
+        "accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1
+    }

     filename = os.path.join(testdir, "foo.pdf")
     tables = camelot.read_pdf(filename)
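The parsing_report dict asserted here is exposed on every Table camelot extracts; a short usage sketch (foo.pdf is the fixture under tests/files that this test reads):

    import camelot

    tables = camelot.read_pdf("foo.pdf")
    # Per-table summary: detection accuracy, % whitespace in cells, and the
    # table's order on its page.
    print(tables[0].parsing_report)
    # {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}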
@@ -26,10 +31,12 @@ def test_password():

     filename = os.path.join(testdir, "health_protected.pdf")
     tables = camelot.read_pdf(filename, password="ownerpass", flavor="stream")
-    assert df.equals(tables[0].df)
+    assert len(tables) == 1
+    assert_frame_equal(df, tables[0].df)

     tables = camelot.read_pdf(filename, password="userpass", flavor="stream")
-    assert df.equals(tables[0].df)
+    assert len(tables) == 1
+    assert_frame_equal(df, tables[0].df)


 def test_stream():
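The switch from df.equals to pandas.testing.assert_frame_equal (repeated through the rest of this file) makes failures debuggable: equals only yields a bare False, while assert_frame_equal raises with a report of exactly which cells differ. A self-contained illustration:

    import pandas as pd
    from pandas.testing import assert_frame_equal

    a = pd.DataFrame({"x": ["1", "2"]})
    b = pd.DataFrame({"x": ["1", "2 "]})  # trailing space, easy to miss

    assert not a.equals(b)  # just False, no hint of where they differ
    try:
        assert_frame_equal(a, b)
    except AssertionError as err:
        print(err)  # names the column and row holding the mismatch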
@@ -37,7 +44,7 @@ def test_stream():

     filename = os.path.join(testdir, "health.pdf")
     tables = camelot.read_pdf(filename, flavor="stream")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_table_rotated():
@@ -45,11 +52,11 @@ def test_stream_table_rotated():

     filename = os.path.join(testdir, "clockwise_table_2.pdf")
     tables = camelot.read_pdf(filename, flavor="stream")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)

     filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
     tables = camelot.read_pdf(filename, flavor="stream")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_two_tables():
@@ -71,7 +78,7 @@ def test_stream_table_regions():
     tables = camelot.read_pdf(
         filename, flavor="stream", table_regions=["320,460,573,335"]
     )
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_table_areas():
@@ -81,7 +88,7 @@ def test_stream_table_areas():
     tables = camelot.read_pdf(
         filename, flavor="stream", table_areas=["320,500,573,335"]
     )
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_columns():
@@ -91,7 +98,7 @@ def test_stream_columns():
     tables = camelot.read_pdf(
         filename, flavor="stream", columns=["67,180,230,425,475"], row_tol=10
     )
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_split_text():
@@ -104,7 +111,7 @@ def test_stream_split_text():
         columns=["72,95,209,327,442,529,566,606,683"],
         split_text=True,
     )
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_flag_size():
@@ -112,7 +119,7 @@ def test_stream_flag_size():

     filename = os.path.join(testdir, "superscript.pdf")
     tables = camelot.read_pdf(filename, flavor="stream", flag_size=True)
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_strip_text():
@@ -120,7 +127,7 @@ def test_stream_strip_text():

     filename = os.path.join(testdir, "detect_vertical_false.pdf")
     tables = camelot.read_pdf(filename, flavor="stream", strip_text=" ,\n")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_edge_tol():
@@ -128,7 +135,7 @@ def test_stream_edge_tol():

     filename = os.path.join(testdir, "edge_tol.pdf")
     tables = camelot.read_pdf(filename, flavor="stream", edge_tol=500)
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_stream_layout_kwargs():
@@ -138,7 +145,7 @@ def test_stream_layout_kwargs():
     tables = camelot.read_pdf(
         filename, flavor="stream", layout_kwargs={"detect_vertical": False}
     )
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice():
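layout_kwargs is forwarded to pdfminer's layout analysis (LAParams), which is why a pdfminer option like detect_vertical can be toggled straight from read_pdf. A hedged sketch of the call this test exercises ("some.pdf" is a placeholder filename):

    import camelot

    # detect_vertical=False stops pdfminer from grouping characters into
    # vertical text lines before camelot's stream parser runs.
    tables = camelot.read_pdf(
        "some.pdf",
        flavor="stream",
        layout_kwargs={"detect_vertical": False},
    )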
@@ -148,7 +155,7 @@ def test_lattice():
         testdir, "tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf"
     )
     tables = camelot.read_pdf(filename, pages="2")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice_table_rotated():
@@ -156,11 +163,11 @@ def test_lattice_table_rotated():

     filename = os.path.join(testdir, "clockwise_table_1.pdf")
     tables = camelot.read_pdf(filename)
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)

     filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
     tables = camelot.read_pdf(filename)
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice_two_tables():
@@ -179,7 +186,7 @@ def test_lattice_table_regions():

     filename = os.path.join(testdir, "table_region.pdf")
     tables = camelot.read_pdf(filename, table_regions=["170,370,560,270"])
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice_table_areas():
@@ -187,7 +194,7 @@ def test_lattice_table_areas():

     filename = os.path.join(testdir, "twotables_2.pdf")
     tables = camelot.read_pdf(filename, table_areas=["80,693,535,448"])
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice_process_background():
@@ -195,7 +202,7 @@ def test_lattice_process_background():

     filename = os.path.join(testdir, "background_lines_1.pdf")
     tables = camelot.read_pdf(filename, process_background=True)
-    assert df.equals(tables[1].df)
+    assert_frame_equal(df, tables[1].df)


 def test_lattice_copy_text():
@@ -203,7 +210,7 @@ def test_lattice_copy_text():

     filename = os.path.join(testdir, "row_span_1.pdf")
     tables = camelot.read_pdf(filename, line_scale=60, copy_text="v")
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_lattice_shift_text():
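For reference, copy_text controls how text in spanning cells is propagated: "v" copies a cell's text down the rows it spans, "h" across the columns it spans. The call under test, with the fixture name taken from the hunk above:

    import camelot

    tables = camelot.read_pdf("row_span_1.pdf", line_scale=60, copy_text="v")
    # Row-spanning cells in row_span_1.pdf now repeat their text in each
    # spanned row instead of leaving empty cells behind.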
@@ -227,9 +234,9 @@ def test_repr():
     tables = camelot.read_pdf(filename)
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"


 def test_pages():
@@ -237,22 +244,23 @@ def test_pages():
     tables = camelot.read_pdf(url)
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"

     tables = camelot.read_pdf(url, pages="1-end")
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
-    assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
-    )
+    assert \
+        repr(tables[0].cells[0][0]) == \
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"

     tables = camelot.read_pdf(url, pages="all")
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0]) ==
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
@@ -262,7 +270,8 @@ def test_url():
     assert repr(tables) == "<TableList n=1>"
     assert repr(tables[0]) == "<Table shape=(7, 7)>"
     assert (
-        repr(tables[0].cells[0][0]) == "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
+        repr(tables[0].cells[0][0]) ==
+        "<Cell x1=120.48 y1=218.43 x2=164.64 y2=233.77>"
     )
@@ -271,7 +280,7 @@ def test_arabic():

     filename = os.path.join(testdir, "tabula/arabic.pdf")
     tables = camelot.read_pdf(filename)
-    assert df.equals(tables[0].df)
+    assert_frame_equal(df, tables[0].df)


 def test_table_order():
@@ -282,7 +291,12 @@ def test_table_order():
         return t

     table_list = TableList(
-        [_make_table(2, 1), _make_table(1, 1), _make_table(3, 4), _make_table(1, 2)]
+        [
+            _make_table(2, 1),
+            _make_table(1, 1),
+            _make_table(3, 4),
+            _make_table(1, 2)
+        ]
     )

     assert [(t.page, t.order) for t in sorted(table_list)] == [
@@ -297,3 +311,18 @@ def test_table_order():
         (1, 2),
         (1, 1),
     ]
+
+
+def test_version_generation():
+    version = (0, 7, 3)
+    assert generate_version(version, prerelease=None, revision=None) == "0.7.3"
+
+
+def test_version_generation_with_prerelease_revision():
+    version = (0, 7, 3)
+    prerelease = "alpha"
+    revision = 2
+    assert (
+        generate_version(version, prerelease=prerelease, revision=revision)
+        == "0.7.3-alpha.2"
+    )
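The two assertions above pin down generate_version's contract well enough to sketch an implementation consistent with them; the real one lives in camelot/__version__.py and may differ in detail:

    def generate_version(version, prerelease=None, revision=None):
        # (0, 7, 3) -> "0.7.3"; with prerelease/revision -> "0.7.3-alpha.2"
        version_string = ".".join(str(v) for v in version)
        if prerelease is not None:
            version_string += "-{}".format(prerelease)
        if revision is not None:
            version_string += ".{}".format(revision)
        return version_string

    assert generate_version((0, 7, 3)) == "0.7.3"
    assert generate_version((0, 7, 3), "alpha", 2) == "0.7.3-alpha.2"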
@@ -4,13 +4,30 @@
 import os

 import pytest

+import matplotlib
+
 import camelot

+# The version of Matplotlib has an impact on some of the tests. Unfortunately,
+# we can't enforce usage of a recent version of Matplotlib without dropping
+# support for Python 3.5.
+# To check the version of matplotlib installed:
+#   pip freeze | grep matplotlib
+# To force upgrade:
+#   pip install --upgrade --force-reinstall matplotlib
+# To force usage of a Python 3.5 compatible version:
+#   pip install "matplotlib==2.2.5"
+# This condition can be removed in favor of a version requirement bump for
+# matplotlib once support for Python 3.5 is dropped.
+LEGACY_MATPLOTLIB = matplotlib.__version__ < "3.2.1"
+
 testdir = os.path.dirname(os.path.abspath(__file__))
 testdir = os.path.join(testdir, "files")


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of Matplotlib")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_text_plot():
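One caveat worth noting: matplotlib.__version__ < "3.2.1" compares strings lexicographically, which orders the versions in play here correctly but would misclassify a hypothetical "3.10.0" as older than "3.2.1". A more robust variant, assuming the packaging distribution is available (it ships with pip/setuptools environments):

    import matplotlib
    from packaging.version import Version

    # Version("3.10.0") > Version("3.2.1"), unlike the plain string comparison.
    LEGACY_MATPLOTLIB = Version(matplotlib.__version__) < Version("3.2.1")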
@@ -26,6 +43,15 @@ def test_grid_plot():
     tables = camelot.read_pdf(filename)
     return camelot.plot(tables[0], kind='grid')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of Matplotlib")
+@pytest.mark.mpl_image_compare(
+    baseline_dir="files/baseline_plots", remove_text=True)
+def test_stream_grid_plot():
+    filename = os.path.join(testdir, "foo.pdf")
+    tables = camelot.read_pdf(filename, flavor="stream")
+    return camelot.plot(tables[0], kind='grid')
+
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
@@ -35,6 +61,8 @@ def test_lattice_contour_plot():
     return camelot.plot(tables[0], kind='contour')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of Matplotlib")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_stream_contour_plot():
@@ -51,6 +79,8 @@ def test_line_plot():
     return camelot.plot(tables[0], kind='line')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of Matplotlib")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_joint_plot():
@@ -59,6 +89,8 @@ def test_joint_plot():
     return camelot.plot(tables[0], kind='joint')


+@pytest.mark.skipif(LEGACY_MATPLOTLIB,
+                    reason="depends on a recent version of Matplotlib")
 @pytest.mark.mpl_image_compare(
     baseline_dir="files/baseline_plots", remove_text=True)
 def test_textedge_plot():