Add flavors

pull/2/head
Vinayak Mehta 2018-09-23 10:53:32 +05:30
parent 4a30c5a514
commit 3170a9689f
11 changed files with 207 additions and 305 deletions

View File

@ -1,4 +1,3 @@
from .__version__ import __version__
from .io import read_pdf
from .plotting import plot_geometry

View File

@ -5,18 +5,9 @@ import click
from . import __version__
from .io import read_pdf
from .plotting import plot_geometry
from .utils import validate_input, remove_extra
class Mutex(click.Option):
    """Click option that validates the parsed options before handling them.

    Options declared with ``cls=Mutex`` run ``validate_input`` over the
    full set of parsed options each time click processes them.
    """

    def handle_parse_result(self, ctx, opts, args):
        """Validate ``opts`` and then defer to ``click.Option``.

        ``mesh`` and ``geometry_type`` are looked up in ``opts`` (falling
        back to ``False`` when absent) and handed to ``validate_input``
        together with ``opts`` itself. The return value is whatever the
        parent implementation returns.
        """
        validate_input(
            opts,
            mesh=opts.get('mesh', False),
            geometry_type=opts.get('geometry_type', False))
        return super(Mutex, self).handle_parse_result(ctx, opts, args)
@click.command()
@click.version_option(version=__version__)
@click.option("-p", "--pages", default="1", help="Comma-separated page numbers"
@ -27,8 +18,6 @@ class Mutex(click.Option):
help="Output file format.")
@click.option("-z", "--zip", is_flag=True, help="Whether or not to create a ZIP"
" archive.")
@click.option("-m", "--mesh", is_flag=True, help="Whether or not to"
" use Lattice method of parsing. Stream is used by default.")
@click.option("-T", "--table_area", default=[], multiple=True,
help="Table areas (x1,y1,x2,y2) to process.\n"
" x1, y1 -> left-top and x2, y2 -> right-bottom")
@ -39,12 +28,44 @@ class Mutex(click.Option):
" super/subscripts)")
@click.option("-M", "--margins", nargs=3, default=(1.0, 0.5, 0.1),
help="char_margin, line_margin, word_margin for PDFMiner.")
@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
help="x-coordinates of column separators.")
@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
" formed by combining text vertically within this tolerance.")
@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
" be formed by combining text horizontally within this tolerance.")
@click.option("-G", "--geometry_type",
type=click.Choice(["text", "table", "contour", "joint", "line"]),
help="Plot geometry found on pdf page for debugging.\n\n"
"text: Plot text objects. (Useful to get table_area and"
" columns coordinates)\ntable: Plot parsed table.\n"
"contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
" intersections.\nline (with --mesh): Plot detected lines.")
@click.argument("filepath", type=click.Path(exists=True))
def cli(*args, **kwargs):
    """Command-line entry point: parse tables from a PDF and export them.

    The options consumed here (pages, output, format, zip, mesh,
    geometry_type, filepath) are popped out of ``kwargs``; whatever is
    left is normalised and forwarded to ``read_pdf``.

    Raises
    ------
    click.UsageError
        If no output filepath or no output format was supplied. Both
        checks run after the tables have been parsed and echoed.
    """
    pages = kwargs.pop("pages")
    output = kwargs.pop("output")
    f = kwargs.pop("format")
    compress = kwargs.pop("zip")
    mesh = kwargs.pop("mesh")
    # Popped so it is not forwarded to read_pdf; not used further here.
    geometry_type = kwargs.pop("geometry_type")
    filepath = kwargs.pop("filepath")

    # Multi-value options are materialised as lists; an empty list is
    # replaced by None.
    for option in ('table_area', 'columns', 'copy_text'):
        values = list(kwargs[option])
        kwargs[option] = values if values else None
    kwargs['shift_text'] = list(kwargs['shift_text'])

    kwargs = remove_extra(kwargs, mesh=mesh)
    tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
    click.echo(tables)

    if output is None:
        raise click.UsageError("Please specify an output filepath using --output")
    if f is None:
        raise click.UsageError("Please specify an output format using --format")
    tables.export(output, f=f, compress=compress)
@click.option("-T", "--table_area", default=[], multiple=True,
help="Table areas (x1,y1,x2,y2) to process.\n"
" x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-back", "--process_background", is_flag=True, cls=Mutex,
help="(with --mesh) Whether or not to process lines that are in"
" background.")
@ -75,40 +96,18 @@ class Mutex(click.Option):
@click.option("-I", "--iterations", default=0, cls=Mutex,
help="(with --mesh) Number of times for erosion/dilation is"
" applied.")
@click.option("-G", "--geometry_type",
type=click.Choice(["text", "table", "contour", "joint", "line"]),
help="Plot geometry found on pdf page for debugging.\n\n"
"text: Plot text objects. (Useful to get table_area and"
" columns coordinates)\ntable: Plot parsed table.\n"
"contour (with --mesh): Plot detected rectangles.\njoint (with --mesh): Plot detected line"
" intersections.\nline (with --mesh): Plot detected lines.")
@click.argument("filepath", type=click.Path(exists=True))
def cli(*args, **kwargs):
pages = kwargs.pop("pages")
output = kwargs.pop("output")
f = kwargs.pop("format")
compress = kwargs.pop("zip")
mesh = kwargs.pop("mesh")
geometry_type = kwargs.pop("geometry_type")
filepath = kwargs.pop("filepath")
def lattice(*args, **kwargs):
pass
table_area = list(kwargs['table_area'])
kwargs['table_area'] = None if not table_area else table_area
columns = list(kwargs['columns'])
kwargs['columns'] = None if not columns else columns
copy_text = list(kwargs['copy_text'])
kwargs['copy_text'] = None if not copy_text else copy_text
kwargs['shift_text'] = list(kwargs['shift_text'])
kwargs = remove_extra(kwargs, mesh=mesh)
if geometry_type is None:
tables = read_pdf(filepath, pages=pages, mesh=mesh, **kwargs)
click.echo(tables)
if output is None:
raise click.UsageError("Please specify an output filepath using --output")
if f is None:
raise click.UsageError("Please specify an output format using --format")
tables.export(output, f=f, compress=compress)
else:
plot_geometry(filepath, pages=pages, mesh=mesh,
geometry_type=geometry_type, **kwargs)
@click.option("-T", "--table_area", default=[], multiple=True,
help="Table areas (x1,y1,x2,y2) to process.\n"
" x1, y1 -> left-top and x2, y2 -> right-bottom")
@click.option("-C", "--columns", default=[], multiple=True, cls=Mutex,
help="x-coordinates of column separators.")
@click.option("-r", "--row_close_tol", default=2, cls=Mutex, help="Rows will be"
" formed by combining text vertically within this tolerance.")
@click.option("-c", "--col_close_tol", default=0, cls=Mutex, help="Columns will"
" be formed by combining text horizontally within this tolerance.")
def stream(*args, **kwargs):
pass

View File

@ -6,6 +6,8 @@ import tempfile
import numpy as np
import pandas as pd
from .plotting import *
class Cell(object):
"""Defines a cell in a table with coordinates relative to a
@ -318,6 +320,32 @@ class Table(object):
cell.hspan = True
return self
def plot(self, geometry_type):
    """Plot geometry found on PDF page based on geometry_type
    specified, useful for debugging and playing with different
    parameters to get the best output.

    Parameters
    ----------
    geometry_type : str
        The geometry type for which a plot should be generated.
        Can be 'text', 'table', 'contour', 'joint', 'line'

    Raises
    ------
    NotImplementedError
        If ``self.flavor`` is 'stream' and geometry_type is one of
        'contour', 'joint' or 'line'.
    """
    if self.flavor == 'stream' and geometry_type in ['contour', 'joint', 'line']:
        # The placeholder must be filled in, otherwise the user sees a
        # literal "{}" instead of the offending geometry type.
        raise NotImplementedError(
            "{} cannot be plotted with flavor='stream'".format(geometry_type))

    # Each geometry type is drawn from the matching attribute stored on
    # this object. Any other value falls through without plotting.
    if geometry_type == 'text':
        plot_text(self._text)
    elif geometry_type == 'table':
        plot_table(self)
    elif geometry_type == 'contour':
        plot_contour(self._image)
    elif geometry_type == 'joint':
        plot_joint(self._image)
    elif geometry_type == 'line':
        plot_line(self._segments)
def to_csv(self, path, **kwargs):
"""Writes Table to a comma-separated values (csv) file.
@ -489,35 +517,3 @@ class TableList(object):
zipname = os.path.join(os.path.dirname(path), root) + '.zip'
with zipfile.ZipFile(zipname, 'w', allowZip64=True) as z:
z.write(filepath, os.path.basename(filepath))
class Geometry(object):
    """Holds four collections: text, images, segments and tables.

    ``text`` and ``tables`` start as empty lists, ``images`` and
    ``segments`` as empty tuples.
    """

    def __init__(self):
        self.text, self.images = [], ()
        self.segments, self.tables = (), []

    def __repr__(self):
        # Report the length of every collection, in a fixed order.
        counts = ' '.join(
            '{}={}'.format(field, len(getattr(self, field)))
            for field in ('text', 'images', 'segments', 'tables'))
        return '<{} {}>'.format(self.__class__.__name__, counts)
class GeometryList(object):
    """Column-wise view over a sequence of geometry objects.

    For each of ``text``, ``images``, ``segments`` and ``tables`` this
    stores a list with that attribute taken from every item of
    ``geometry``, in order.
    """

    _FIELDS = ('text', 'images', 'segments', 'tables')

    def __init__(self, geometry):
        # One pass over ``geometry`` per field, in the declared order.
        for field in self._FIELDS:
            setattr(self, field, [getattr(g, field) for g in geometry])

    def __repr__(self):
        counts = ' '.join(
            '{}={}'.format(field, len(getattr(self, field)))
            for field in self._FIELDS)
        return '<{} {}>'.format(self.__class__.__name__, counts)

View File

@ -2,7 +2,7 @@ import os
from PyPDF2 import PdfFileReader, PdfFileWriter
from .core import TableList, GeometryList
from .core import TableList
from .parsers import Stream, Lattice
from .utils import (TemporaryDirectory, get_page_layout, get_text_objects,
get_rotation)
@ -17,7 +17,7 @@ class PDFHandler(object):
----------
filename : str
Path to pdf file.
pages : str
pages : str, optional (default: '1')
Comma-separated page numbers to parse.
Example: 1,3,4 or 1,4-end
@ -35,7 +35,7 @@ class PDFHandler(object):
----------
filename : str
Path to pdf file.
pages : str
pages : str, optional (default: '1')
Comma-separated page numbers to parse.
Example: 1,3,4 or 1,4-end
@ -112,15 +112,15 @@ class PDFHandler(object):
with open(fpath, 'wb') as f:
outfile.write(f)
def parse(self, mesh=False, **kwargs):
def parse(self, flavor='lattice', **kwargs):
"""Extracts tables by calling parser.get_tables on all single
page pdfs.
Parameters
----------
mesh : bool (default: False)
Whether or not to use Lattice method of parsing. Stream
is used by default.
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
kwargs : dict
See camelot.read_pdf kwargs.
@ -134,15 +134,13 @@ class PDFHandler(object):
"""
tables = []
geometry = []
with TemporaryDirectory() as tempdir:
for p in self.pages:
self._save_page(self.filename, p, tempdir)
pages = [os.path.join(tempdir, 'page-{0}.pdf'.format(p))
for p in self.pages]
parser = Stream(**kwargs) if not mesh else Lattice(**kwargs)
parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
for p in pages:
t, g = parser.extract_tables(p)
t = parser.extract_tables(p)
tables.extend(t)
geometry.append(g)
return TableList(tables), GeometryList(geometry)
return TableList(tables)

View File

@ -2,22 +2,22 @@ from .handlers import PDFHandler
from .utils import validate_input, remove_extra
def read_pdf(filepath, pages='1', mesh=False, **kwargs):
def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
"""Read PDF and return parsed data tables.
Note: kwargs annotated with ^ can only be used with mesh=False
and kwargs annotated with * can only be used with mesh=True.
Note: kwargs annotated with ^ can only be used with flavor='stream'
and kwargs annotated with * can only be used with flavor='lattice'.
Parameters
----------
filepath : str
Path to pdf file.
pages : str
pages : str, optional (default: '1')
Comma-separated page numbers to parse.
Example: 1,3,4 or 1,4-end
mesh : bool (default: False)
Whether or not to use Lattice method of parsing. Stream
is used by default.
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
table_area : list, optional (default: None)
List of table areas to process as strings of the form
x1,y1,x2,y2 where (x1, y1) -> left-top and
@ -85,8 +85,8 @@ def read_pdf(filepath, pages='1', mesh=False, **kwargs):
tables : camelot.core.TableList
"""
validate_input(kwargs, mesh=mesh)
validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, mesh=mesh)
tables, __ = p.parse(mesh=mesh, **kwargs)
kwargs = remove_extra(kwargs, flavor=flavor)
tables, __ = p.parse(flavor=flavor, **kwargs)
return tables

View File

@ -194,7 +194,8 @@ class Lattice(BaseParser):
stderr=subprocess.STDOUT)
def _generate_table_bbox(self):
self.image, self.threshold = adaptive_threshold(self.imagename, process_background=self.process_background,
self.image, self.threshold = adaptive_threshold(
self.imagename, process_background=self.process_background,
blocksize=self.threshold_blocksize, c=self.threshold_constant)
image_width = self.image.shape[1]
image_height = self.image.shape[0]
@ -297,11 +298,20 @@ class Lattice(BaseParser):
table.shape = table.df.shape
whitespace = compute_whitespace(data)
table.flavor = 'lattice'
table.accuracy = accuracy
table.whitespace = whitespace
table.order = table_idx + 1
table.page = int(os.path.basename(self.rootname).replace('page-', ''))
# for plotting
_text = []
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
table._text = _text
table._image = (self.image, self.table_bbox_unscaled)
table._segments = (self.vertical_segments, self.horizontal_segments)
return table
def extract_tables(self, filename):
@ -311,7 +321,7 @@ class Lattice(BaseParser):
if not self.horizontal_text:
logger.info("No tables found on {}".format(
os.path.basename(self.rootname)))
return [], self.g
return []
self._generate_image()
self._generate_table_bbox()
@ -324,13 +334,4 @@ class Lattice(BaseParser):
table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s)
_tables.append(table)
if self.debug:
text = []
text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
self.g.text = text
self.g.images = (self.image, self.table_bbox_unscaled)
self.g.segments = (self.vertical_segments, self.horizontal_segments)
self.g.tables = _tables
return _tables, self.g
return _tables

View File

@ -333,11 +333,20 @@ class Stream(BaseParser):
table.shape = table.df.shape
whitespace = compute_whitespace(data)
table.flavor = 'stream'
table.accuracy = accuracy
table.whitespace = whitespace
table.order = table_idx + 1
table.page = int(os.path.basename(self.rootname).replace('page-', ''))
# for plotting
_text = []
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
_text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
table._text = _text
table._image = None
table._segments = None
return table
def extract_tables(self, filename):
@ -347,7 +356,7 @@ class Stream(BaseParser):
if not self.horizontal_text:
logger.info("No tables found on {}".format(
os.path.basename(self.rootname)))
return [], self.g
return []
self._generate_table_bbox()
@ -359,11 +368,4 @@ class Stream(BaseParser):
table = self._generate_table(table_idx, cols, rows)
_tables.append(table)
if self.debug:
text = []
text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text])
text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text])
self.g.text = text
self.g.tables = _tables
return _tables, self.g
return _tables

View File

@ -2,107 +2,8 @@ import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from .handlers import PDFHandler
from .utils import validate_input, remove_extra
def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs):
"""Plot geometry found on pdf page based on type specified,
useful for debugging and playing with different parameters to get
the best output.
Note: kwargs annotated with ^ can only be used with mesh=False
and kwargs annotated with * can only be used with mesh=True.
Parameters
----------
filepath : str
Path to pdf file.
pages : str
Comma-separated page numbers to parse.
Example: 1,3,4 or 1,4-end
mesh : bool (default: False)
Whether or not to use Lattice method of parsing. Stream
is used by default.
geometry_type : str, optional (default: None)
* 'text' : Plot text objects found on page. (Useful to get \
table_area and columns coordinates)
* 'table' : Plot parsed table.
* 'contour'* : Plot detected rectangles.
* 'joint'* : Plot detected line intersections.
* 'line'* : Plot detected lines.
table_area : list, optional (default: None)
List of table areas to process as strings of the form
x1,y1,x2,y2 where (x1, y1) -> left-top and
(x2, y2) -> right-bottom in pdf coordinate space.
columns^ : list, optional (default: None)
List of column x-coordinates as strings where the coordinates
are comma-separated.
split_text : bool, optional (default: False)
Whether or not to split a text line if it spans across
multiple cells.
flag_size : bool, optional (default: False)
Whether or not to highlight a substring using <s></s>
if its size is different from rest of the string. (Useful for
super and subscripts.)
row_close_tol^ : int, optional (default: 2)
Rows will be formed by combining text vertically
within this tolerance.
col_close_tol^ : int, optional (default: 0)
Columns will be formed by combining text horizontally
within this tolerance.
process_background* : bool, optional (default: False)
Whether or not to process lines that are in background.
line_size_scaling* : int, optional (default: 15)
Factor by which the page dimensions will be divided to get
smallest length of lines that should be detected.
The larger this value, smaller the detected lines. Making it
too large will lead to text being detected as lines.
copy_text* : list, optional (default: None)
{'h', 'v'}
Select one or more strings from above and pass them as a list
to specify the direction in which text should be copied over
when a cell spans multiple rows or columns.
shift_text* : list, optional (default: ['l', 't'])
{'l', 'r', 't', 'b'}
Select one or more strings from above and pass them as a list
to specify where the text in a spanning cell should flow.
line_close_tol* : int, optional (default: 2)
Tolerance parameter used to merge vertical and horizontal
detected lines which lie close to each other.
joint_close_tol* : int, optional (default: 2)
Tolerance parameter used to decide whether the detected lines
and points lie close to each other.
threshold_blocksize* : int, optional (default: 15)
Size of a pixel neighborhood that is used to calculate a
threshold value for the pixel: 3, 5, 7, and so on.
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
threshold_constant* : int, optional (default: -2)
Constant subtracted from the mean or weighted mean.
Normally, it is positive but may be zero or negative as well.
For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
iterations* : int, optional (default: 0)
Number of times for erosion/dilation is applied.
For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
margins : tuple
PDFMiner margins. (char_margin, line_margin, word_margin)
For more information, refer `PDFMiner docs <https://euske.github.io/pdfminer/>`_.
"""
validate_input(kwargs, mesh=mesh, geometry_type=geometry_type)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, mesh=mesh)
debug = True if geometry_type is not None else False
kwargs.update({'debug': debug})
__, geometry = p.parse(mesh=mesh, **kwargs)
if geometry_type == 'text':
for text in geometry.text:
def plot_text(text):
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
xs, ys = [], []
@ -119,9 +20,9 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
ax.set_xlim(min(xs) - 10, max(xs) + 10)
ax.set_ylim(min(ys) - 10, max(ys) + 10)
plt.show()
elif geometry_type == 'table':
for tables in geometry.tables:
for table in tables:
def plot_table(table):
for row in table.cells:
for cell in row:
if cell.left:
@ -137,15 +38,19 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
plt.plot([cell.lb[0], cell.rb[0]],
[cell.lb[1], cell.rb[1]])
plt.show()
elif geometry_type == 'contour':
for img, table_bbox in geometry.images:
def plot_contour(image):
    """Draw a rectangle for every key of the table bbox mapping and show it.

    Parameters
    ----------
    image : tuple
        Pair of (img, table_bbox). Each key of ``table_bbox`` is indexed
        at positions 0-3 to obtain the two corner points passed to
        ``cv2.rectangle``; the rectangles are drawn onto ``img`` itself.
    """
    page_img, bboxes = image
    for bbox in bboxes.keys():
        first_corner = (bbox[0], bbox[1])
        second_corner = (bbox[2], bbox[3])
        cv2.rectangle(page_img, first_corner, second_corner, (255, 0, 0), 20)
    plt.imshow(page_img)
    plt.show()
elif geometry_type == 'joint':
for img, table_bbox in geometry.images:
def plot_joint(image):
img, table_bbox = image
x_coord = []
y_coord = []
for k in table_bbox.keys():
@ -157,10 +62,12 @@ def plot_geometry(filepath, pages='1', mesh=False, geometry_type=None, **kwargs)
plt.axis([0, max_x + 100, max_y + 100, 0])
plt.imshow(img)
plt.show()
elif geometry_type == 'line':
for v_s, h_s in geometry.segments:
for v in v_s:
def plot_line(segments):
vertical, horizontal = segments
for v in vertical:
plt.plot([v[0], v[2]], [v[1], v[3]])
for h in h_s:
for h in horizontal:
plt.plot([h[0], h[2]], [h[1], h[3]])
plt.show()

View File

@ -38,25 +38,25 @@ lattice_kwargs = [
]
def validate_input(kwargs, mesh=False, geometry_type=False):
def check_intersection(parser_kwargs, input_kwargs, message_bool):
def validate_input(kwargs, flavor='lattice', geometry_type=False):
def check_intersection(parser_kwargs, input_kwargs):
isec = set(parser_kwargs).intersection(set(input_kwargs.keys()))
if isec:
raise ValueError("{} can not be used with mesh set to {}".format(
",".join(sorted(isec)), message_bool))
raise ValueError("{} cannot be used with flavor='{}'".format(
",".join(sorted(isec)), flavor))
if mesh:
check_intersection(stream_kwargs, kwargs, True)
if flavor == 'lattice':
check_intersection(stream_kwargs, kwargs)
else:
check_intersection(lattice_kwargs, kwargs, False)
check_intersection(lattice_kwargs, kwargs)
if geometry_type:
if not mesh and geometry_type in ['contour', 'joint', 'line']:
raise ValueError("Use geometry_type={} with mesh set to True".format(
if flavor != 'lattice' and geometry_type in ['contour', 'joint', 'line']:
raise ValueError("Use geometry_type='{}' with flavor='lattice'".format(
geometry_type))
def remove_extra(kwargs, mesh=False):
if mesh:
def remove_extra(kwargs, flavor='lattice'):
if flavor == 'lattice':
for key in kwargs.keys():
if key in stream_kwargs:
kwargs.pop(key)

View File

@ -9,7 +9,7 @@ with open(os.path.join(here, 'camelot', '__version__.py'), 'r') as f:
exec(f.read(), about)
# TODO: Move these to __version__.py
NAME = 'camelot'
NAME = 'camelot-py'
VERSION = about['__version__']
DESCRIPTION = 'PDF Table Parsing for Humans'
with open('README.md') as f:

View File

@ -18,11 +18,11 @@ def test_stream_table_rotated():
df = pd.DataFrame(data_stream_table_rotated)
filename = os.path.join(testdir, "clockwise_table_2.pdf")
tables = camelot.read_pdf(filename)
tables = camelot.read_pdf(filename, flavor="stream")
assert df.equals(tables[0].df)
filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
tables = camelot.read_pdf(filename)
tables = camelot.read_pdf(filename, flavor="stream")
assert df.equals(tables[0].df)
@ -30,7 +30,7 @@ def test_stream_table_area():
df = pd.DataFrame(data_stream_table_area_single)
filename = os.path.join(testdir, "tabula/us-007.pdf")
tables = camelot.read_pdf(filename, table_area=["320,500,573,335"])
tables = camelot.read_pdf(filename, flavor="stream", table_area=["320,500,573,335"])
assert df.equals(tables[0].df)
@ -39,7 +39,7 @@ def test_stream_columns():
filename = os.path.join(testdir, "mexican_towns.pdf")
tables = camelot.read_pdf(
filename, columns=["67,180,230,425,475"], row_close_tol=10)
filename, flavor="stream", columns=["67,180,230,425,475"], row_close_tol=10)
assert df.equals(tables[0].df)
@ -48,7 +48,7 @@ def test_lattice():
filename = os.path.join(testdir,
"tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf")
tables = camelot.read_pdf(filename, pages="2", mesh=True)
tables = camelot.read_pdf(filename, pages="2")
assert df.equals(tables[0].df)
@ -56,11 +56,11 @@ def test_lattice_table_rotated():
df = pd.DataFrame(data_lattice_table_rotated)
filename = os.path.join(testdir, "clockwise_table_1.pdf")
tables = camelot.read_pdf(filename, mesh=True)
tables = camelot.read_pdf(filename)
assert df.equals(tables[0].df)
filename = os.path.join(testdir, "anticlockwise_table_1.pdf")
tables = camelot.read_pdf(filename, mesh=True)
tables = camelot.read_pdf(filename)
assert df.equals(tables[0].df)
@ -68,7 +68,7 @@ def test_lattice_process_background():
df = pd.DataFrame(data_lattice_process_background)
filename = os.path.join(testdir, "background_lines_1.pdf")
tables = camelot.read_pdf(filename, mesh=True, process_background=True)
tables = camelot.read_pdf(filename, process_background=True)
assert df.equals(tables[1].df)
@ -76,5 +76,5 @@ def test_lattice_copy_text():
df = pd.DataFrame(data_lattice_copy_text)
filename = os.path.join(testdir, "row_span_1.pdf")
tables = camelot.read_pdf(filename, mesh=True, line_size_scaling=60, copy_text="v")
tables = camelot.read_pdf(filename, line_size_scaling=60, copy_text="v")
assert df.equals(tables[0].df)