Rename WIP parser to "network"; actual Hybrid parser to come

pull/153/head
Frh 2020-05-02 16:14:03 -07:00
parent c7ab3a4c32
commit 6711f877bf
18 changed files with 120 additions and 118 deletions

View File

@ -313,7 +313,7 @@ def stream(c, *args, **kwargs):
tables.export(output, f=f, compress=compress) tables.export(output, f=f, compress=compress)
@cli.command("hybrid") @cli.command("network")
@click.option( @click.option(
"-R", "-R",
"--table_regions", "--table_regions",
@ -365,7 +365,7 @@ def stream(c, *args, **kwargs):
) )
@click.argument("filepath", type=click.Path(exists=True)) @click.argument("filepath", type=click.Path(exists=True))
@pass_config @pass_config
def hybrid(c, *args, **kwargs): def network(c, *args, **kwargs):
"""Use spaces between text to parse the table.""" """Use spaces between text to parse the table."""
conf = c.config conf = c.config
pages = conf.pop("pages") pages = conf.pop("pages")
@ -396,7 +396,7 @@ def hybrid(c, *args, **kwargs):
"Please specify output file format using --format") "Please specify output file format using --format")
tables = read_pdf( tables = read_pdf(
filepath, pages=pages, flavor="hybrid", suppress_stdout=quiet, **kwargs filepath, pages=pages, flavor="network", suppress_stdout=quiet, **kwargs
) )
click.echo("Found {} tables".format(tables.n)) click.echo("Found {} tables".format(tables.n))
if plot_type is not None: if plot_type is not None:

View File

@ -7,7 +7,7 @@ import logging
from PyPDF2 import PdfFileReader, PdfFileWriter from PyPDF2 import PdfFileReader, PdfFileWriter
from .core import TableList from .core import TableList
from .parsers import Stream, Lattice, Hybrid from .parsers import Stream, Lattice, Network
from .utils import ( from .utils import (
build_file_path_in_temp_dir, build_file_path_in_temp_dir,
get_page_layout, get_page_layout,
@ -22,7 +22,7 @@ logger = logging.getLogger("camelot")
PARSERS = { PARSERS = {
"lattice": Lattice, "lattice": Lattice,
"stream": Stream, "stream": Stream,
"hybrid": Hybrid, "network": Network,
} }
@ -177,7 +177,7 @@ class PDFHandler():
Parameters Parameters
---------- ----------
flavor : str (default: 'lattice') flavor : str (default: 'lattice')
The parsing method to use ('lattice', 'stream', or 'hybrid'). The parsing method to use ('lattice', 'stream', or 'network').
Lattice is used by default. Lattice is used by default.
suppress_stdout : str (default: False) suppress_stdout : str (default: False)
Suppress logs and warnings. Suppress logs and warnings.

View File

@ -99,10 +99,10 @@ def read_pdf(
""" """
layout_kwargs = layout_kwargs or {} layout_kwargs = layout_kwargs or {}
if flavor not in ["lattice", "stream", "hybrid"]: if flavor not in ["lattice", "stream", "network"]:
raise NotImplementedError( raise NotImplementedError(
"Unknown flavor specified." "Unknown flavor specified."
" Use either 'lattice', 'stream', or 'hybrid'" " Use either 'lattice', 'stream', or 'network'"
) )
with warnings.catch_warnings(): with warnings.catch_warnings():

View File

@ -2,4 +2,4 @@
from .stream import Stream from .stream import Stream
from .lattice import Lattice from .lattice import Lattice
from .hybrid import Hybrid from .network import Network

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Implementation of hybrid table parser.""" """Implementation of network table parser."""
from __future__ import division from __future__ import division
@ -391,7 +391,7 @@ class TextNetworks(TextAlignments):
return gaps_hv return gaps_hv
def search_table_body(self, gaps_hv, parse_details=None): def search_table_body(self, gaps_hv, parse_details=None):
""" Build a candidate bbox for the body of a table using hybrid algo """ Build a candidate bbox for the body of a table using network algo
Seed the process with the textline with the highest alignment Seed the process with the textline with the highest alignment
score, then expand the bbox with textlines within threshold. score, then expand the bbox with textlines within threshold.
@ -496,7 +496,7 @@ class TextNetworks(TextAlignments):
if textline in new_tls: if textline in new_tls:
del tls_search_space[i] del tls_search_space[i]
if len(tls_in_bbox) > MINIMUM_TEXTLINES_IN_TABLE: if len(tls_in_bbox) >= MINIMUM_TEXTLINES_IN_TABLE:
return bbox return bbox
return None return None
@ -508,8 +508,8 @@ class TextNetworks(TextAlignments):
self._compute_alignment_counts() self._compute_alignment_counts()
class Hybrid(TextBaseParser): class Network(TextBaseParser):
"""Hybrid method of parsing looks for spaces between text """Network method of parsing looks for spaces between text
to parse the table. to parse the table.
If you want to specify columns when specifying multiple table If you want to specify columns when specifying multiple table
@ -561,7 +561,7 @@ class Hybrid(TextBaseParser):
debug=False, debug=False,
**kwargs): **kwargs):
super().__init__( super().__init__(
"hybrid", "network",
table_regions=table_regions, table_regions=table_regions,
table_areas=table_areas, table_areas=table_areas,
columns=columns, columns=columns,

View File

@ -203,7 +203,7 @@ class PlotMethods():
raise NotImplementedError( raise NotImplementedError(
"Lattice flavor does not support kind='{}'".format(kind) "Lattice flavor does not support kind='{}'".format(kind)
) )
if table.flavor in ["stream", "hybrid"] and kind in ["line"]: if table.flavor in ["stream", "network"] and kind in ["line"]:
raise NotImplementedError( raise NotImplementedError(
"Stream flavor does not support kind='{}'".format(kind) "Stream flavor does not support kind='{}'".format(kind)
) )
@ -313,7 +313,7 @@ class PlotMethods():
ax = prepare_plot(table, ax) ax = prepare_plot(table, ax)
draw_text(table, ax) draw_text(table, ax)
if table.flavor == "hybrid": if table.flavor == "network":
for network in table.parse_details["network_searches"]: for network in table.parse_details["network_searches"]:
most_connected_tl = network.most_connected_textline() most_connected_tl = network.most_connected_textline()
@ -428,8 +428,8 @@ class PlotMethods():
return ax.get_figure() return ax.get_figure()
@staticmethod @staticmethod
def hybrid_table_search(table, ax=None): def network_table_search(table, ax=None):
"""Generates a plot illustrating the steps of the hybrid table search. """Generates a plot illustrating the steps of the network table search.
Parameters Parameters
---------- ----------

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -824,8 +824,8 @@ data_stream_table_rotated = [
] ]
# The streaming algorithm incorrectly includes a header in the result. # The streaming algorithm incorrectly includes a header in the result.
# Trimming the table for the test of hybrid, which doesn't include it. # Trimming the table for the test of network, which doesn't include it.
data_hybrid_table_rotated = data_stream_table_rotated[1:] data_network_table_rotated = data_stream_table_rotated[1:]
data_stream_two_tables_1 = [ data_stream_two_tables_1 = [
[ [
@ -1298,8 +1298,8 @@ data_stream_two_tables_1 = [
] ]
# The streaming algorithm incorrectly includes a header and a footer. # The streaming algorithm incorrectly includes a header and a footer.
# Trimming the table for the test of hybrid, which doesn't include it. # Trimming the table for the test of network, which doesn't include it.
data_hybrid_two_tables_1 = data_stream_two_tables_1[3:-1] data_network_two_tables_1 = data_stream_two_tables_1[3:-1]
data_stream_two_tables_2 = [ data_stream_two_tables_2 = [
["Table 325. Arrests by Race: 2009", "", "", "", "", ""], ["Table 325. Arrests by Race: 2009", "", "", "", "", ""],
@ -1609,7 +1609,7 @@ data_stream_two_tables_2 = [
["1 Except forcible rape and prostitution.", "", "", "", "", ""], ["1 Except forcible rape and prostitution.", "", "", "", "", ""],
] ]
data_hybrid_two_tables_b_1 = [ data_network_two_tables_b_1 = [
["1", "Ghfhbdhj", "1", "Hgfdhgjsdhjdsf"], ["1", "Ghfhbdhj", "1", "Hgfdhgjsdhjdsf"],
["Vgvhgh", "Hj", "Hj", "Hj"], ["Vgvhgh", "Hj", "Hj", "Hj"],
["Hj", "Hj", "Hj", "Hj"], ["Hj", "Hj", "Hj", "Hj"],
@ -1619,17 +1619,17 @@ data_hybrid_two_tables_b_1 = [
["Hjdhshj", "Hjhjhh", "Ddnj", "dsxv"], ["Hjdhshj", "Hjhjhh", "Ddnj", "dsxv"],
] ]
data_hybrid_two_tables_b_2 = [ data_network_two_tables_b_2 = [
["Trtrt", "H", "Gh"], ["Trtrt", "H", "Gh"],
["Gh", "V", "Hv"], ["Gh", "V", "Hv"],
["Hv", "Bhjb", "hg"], ["Hv", "Bhjb", "hg"],
] ]
# The streaming algorithm incorrectly includes a header and a footer. # The streaming algorithm incorrectly includes a header and a footer.
# Trimming the table for the test of hybrid, which doesn't include it. # Trimming the table for the test of network, which doesn't include it.
data_hybrid_two_tables_2 = data_stream_two_tables_2[3:-1] data_network_two_tables_2 = data_stream_two_tables_2[3:-1]
data_hybrid_vertical_headers = [ data_network_vertical_headers = [
[ [
"", "",
"", "",
@ -2090,8 +2090,8 @@ data_stream_table_areas = [
["(each day of the payroll period)", ""], ["(each day of the payroll period)", ""],
] ]
# Hybrid doesn't recognize the footer as belonging to the table. # Network doesn't recognize the footer as belonging to the table.
data_hybrid_table_regions = data_stream_table_areas[:-1] data_network_table_regions = data_stream_table_areas[:-1]
data_stream_columns = [ data_stream_columns = [
[ [
@ -2613,9 +2613,9 @@ data_stream_split_text = [
] ]
# The stream algorithm excludes the string "Alphabetic Listing by type" # The stream algorithm excludes the string "Alphabetic Listing by type"
data_hybrid_split_text = [] data_network_split_text = []
data_hybrid_split_text.extend(data_stream_split_text) data_network_split_text.extend(data_stream_split_text)
data_hybrid_split_text[0] = [ data_network_split_text[0] = [
'FEB', 'RUAR', 'Y 2014 M27 (BUS)', '', 'FEB', 'RUAR', 'Y 2014 M27 (BUS)', '',
'ALPHABETIC LISTING BY T', 'YPE', '', '', '', 'ABLPDM27' 'ALPHABETIC LISTING BY T', 'YPE', '', '', '', 'ABLPDM27'
] ]
@ -2851,15 +2851,15 @@ data_stream_flag_size = [
], ],
] ]
# Hybrid adds more content into the header. # Network adds more content into the header.
data_hybrid_flag_size = [ data_network_flag_size = [
['', '', '', '', '(As at end-March)', '', '', '', '', '', ''], ['', '', '', '', '(As at end-March)', '', '', '', '', '', ''],
['', '', '', '', '', '', '', '', '', '', '(` Billion)'] ['', '', '', '', '', '', '', '', '', '', '(` Billion)']
] ]
data_hybrid_flag_size.extend(data_stream_flag_size) data_network_flag_size.extend(data_stream_flag_size)
data_hybrid_strip_text = [ data_network_strip_text = [
["VinsauVerre", ""], ["VinsauVerre", ""],
["LesBlancs", "12.5CL"], ["LesBlancs", "12.5CL"],
["A.O.PCôtesduRhône", ""], ["A.O.PCôtesduRhône", ""],
@ -2900,7 +2900,7 @@ data_hybrid_strip_text = [
] ]
# Stream only detects part of the table # Stream only detects part of the table
data_stream_strip_text = data_hybrid_strip_text[0:-13] data_stream_strip_text = data_network_strip_text[0:-13]
data_stream_edge_tol = [ data_stream_edge_tol = [
["Key figures", ""], ["Key figures", ""],
@ -2940,9 +2940,9 @@ data_stream_edge_tol = [
["period.", ""], ["period.", ""],
] ]
# The stream algorithm ends up including a footer, which hybrid correctly # The stream algorithm ends up including a footer, which network correctly
# skips. # skips.
data_hybrid_edge_tol = data_stream_edge_tol[:-3] data_network_edge_tol = data_stream_edge_tol[:-3]
data_lattice = [ data_lattice = [
[ [

View File

Before

Width:  |  Height:  |  Size: 103 KiB

After

Width:  |  Height:  |  Size: 103 KiB

View File

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 48 KiB

View File

Before

Width:  |  Height:  |  Size: 88 KiB

After

Width:  |  Height:  |  Size: 88 KiB

View File

Before

Width:  |  Height:  |  Size: 102 KiB

After

Width:  |  Height:  |  Size: 102 KiB

View File

@ -72,22 +72,22 @@ def test_cli_stream():
assert format_error in result.output assert format_error in result.output
def test_cli_hybrid(): def test_cli_network():
with TemporaryDirectory() as tempdir: with TemporaryDirectory() as tempdir:
infile = os.path.join(testdir, "budget.pdf") infile = os.path.join(testdir, "budget.pdf")
outfile = os.path.join(tempdir, "budget.csv") outfile = os.path.join(tempdir, "budget.csv")
runner = CliRunner() runner = CliRunner()
result = runner.invoke( result = runner.invoke(
cli, ["--format", "csv", "--output", outfile, "hybrid", infile] cli, ["--format", "csv", "--output", outfile, "network", infile]
) )
assert result.exit_code == 0 assert result.exit_code == 0
assert result.output == "Found 1 tables\n" assert result.output == "Found 1 tables\n"
result = runner.invoke(cli, ["--format", "csv", "hybrid", infile]) result = runner.invoke(cli, ["--format", "csv", "network", infile])
output_error = "Error: Please specify output file path using --output" output_error = "Error: Please specify output file path using --output"
assert output_error in result.output assert output_error in result.output
result = runner.invoke(cli, ["--output", outfile, "hybrid", infile]) result = runner.invoke(cli, ["--output", outfile, "network", infile])
format_error = "Please specify output file format using --format" format_error = "Please specify output file format using --format"
assert format_error in result.output assert format_error in result.output

View File

@ -150,32 +150,32 @@ def test_stream_layout_kwargs():
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid(): def test_network():
df = pd.DataFrame(data_stream) df = pd.DataFrame(data_stream)
filename = os.path.join(testdir, "health.pdf") filename = os.path.join(testdir, "health.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_table_rotated(): def test_network_table_rotated():
df = pd.DataFrame(data_hybrid_table_rotated) df = pd.DataFrame(data_network_table_rotated)
filename = os.path.join(testdir, "clockwise_table_2.pdf") filename = os.path.join(testdir, "clockwise_table_2.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
filename = os.path.join(testdir, "anticlockwise_table_2.pdf") filename = os.path.join(testdir, "anticlockwise_table_2.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_two_tables_a(): def test_network_two_tables_a():
df1 = pd.DataFrame(data_hybrid_two_tables_1) df1 = pd.DataFrame(data_network_two_tables_1)
df2 = pd.DataFrame(data_hybrid_two_tables_2) df2 = pd.DataFrame(data_network_two_tables_2)
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert len(tables) == 2 assert len(tables) == 2
assert df1.equals(tables[0].df) assert df1.equals(tables[0].df)
@ -183,104 +183,104 @@ def test_hybrid_two_tables_a():
# Reported as https://github.com/camelot-dev/camelot/issues/132 # Reported as https://github.com/camelot-dev/camelot/issues/132
def test_hybrid_two_tables_b(): def test_network_two_tables_b():
df1 = pd.DataFrame(data_hybrid_two_tables_b_1) df1 = pd.DataFrame(data_network_two_tables_b_1)
df2 = pd.DataFrame(data_hybrid_two_tables_b_2) df2 = pd.DataFrame(data_network_two_tables_b_2)
filename = os.path.join(testdir, "camelot-issue-132-multiple-tables.pdf") filename = os.path.join(testdir, "camelot-issue-132-multiple-tables.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert len(tables) == 2 assert len(tables) == 2
assert df1.equals(tables[0].df) assert df1.equals(tables[0].df)
assert df2.equals(tables[1].df) assert df2.equals(tables[1].df)
def test_hybrid_vertical_header(): def test_network_vertical_header():
"""Tests a complex table with a vertically text header. """Tests a complex table with a vertically text header.
""" """
df = pd.DataFrame(data_hybrid_vertical_headers) df = pd.DataFrame(data_network_vertical_headers)
filename = os.path.join(testdir, "vertical_header.pdf") filename = os.path.join(testdir, "vertical_header.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
assert len(tables) == 1 assert len(tables) == 1
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_table_regions(): def test_network_table_regions():
df = pd.DataFrame(data_hybrid_table_regions) df = pd.DataFrame(data_network_table_regions)
filename = os.path.join(testdir, "tabula/us-007.pdf") filename = os.path.join(testdir, "tabula/us-007.pdf")
# The "stream" test looks for a region in ["320,460,573,335"], which # The "stream" test looks for a region in ["320,460,573,335"], which
# should exclude the header. # should exclude the header.
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, flavor="hybrid", table_regions=["320,335,573,505"] filename, flavor="network", table_regions=["320,335,573,505"]
) )
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_table_areas(): def test_network_table_areas():
df = pd.DataFrame(data_stream_table_areas) df = pd.DataFrame(data_stream_table_areas)
filename = os.path.join(testdir, "tabula/us-007.pdf") filename = os.path.join(testdir, "tabula/us-007.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, flavor="hybrid", table_areas=["320,500,573,335"] filename, flavor="network", table_areas=["320,500,573,335"]
) )
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_columns(): def test_network_columns():
df = pd.DataFrame(data_stream_columns) df = pd.DataFrame(data_stream_columns)
filename = os.path.join(testdir, "mexican_towns.pdf") filename = os.path.join(testdir, "mexican_towns.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, flavor="hybrid", columns=["67,180,230,425,475"], row_tol=10 filename, flavor="network", columns=["67,180,230,425,475"], row_tol=10
) )
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_split_text(): def test_network_split_text():
df = pd.DataFrame(data_hybrid_split_text) df = pd.DataFrame(data_network_split_text)
filename = os.path.join(testdir, "tabula/m27.pdf") filename = os.path.join(testdir, "tabula/m27.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, filename,
flavor="hybrid", flavor="network",
columns=["72,95,209,327,442,529,566,606,683"], columns=["72,95,209,327,442,529,566,606,683"],
split_text=True, split_text=True,
) )
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_flag_size(): def test_network_flag_size():
df = pd.DataFrame(data_hybrid_flag_size) df = pd.DataFrame(data_network_flag_size)
filename = os.path.join(testdir, "superscript.pdf") filename = os.path.join(testdir, "superscript.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid", flag_size=True) tables = camelot.read_pdf(filename, flavor="network", flag_size=True)
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_strip_text(): def test_network_strip_text():
df = pd.DataFrame(data_hybrid_strip_text) df = pd.DataFrame(data_network_strip_text)
filename = os.path.join(testdir, "detect_vertical_false.pdf") filename = os.path.join(testdir, "detect_vertical_false.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid", strip_text=" ,\n") tables = camelot.read_pdf(filename, flavor="network", strip_text=" ,\n")
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_edge_tol(): def test_network_edge_tol():
df = pd.DataFrame(data_hybrid_edge_tol) df = pd.DataFrame(data_network_edge_tol)
filename = os.path.join(testdir, "edge_tol.pdf") filename = os.path.join(testdir, "edge_tol.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid", edge_tol=500) tables = camelot.read_pdf(filename, flavor="network", edge_tol=500)
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)
def test_hybrid_layout_kwargs(): def test_network_layout_kwargs():
df = pd.DataFrame(data_stream_layout_kwargs) df = pd.DataFrame(data_stream_layout_kwargs)
filename = os.path.join(testdir, "detect_vertical_false.pdf") filename = os.path.join(testdir, "detect_vertical_false.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, flavor="hybrid", layout_kwargs={"detect_vertical": False} filename, flavor="network", layout_kwargs={"detect_vertical": False}
) )
assert_frame_equal(df, tables[0].df) assert_frame_equal(df, tables[0].df)

View File

@ -15,7 +15,7 @@ filename = os.path.join(testdir, 'foo.pdf')
def test_unknown_flavor(): def test_unknown_flavor():
message = ("Unknown flavor specified." message = ("Unknown flavor specified."
" Use either 'lattice', 'stream', or 'hybrid'") " Use either 'lattice', 'stream', or 'network'")
with pytest.raises(NotImplementedError, match=message): with pytest.raises(NotImplementedError, match=message):
camelot.read_pdf(filename, flavor='chocolate') camelot.read_pdf(filename, flavor='chocolate')

View File

@ -62,9 +62,9 @@ def test_stream_grid_plot():
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE) baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE)
def test_hybrid_grid_plot(): def test_network_grid_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="network")
return unit_test_stable_plot(tables[0], 'grid') return unit_test_stable_plot(tables[0], 'grid')
@ -86,9 +86,9 @@ def test_stream_contour_plot():
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE) baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE)
def test_hybrid_contour_plot(): def test_network_contour_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='hybrid') tables = camelot.read_pdf(filename, flavor='network')
return unit_test_stable_plot(tables[0], 'contour') return unit_test_stable_plot(tables[0], 'contour')
@ -118,18 +118,18 @@ def test_stream_textedge_plot():
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE) baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE)
def test_hybrid_textedge_plot(): def test_network_textedge_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, debug=True, flavor='hybrid') tables = camelot.read_pdf(filename, debug=True, flavor='network')
return unit_test_stable_plot(tables[0], 'textedge') return unit_test_stable_plot(tables[0], 'textedge')
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE) baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE)
def test_hybrid_table_regions_textedge_plot(): def test_network_table_regions_textedge_plot():
filename = os.path.join(testdir, "tabula/us-007.pdf") filename = os.path.join(testdir, "tabula/us-007.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, debug=True, flavor="hybrid", filename, debug=True, flavor="network",
table_regions=["320,505,573,330"] table_regions=["320,505,573,330"]
) )
return unit_test_stable_plot(tables[0], 'textedge') return unit_test_stable_plot(tables[0], 'textedge')
@ -137,10 +137,10 @@ def test_hybrid_table_regions_textedge_plot():
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE) baseline_dir="files/baseline_plots", remove_text=True, tolerance=TOLERANCE)
def test_hybrid_table_areas_text_plot(): def test_network_table_areas_text_plot():
filename = os.path.join(testdir, "tabula/us-007.pdf") filename = os.path.join(testdir, "tabula/us-007.pdf")
tables = camelot.read_pdf( tables = camelot.read_pdf(
filename, debug=True, flavor="hybrid", filename, debug=True, flavor="network",
table_areas=["320,500,573,335"] table_areas=["320,500,573,335"]
) )
return unit_test_stable_plot(tables[0], 'text') return unit_test_stable_plot(tables[0], 'text')