Fix plotting unit tests

Enforce a deterministic order of textline plotting so that unit test output is consistent on Python 3.6.
Create a wrapper around camelot.plot for the unit tests that keeps rendering
consistent with older versions of matplotlib.
pull/153/head
Frh 2020-04-30 16:54:37 -07:00
parent f3aded5b17
commit d663dd18fd
8 changed files with 30 additions and 37 deletions

1
.gitignore vendored
View File

@ -7,6 +7,7 @@ dist/
prof/ prof/
*.egg-info/ *.egg-info/
.eggs/ .eggs/
.tox/
.coverage .coverage
coverage.xml coverage.xml

View File

@ -333,7 +333,7 @@ class TextNetworks(TextAlignments):
key=lambda textline: key=lambda textline:
( (
self._textline_to_alignments[textline].alignment_score(), self._textline_to_alignments[textline].alignment_score(),
-textline.y0 -textline.y0, -textline.x0
), ),
default=None default=None
) )

View File

@ -326,7 +326,11 @@ class PlotMethods():
alpha=0.5 alpha=0.5
) )
) )
for tl, alignments in network._textline_to_alignments.items(): for tl in sorted(
network._textline_to_alignments.keys(),
key=lambda textline: (-textline.y0, textline.x0)
):
alignments = network._textline_to_alignments[tl]
coords = get_textline_coords(tl) coords = get_textline_coords(tl)
alignment_id_h, tls_h = alignments.max_v() alignment_id_h, tls_h = alignments.max_v()
alignment_id_v, tls_v = alignments.max_h() alignment_id_v, tls_v = alignments.max_h()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 88 KiB

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 101 KiB

After

Width:  |  Height:  |  Size: 102 KiB

View File

@ -3,7 +3,6 @@
import os import os
import pytest import pytest
import matplotlib import matplotlib
import camelot import camelot
@ -16,7 +15,7 @@ import camelot
# To force upgrade: # To force upgrade:
# pip install --upgrade --force-reinstall matplotlib # pip install --upgrade --force-reinstall matplotlib
# To force usage of a Python 3.6 compatible version: # To force usage of a Python 3.6 compatible version:
# pip install "matplotlib==2.2.5" # pip install "matplotlib==3.0.3"
# This condition can be removed in favor of a version requirement bump for # This condition can be removed in favor of a version requirement bump for
# matplotlib once support for Python 3.5 is dropped. # matplotlib once support for Python 3.5 is dropped.
@ -26,43 +25,43 @@ testdir = os.path.dirname(os.path.abspath(__file__))
testdir = os.path.join(testdir, "files") testdir = os.path.join(testdir, "files")
@pytest.mark.skipif(LEGACY_MATPLOTLIB, def unit_test_stable_plot(table, kind):
reason="depends on a recent version of MatPlotLib") if not LEGACY_MATPLOTLIB:
# See https://matplotlib.org/3.2.1/users/whats_new.html#kerning-adjustments-now-use-correct-values # noqa
matplotlib.rcParams["text.kerning_factor"] = 6
return camelot.plot(table, kind=kind)
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_text_plot(): def test_text_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='text') return unit_test_stable_plot(tables[0], 'text')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_grid_plot(): def test_grid_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='grid') return unit_test_stable_plot(tables[0], 'grid')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_stream_grid_plot(): def test_stream_grid_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename, flavor="stream") tables = camelot.read_pdf(filename, flavor="stream")
return camelot.plot(tables[0], kind='grid') return unit_test_stable_plot(tables[0], 'grid')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_grid_plot(): def test_hybrid_grid_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename, flavor="hybrid") tables = camelot.read_pdf(filename, flavor="hybrid")
return camelot.plot(tables[0], kind='grid') return unit_test_stable_plot(tables[0], 'grid')
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
@ -70,67 +69,55 @@ def test_hybrid_grid_plot():
def test_lattice_contour_plot(): def test_lattice_contour_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='contour') return unit_test_stable_plot(tables[0], 'contour')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_stream_contour_plot(): def test_stream_contour_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='stream') tables = camelot.read_pdf(filename, flavor='stream')
return camelot.plot(tables[0], kind='contour') return unit_test_stable_plot(tables[0], 'contour')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_contour_plot(): def test_hybrid_contour_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='hybrid') tables = camelot.read_pdf(filename, flavor='hybrid')
return camelot.plot(tables[0], kind='contour') return unit_test_stable_plot(tables[0], 'contour')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_line_plot(): def test_line_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='line') return unit_test_stable_plot(tables[0], 'line')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_joint_plot(): def test_joint_plot():
filename = os.path.join(testdir, "foo.pdf") filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
return camelot.plot(tables[0], kind='joint') return unit_test_stable_plot(tables[0], 'joint')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_stream_textedge_plot(): def test_stream_textedge_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, flavor='stream') tables = camelot.read_pdf(filename, flavor='stream')
return camelot.plot(tables[0], kind='textedge') return unit_test_stable_plot(tables[0], 'textedge')
@pytest.mark.skipif(LEGACY_MATPLOTLIB,
reason="depends on a recent version of MatPlotLib")
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
def test_hybrid_textedge_plot(): def test_hybrid_textedge_plot():
filename = os.path.join(testdir, "tabula/12s0324.pdf") filename = os.path.join(testdir, "tabula/12s0324.pdf")
tables = camelot.read_pdf(filename, debug=True, flavor='hybrid') tables = camelot.read_pdf(filename, debug=True, flavor='hybrid')
return camelot.plot(tables[0], kind='textedge') return unit_test_stable_plot(tables[0], 'textedge')
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
@ -141,7 +128,8 @@ def test_hybrid_table_regions_textedge_plot():
filename, debug=True, flavor="hybrid", filename, debug=True, flavor="hybrid",
table_regions=["320,505,573,330"] table_regions=["320,505,573,330"]
) )
return camelot.plot(tables[0], kind='textedge') return unit_test_stable_plot(tables[0], 'textedge')
@pytest.mark.mpl_image_compare( @pytest.mark.mpl_image_compare(
baseline_dir="files/baseline_plots", remove_text=True) baseline_dir="files/baseline_plots", remove_text=True)
@ -151,4 +139,4 @@ def test_hybrid_table_areas_text_plot():
filename, debug=True, flavor="hybrid", filename, debug=True, flavor="hybrid",
table_areas=["320,500,573,335"] table_areas=["320,500,573,335"]
) )
return camelot.plot(tables[0], kind='text') return unit_test_stable_plot(tables[0], 'text')