From 8abe02528b4f0daaaeb9e36df2ca6d24afde33d4 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 11 Jul 2021 17:25:56 +0530 Subject: [PATCH 1/4] Make ghostscript default backend and add support for string keywords --- camelot/backends/ghostscript_backend.py | 4 +- camelot/backends/image_conversion.py | 10 +-- camelot/backends/poppler_backend.py | 2 +- camelot/parsers/lattice.py | 29 +++++- docs/user/advanced.rst | 7 +- tests/test_errors.py | 115 +++++++++++++----------- tests/test_image_conversion_backend.py | 41 +++++---- tests/test_plotting.py | 19 ++-- 8 files changed, 135 insertions(+), 92 deletions(-) diff --git a/camelot/backends/ghostscript_backend.py b/camelot/backends/ghostscript_backend.py index 5e93cdb..1de7da1 100644 --- a/camelot/backends/ghostscript_backend.py +++ b/camelot/backends/ghostscript_backend.py @@ -29,8 +29,8 @@ class GhostscriptBackend(object): def convert(self, pdf_path, png_path, resolution=300): if not self.installed(): raise OSError( - "Ghostscript is not installed. Please install it using the instructions" - "here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html" + "Ghostscript is not installed. You can install it using the instructions" + " here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html" ) import ghostscript diff --git a/camelot/backends/image_conversion.py b/camelot/backends/image_conversion.py index a9b6004..7d2c4d7 100644 --- a/camelot/backends/image_conversion.py +++ b/camelot/backends/image_conversion.py @@ -3,21 +3,21 @@ from .poppler_backend import PopplerBackend from .ghostscript_backend import GhostscriptBackend -backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend} +BACKENDS = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend} class ImageConversionBackend(object): def __init__(self, backend="poppler", use_fallback=True): - if backend not in backends.keys(): + if backend not in BACKENDS.keys(): raise ValueError(f"Image conversion backend '{backend}' not supported") self.backend = backend self.use_fallback = use_fallback - self.fallbacks = list(filter(lambda x: x != backend, backends.keys())) + self.fallbacks = list(filter(lambda x: x != backend, BACKENDS.keys())) def convert(self, pdf_path, png_path): try: - converter = backends[self.backend]() + converter = BACKENDS[self.backend]() converter.convert(pdf_path, png_path) except Exception as e: import sys @@ -25,7 +25,7 @@ class ImageConversionBackend(object): if self.use_fallback: for fallback in self.fallbacks: try: - converter = backends[fallback]() + converter = BACKENDS[fallback]() converter.convert(pdf_path, png_path) except Exception as e: raise type(e)( diff --git a/camelot/backends/poppler_backend.py b/camelot/backends/poppler_backend.py index ab12bcf..4103372 100644 --- a/camelot/backends/poppler_backend.py +++ b/camelot/backends/poppler_backend.py @@ -9,7 +9,7 @@ class PopplerBackend(object): pdftopng_executable = shutil.which("pdftopng") if pdftopng_executable is None: raise OSError( - "pdftopng is not installed. Please install it using the `pip install pdftopng` command." + "pdftopng is not installed. You can install it using the 'pip install pdftopng' command." ) pdftopng_command = [pdftopng_executable, pdf_path, png_path] diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 02ef794..2fbd195 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -28,7 +28,7 @@ from ..image_processing import ( find_contours, find_joints, ) -from ..backends import ImageConversionBackend +from ..backends.image_conversion import BACKENDS logger = logging.getLogger("camelot") @@ -111,7 +111,7 @@ class Lattice(BaseParser): threshold_constant=-2, iterations=0, resolution=300, - backend=ImageConversionBackend(), + backend="ghostscript", **kwargs, ): self.table_regions = table_regions @@ -129,7 +129,30 @@ class Lattice(BaseParser): self.threshold_constant = threshold_constant self.iterations = iterations self.resolution = resolution - self.backend = backend + self.backend = Lattice._get_backend(backend) + + @staticmethod + def _get_backend(backend): + def implements_convert(): + methods = [ + method for method in dir(backend) if method.startswith("__") is False + ] + return "convert" in methods + + if isinstance(backend, str): + if backend in BACKENDS.keys(): + return BACKENDS[backend]() + else: + raise NotImplementedError( + f"Unknown backend '{backend}' specified. Please use either 'poppler' or 'ghostscript'." + ) + else: + if not implements_convert(): + raise NotImplementedError( + f"'{backend}' must implement a 'convert' method" + ) + + return backend @staticmethod def _reduce_index(t, idx, shift_text): diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index 09bfc4e..6a551d9 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -644,8 +644,5 @@ When using the :ref:`Lattice ` flavor, Camelot uses `pdftopng >> from camelot.backends.poppler_backend import PopplerBackend - >>> from camelot.backends.ghostscript_backend import GhostscriptBackend - >>> - >>> tables = camelot.read_pdf(filename, backend=PopplerBackend()) - >>> tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) + >>> tables = camelot.read_pdf(filename, backend="poppler") + >>> tables = camelot.read_pdf(filename, backend="ghostscript") diff --git a/tests/test_errors.py b/tests/test_errors.py index 595c54b..f488aee 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -32,56 +32,6 @@ def test_unsupported_format(): tables = camelot.read_pdf(filename) -def test_stream_equal_length(): - message = "Length of table_areas and columns" " should be equal" - with pytest.raises(ValueError, match=message): - tables = camelot.read_pdf( - filename, - flavor="stream", - table_areas=["10,20,30,40"], - columns=["10,20,30,40", "10,20,30,40"], - ) - - -def test_image_warning(): - filename = os.path.join(testdir, "image.pdf") - with warnings.catch_warnings(): - warnings.simplefilter("error") - with pytest.raises(UserWarning) as e: - tables = camelot.read_pdf(filename) - assert ( - str(e.value) - == "page-1 is image-based, camelot only works on text-based pages." - ) - - -def test_lattice_no_tables_on_page(): - filename = os.path.join(testdir, "empty.pdf") - with warnings.catch_warnings(): - warnings.simplefilter("error") - with pytest.raises(UserWarning) as e: - tables = camelot.read_pdf(filename, flavor="lattice") - assert str(e.value) == "No tables found on page-1" - - -def test_stream_no_tables_on_page(): - filename = os.path.join(testdir, "empty.pdf") - with warnings.catch_warnings(): - warnings.simplefilter("error") - with pytest.raises(UserWarning) as e: - tables = camelot.read_pdf(filename, flavor="stream") - assert str(e.value) == "No tables found on page-1" - - -def test_stream_no_tables_in_area(): - filename = os.path.join(testdir, "only_page_number.pdf") - with warnings.catch_warnings(): - warnings.simplefilter("error") - with pytest.raises(UserWarning) as e: - tables = camelot.read_pdf(filename, flavor="stream") - assert str(e.value) == "No tables found in table area 1" - - def test_no_tables_found_logs_suppressed(): filename = os.path.join(testdir, "foo.pdf") with warnings.catch_warnings(): @@ -118,3 +68,68 @@ def test_bad_password(): message = "file has not been decrypted" with pytest.raises(Exception, match=message): tables = camelot.read_pdf(filename, password="wrongpass") + + +def test_stream_equal_length(): + message = "Length of table_areas and columns" " should be equal" + with pytest.raises(ValueError, match=message): + tables = camelot.read_pdf( + filename, + flavor="stream", + table_areas=["10,20,30,40"], + columns=["10,20,30,40", "10,20,30,40"], + ) + + +def test_image_warning(): + filename = os.path.join(testdir, "image.pdf") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with pytest.raises(UserWarning) as e: + tables = camelot.read_pdf(filename) + assert ( + str(e.value) + == "page-1 is image-based, camelot only works on text-based pages." + ) + + +def test_stream_no_tables_on_page(): + filename = os.path.join(testdir, "empty.pdf") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with pytest.raises(UserWarning) as e: + tables = camelot.read_pdf(filename, flavor="stream") + assert str(e.value) == "No tables found on page-1" + + +def test_stream_no_tables_in_area(): + filename = os.path.join(testdir, "only_page_number.pdf") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with pytest.raises(UserWarning) as e: + tables = camelot.read_pdf(filename, flavor="stream") + assert str(e.value) == "No tables found in table area 1" + + +def test_lattice_no_tables_on_page(): + filename = os.path.join(testdir, "empty.pdf") + with warnings.catch_warnings(): + warnings.simplefilter("error") + with pytest.raises(UserWarning) as e: + tables = camelot.read_pdf(filename, flavor="lattice") + assert str(e.value) == "No tables found on page-1" + + +def test_lattice_unknown_backend(): + message = "Unknown backend 'mupdf' specified. Please use either 'poppler' or 'ghostscript'." + with pytest.raises(NotImplementedError, match=message): + tables = camelot.read_pdf(filename, backend="mupdf") + + +def test_lattice_no_convert_method(): + class ConversionBackend(object): + pass + + message = "must implement a 'convert' method" + with pytest.raises(NotImplementedError, match=message): + tables = camelot.read_pdf(filename, backend=ConversionBackend()) diff --git a/tests/test_image_conversion_backend.py b/tests/test_image_conversion_backend.py index 8074cac..39f56e6 100644 --- a/tests/test_image_conversion_backend.py +++ b/tests/test_image_conversion_backend.py @@ -4,18 +4,16 @@ import pytest import camelot.backends.image_conversion from camelot.backends import ImageConversionBackend -from camelot.backends.poppler_backend import PopplerBackend -from camelot.backends.ghostscript_backend import GhostscriptBackend class PopplerBackendError(object): def convert(self, pdf_path, png_path): - raise ValueError('conversion failed') + raise ValueError("Image conversion failed") class GhostscriptBackendError(object): def convert(self, pdf_path, png_path): - raise ValueError('conversion failed') + raise ValueError("Image conversion failed") class GhostscriptBackendNoError(object): @@ -24,26 +22,39 @@ class GhostscriptBackendNoError(object): def test_poppler_backend_error_when_no_use_fallback(monkeypatch): - backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendNoError} - monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True) + BACKENDS = { + "poppler": PopplerBackendError, + "ghostscript": GhostscriptBackendNoError, + } + monkeypatch.setattr( + "camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True + ) backend = ImageConversionBackend(use_fallback=False) - message = "conversion failed with image conversion backend 'poppler'" + message = "Image conversion failed with image conversion backend 'poppler'" with pytest.raises(ValueError, match=message): - backend.convert('foo', 'bar') + backend.convert("foo", "bar") + def test_ghostscript_backend_when_use_fallback(monkeypatch): - backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendNoError} - monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True) + BACKENDS = { + "poppler": PopplerBackendError, + "ghostscript": GhostscriptBackendNoError, + } + monkeypatch.setattr( + "camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True + ) backend = ImageConversionBackend() - backend.convert('foo', 'bar') + backend.convert("foo", "bar") def test_ghostscript_backend_error_when_use_fallback(monkeypatch): - backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendError} - monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True) + BACKENDS = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendError} + monkeypatch.setattr( + "camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True + ) backend = ImageConversionBackend() - message = "conversion failed with image conversion backend 'ghostscript'" + message = "Image conversion failed with image conversion backend 'ghostscript'" with pytest.raises(ValueError, match=message): - backend.convert('foo', 'bar') + backend.convert("foo", "bar") diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 22d9f6f..1ef178f 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -6,9 +6,6 @@ import sys import pytest import camelot -from camelot.backends.poppler_backend import PopplerBackend -from camelot.backends.ghostscript_backend import GhostscriptBackend - testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") @@ -31,7 +28,7 @@ def test_textedge_plot(): @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_lattice_contour_plot_poppler(): filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=PopplerBackend()) + tables = camelot.read_pdf(filename, backend="poppler") return camelot.plot(tables[0], kind="contour") @@ -41,7 +38,7 @@ def test_lattice_contour_plot_ghostscript(): pytest.skip("Skipping ghostscript test on Windows") filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) + tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="contour") @@ -55,7 +52,7 @@ def test_stream_contour_plot(): @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_line_plot_poppler(): filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=PopplerBackend()) + tables = camelot.read_pdf(filename, backend="poppler") return camelot.plot(tables[0], kind="line") @@ -65,14 +62,14 @@ def test_line_plot_ghostscript(): pytest.skip("Skipping ghostscript test on Windows") filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) + tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="line") @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_joint_plot_poppler(): filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=PopplerBackend()) + tables = camelot.read_pdf(filename, backend="poppler") return camelot.plot(tables[0], kind="joint") @@ -82,14 +79,14 @@ def test_joint_plot_ghostscript(): pytest.skip("Skipping ghostscript test on Windows") filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) + tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="joint") @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_grid_plot_poppler(): filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=PopplerBackend()) + tables = camelot.read_pdf(filename, backend="poppler") return camelot.plot(tables[0], kind="grid") @@ -99,5 +96,5 @@ def test_grid_plot_ghostscript(): pytest.skip("Skipping ghostscript test on Windows") filename = os.path.join(testdir, "foo.pdf") - tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) + tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="grid") From 793ddaf42ff8442fcefec3bea6b89e2d052d59df Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 11 Jul 2021 17:28:24 +0530 Subject: [PATCH 2/4] Update HISTORY.md --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index f8ac1cf..9c0a5b9 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,7 +6,7 @@ master **Improvements** -- Add pdftopng for image conversion and use ghostscript as fallback. [#198](https://github.com/camelot-dev/camelot/pull/198) by Vinayak Mehta. +- Add support for multiple image conversion backends. [#198](https://github.com/camelot-dev/camelot/pull/198) and [#253](https://github.com/camelot-dev/camelot/pull/253) by Vinayak Mehta. - Add markdown export format. [#222](https://github.com/camelot-dev/camelot/pull/222/) by [Lucas Cimon](https://github.com/Lucas-C). **Documentation** From f160c1d44dfce67816b0f1467d9ffdc94e1de33d Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 11 Jul 2021 18:55:56 +0530 Subject: [PATCH 3/4] Raise ghostscript deprecation warning and skip ghostscript tests on windows --- camelot/parsers/lattice.py | 6 ++++++ docs/user/advanced.rst | 16 ++++++++-------- tests/test_cli.py | 3 +++ tests/test_common.py | 15 ++++++--------- tests/test_errors.py | 16 ++++++++++++++++ tests/test_lattice.py | 11 +++++++++++ tests/test_plotting.py | 19 +++++++------------ 7 files changed, 57 insertions(+), 29 deletions(-) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 2fbd195..13d1d61 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -141,6 +141,12 @@ class Lattice(BaseParser): if isinstance(backend, str): if backend in BACKENDS.keys(): + if backend == "ghostscript": + raise DeprecationWarning( + "'ghostscript' will be replaced by 'poppler' as the default image conversion" + " backend in v0.12.0. You can try out 'poppler' with backend='poppler'." + ) + return BACKENDS[backend]() else: raise NotImplementedError( diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index 6a551d9..c590058 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -629,7 +629,14 @@ To deal with such cases, you can tweak PDFMiner's `LAParams kwargs ` flavor, Camelot uses `pdftopng `_ to convert PDF pages to images for line recognition. This should work out of the box on most operating systems. However, if you get an error, you can supply your own image conversion backend to Camelot:: +When using the :ref:`Lattice ` flavor, Camelot uses ``ghostscript`` to convert PDF pages to images for line recognition. If you face installation issues with ``ghostscript``, you can use an alternate image conversion backend called ``poppler``. You can specify which image conversion backend you want to use with:: + + >>> tables = camelot.read_pdf(filename, backend="ghostscript") # default + >>> tables = camelot.read_pdf(filename, backend="poppler") + +.. note:: ``poppler`` will be made the default image conversion backend (replacing ``ghostscript``) with ``v0.12.0``. + +If you face issues with both ``ghostscript`` and ``poppler``, you can supply your own image conversion backend:: >>> class ConversionBackend(object): >>> def convert(pdf_path, png_path): @@ -639,10 +646,3 @@ When using the :ref:`Lattice ` flavor, Camelot uses `pdftopng >> pass >>> >>> tables = camelot.read_pdf(filename, backend=ConversionBackend()) - -.. note:: If image conversion using ``pdftopng`` fails, Camelot falls back to ``ghostscript`` to try image conversion again, and if that fails, it raises an error. - -In case you want to be explicit about the image conversion backend that Camelot should use, you can supply them like this:: - - >>> tables = camelot.read_pdf(filename, backend="poppler") - >>> tables = camelot.read_pdf(filename, backend="ghostscript") diff --git a/tests/test_cli.py b/tests/test_cli.py index a496be6..5ae7980 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -11,6 +11,8 @@ from camelot.utils import TemporaryDirectory testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") +skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) + def test_help_output(): runner = CliRunner() @@ -26,6 +28,7 @@ def test_help_output(): ) +@skip_on_windows def test_cli_lattice(): with TemporaryDirectory() as tempdir: infile = os.path.join(testdir, "foo.pdf") diff --git a/tests/test_common.py b/tests/test_common.py index 86889d5..0f33ebb 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -16,6 +16,8 @@ from .data import * testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") +skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) + def test_version_generation(): version = (0, 7, 3) @@ -32,6 +34,7 @@ def test_version_generation_with_prerelease_revision(): ) +@skip_on_windows def test_parsing_report(): parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1} @@ -61,10 +64,8 @@ def test_repr_poppler(): assert repr(tables[0].cells[0][0]) == "" +@skip_on_windows def test_repr_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - return True - filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf( filename, @@ -85,10 +86,8 @@ def test_url_poppler(): assert repr(tables[0].cells[0][0]) == "" +@skip_on_windows def test_url_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - return True - url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf" tables = camelot.read_pdf( url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False) @@ -126,10 +125,8 @@ def test_pages_poppler(): assert repr(tables[0].cells[0][0]) == "" +@skip_on_windows def test_pages_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - return True - url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf" tables = camelot.read_pdf( url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False) diff --git a/tests/test_errors.py b/tests/test_errors.py index f488aee..5a37112 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -12,6 +12,8 @@ testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") filename = os.path.join(testdir, "foo.pdf") +skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) + def test_unknown_flavor(): message = "Unknown flavor specified." " Use either 'lattice' or 'stream'" @@ -32,6 +34,7 @@ def test_unsupported_format(): tables = camelot.read_pdf(filename) +@skip_on_windows def test_no_tables_found_logs_suppressed(): filename = os.path.join(testdir, "foo.pdf") with warnings.catch_warnings(): @@ -133,3 +136,16 @@ def test_lattice_no_convert_method(): message = "must implement a 'convert' method" with pytest.raises(NotImplementedError, match=message): tables = camelot.read_pdf(filename, backend=ConversionBackend()) + + +def test_lattice_ghostscript_deprecation_warning(): + ghostscript_deprecation_warning = ( + "'ghostscript' will be replaced by 'poppler' as the default image conversion" + " backend in v0.12.0. You can try out 'poppler' with backend='poppler'." + ) + + with warnings.catch_warnings(): + warnings.simplefilter("error") + with pytest.raises(DeprecationWarning) as e: + tables = camelot.read_pdf(filename) + assert str(e.value) == ghostscript_deprecation_warning diff --git a/tests/test_lattice.py b/tests/test_lattice.py index 7706b4a..edd0966 100644 --- a/tests/test_lattice.py +++ b/tests/test_lattice.py @@ -14,7 +14,10 @@ from .data import * testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") +skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) + +@skip_on_windows def test_lattice(): df = pd.DataFrame(data_lattice) @@ -25,6 +28,7 @@ def test_lattice(): assert_frame_equal(df, tables[0].df) +@skip_on_windows def test_lattice_table_rotated(): df = pd.DataFrame(data_lattice_table_rotated) @@ -37,6 +41,7 @@ def test_lattice_table_rotated(): assert_frame_equal(df, tables[0].df) +@skip_on_windows def test_lattice_two_tables(): df1 = pd.DataFrame(data_lattice_two_tables_1) df2 = pd.DataFrame(data_lattice_two_tables_2) @@ -48,6 +53,7 @@ def test_lattice_two_tables(): assert df2.equals(tables[1].df) +@skip_on_windows def test_lattice_table_regions(): df = pd.DataFrame(data_lattice_table_regions) @@ -56,6 +62,7 @@ def test_lattice_table_regions(): assert_frame_equal(df, tables[0].df) +@skip_on_windows def test_lattice_table_areas(): df = pd.DataFrame(data_lattice_table_areas) @@ -64,6 +71,7 @@ def test_lattice_table_areas(): assert_frame_equal(df, tables[0].df) +@skip_on_windows def test_lattice_process_background(): df = pd.DataFrame(data_lattice_process_background) @@ -72,6 +80,7 @@ def test_lattice_process_background(): assert_frame_equal(df, tables[1].df) +@skip_on_windows def test_lattice_copy_text(): df = pd.DataFrame(data_lattice_copy_text) @@ -80,6 +89,7 @@ def test_lattice_copy_text(): assert_frame_equal(df, tables[0].df) +@skip_on_windows def test_lattice_shift_text(): df_lt = pd.DataFrame(data_lattice_shift_text_left_top) df_disable = pd.DataFrame(data_lattice_shift_text_disable) @@ -96,6 +106,7 @@ def test_lattice_shift_text(): assert df_rb.equals(tables[0].df) +@skip_on_windows def test_lattice_arabic(): df = pd.DataFrame(data_arabic) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 1ef178f..d38fbad 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -10,7 +10,10 @@ import camelot testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") +skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) + +@skip_on_windows @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_text_plot(): filename = os.path.join(testdir, "foo.pdf") @@ -32,11 +35,9 @@ def test_lattice_contour_plot_poppler(): return camelot.plot(tables[0], kind="contour") +@skip_on_windows @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_lattice_contour_plot_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - pytest.skip("Skipping ghostscript test on Windows") - filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="contour") @@ -56,11 +57,9 @@ def test_line_plot_poppler(): return camelot.plot(tables[0], kind="line") +@skip_on_windows @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_line_plot_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - pytest.skip("Skipping ghostscript test on Windows") - filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="line") @@ -73,11 +72,9 @@ def test_joint_plot_poppler(): return camelot.plot(tables[0], kind="joint") +@skip_on_windows @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_joint_plot_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - pytest.skip("Skipping ghostscript test on Windows") - filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="joint") @@ -90,11 +87,9 @@ def test_grid_plot_poppler(): return camelot.plot(tables[0], kind="grid") +@skip_on_windows @pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True) def test_grid_plot_ghostscript(): - if sys.platform not in ["linux", "darwin"]: - pytest.skip("Skipping ghostscript test on Windows") - filename = os.path.join(testdir, "foo.pdf") tables = camelot.read_pdf(filename, backend="ghostscript") return camelot.plot(tables[0], kind="grid") From 02f53e7654a68d1b27d40f8e9dbba542f25384e3 Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Sun, 11 Jul 2021 19:15:36 +0530 Subject: [PATCH 4/4] Warn instead of raise, fix imports, and use skipif instead of skip --- camelot/parsers/lattice.py | 5 +++-- tests/test_cli.py | 9 +++++++-- tests/test_common.py | 6 +++++- tests/test_errors.py | 10 +++++++--- tests/test_lattice.py | 7 ++++++- tests/test_plotting.py | 5 ++++- tests/test_stream.py | 1 + 7 files changed, 33 insertions(+), 10 deletions(-) diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 13d1d61..9687ee8 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -142,9 +142,10 @@ class Lattice(BaseParser): if isinstance(backend, str): if backend in BACKENDS.keys(): if backend == "ghostscript": - raise DeprecationWarning( + warnings.warn( "'ghostscript' will be replaced by 'poppler' as the default image conversion" - " backend in v0.12.0. You can try out 'poppler' with backend='poppler'." + " backend in v0.12.0. You can try out 'poppler' with backend='poppler'.", + DeprecationWarning ) return BACKENDS[backend]() diff --git a/tests/test_cli.py b/tests/test_cli.py index 5ae7980..70b5b7b 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- import os +import sys +import pytest from click.testing import CliRunner from camelot.cli import cli @@ -11,7 +13,10 @@ from camelot.utils import TemporaryDirectory testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") -skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Ghostscript not installed in Windows test environment", +) def test_help_output(): @@ -38,7 +43,7 @@ def test_cli_lattice(): cli, ["--format", "csv", "--output", outfile, "lattice", infile] ) assert result.exit_code == 0 - assert result.output == "Found 1 tables\n" + assert "Found 1 tables" in result.output result = runner.invoke(cli, ["--format", "csv", "lattice", infile]) output_error = "Error: Please specify output file path using --output" diff --git a/tests/test_common.py b/tests/test_common.py index 0f33ebb..9e07efa 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -3,6 +3,7 @@ import os import sys +import pytest import pandas as pd from pandas.testing import assert_frame_equal @@ -16,7 +17,10 @@ from .data import * testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") -skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Ghostscript not installed in Windows test environment", +) def test_version_generation(): diff --git a/tests/test_errors.py b/tests/test_errors.py index 5a37112..1cdb344 100755 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os +import sys import warnings import pytest @@ -12,7 +13,10 @@ testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") filename = os.path.join(testdir, "foo.pdf") -skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Ghostscript not installed in Windows test environment", +) def test_unknown_flavor(): @@ -87,7 +91,7 @@ def test_stream_equal_length(): def test_image_warning(): filename = os.path.join(testdir, "image.pdf") with warnings.catch_warnings(): - warnings.simplefilter("error") + warnings.simplefilter("error", category=UserWarning) with pytest.raises(UserWarning) as e: tables = camelot.read_pdf(filename) assert ( @@ -117,7 +121,7 @@ def test_stream_no_tables_in_area(): def test_lattice_no_tables_on_page(): filename = os.path.join(testdir, "empty.pdf") with warnings.catch_warnings(): - warnings.simplefilter("error") + warnings.simplefilter("error", category=UserWarning) with pytest.raises(UserWarning) as e: tables = camelot.read_pdf(filename, flavor="lattice") assert str(e.value) == "No tables found on page-1" diff --git a/tests/test_lattice.py b/tests/test_lattice.py index edd0966..7636b1b 100644 --- a/tests/test_lattice.py +++ b/tests/test_lattice.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- import os +import sys +import pytest import pandas as pd from pandas.testing import assert_frame_equal @@ -14,7 +16,10 @@ from .data import * testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") -skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Ghostscript not installed in Windows test environment", +) @skip_on_windows diff --git a/tests/test_plotting.py b/tests/test_plotting.py index d38fbad..071dfff 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -10,7 +10,10 @@ import camelot testdir = os.path.dirname(os.path.abspath(__file__)) testdir = os.path.join(testdir, "files") -skip_on_windows = pytest.mark.skip(sys.platform.startswith("win")) +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Ghostscript not installed in Windows test environment", +) @skip_on_windows diff --git a/tests/test_stream.py b/tests/test_stream.py index 4a0ec0c..0626ea7 100644 --- a/tests/test_stream.py +++ b/tests/test_stream.py @@ -2,6 +2,7 @@ import os +import pytest import pandas as pd from pandas.testing import assert_frame_equal