Raise ghostscript deprecation warning and skip ghostscript tests on windows
parent
793ddaf42f
commit
f160c1d44d
|
|
@ -141,6 +141,12 @@ class Lattice(BaseParser):
|
||||||
|
|
||||||
if isinstance(backend, str):
|
if isinstance(backend, str):
|
||||||
if backend in BACKENDS.keys():
|
if backend in BACKENDS.keys():
|
||||||
|
if backend == "ghostscript":
    # Emit a warning instead of raising: a raised DeprecationWarning would
    # abort before the backend is returned, making the 'ghostscript'
    # backend unusable rather than merely deprecated.
    import warnings

    warnings.warn(
        "'ghostscript' will be replaced by 'poppler' as the default image conversion"
        " backend in v0.12.0. You can try out 'poppler' with backend='poppler'.",
        DeprecationWarning,
    )
|
||||||
|
|
||||||
return BACKENDS[backend]()
|
return BACKENDS[backend]()
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
|
|
|
||||||
|
|
@ -629,7 +629,14 @@ To deal with such cases, you can tweak PDFMiner's `LAParams kwargs <https://gith
|
||||||
Use alternate image conversion backends
|
Use alternate image conversion backends
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
|
|
||||||
When using the :ref:`Lattice <lattice>` flavor, Camelot uses `pdftopng <https://github.com/vinayak-mehta/pdftopng>`_ to convert PDF pages to images for line recognition. This should work out of the box on most operating systems. However, if you get an error, you can supply your own image conversion backend to Camelot::
|
When using the :ref:`Lattice <lattice>` flavor, Camelot uses ``ghostscript`` to convert PDF pages to images for line recognition. If you face installation issues with ``ghostscript``, you can use an alternate image conversion backend called ``poppler``. You can specify which image conversion backend you want to use with::
|
||||||
|
|
||||||
|
>>> tables = camelot.read_pdf(filename, backend="ghostscript") # default
|
||||||
|
>>> tables = camelot.read_pdf(filename, backend="poppler")
|
||||||
|
|
||||||
|
.. note:: ``poppler`` will replace ``ghostscript`` as the default image conversion backend in ``v0.12.0``.
|
||||||
|
|
||||||
|
If you face issues with both ``ghostscript`` and ``poppler``, you can supply your own image conversion backend::
|
||||||
|
|
||||||
>>> class ConversionBackend(object):
|
>>> class ConversionBackend(object):
|
||||||
>>> def convert(pdf_path, png_path):
|
>>> def convert(pdf_path, png_path):
|
||||||
|
|
@ -639,10 +646,3 @@ When using the :ref:`Lattice <lattice>` flavor, Camelot uses `pdftopng <https://
|
||||||
>>> pass
|
>>> pass
|
||||||
>>>
|
>>>
|
||||||
>>> tables = camelot.read_pdf(filename, backend=ConversionBackend())
|
>>> tables = camelot.read_pdf(filename, backend=ConversionBackend())
|
||||||
|
|
||||||
.. note:: If image conversion using ``pdftopng`` fails, Camelot falls back to ``ghostscript`` to try image conversion again, and if that fails, it raises an error.
|
|
||||||
|
|
||||||
In case you want to be explicit about the image conversion backend that Camelot should use, you can supply them like this::
|
|
||||||
|
|
||||||
>>> tables = camelot.read_pdf(filename, backend="poppler")
|
|
||||||
>>> tables = camelot.read_pdf(filename, backend="ghostscript")
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,8 @@ from camelot.utils import TemporaryDirectory
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
# ``pytest.mark.skip`` skips unconditionally (its argument is the reason),
# so ``skipif`` is needed to restrict the skip to Windows.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="ghostscript tests are skipped on Windows",
)
|
||||||
|
|
||||||
|
|
||||||
def test_help_output():
|
def test_help_output():
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
|
|
@ -26,6 +28,7 @@ def test_help_output():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_cli_lattice():
|
def test_cli_lattice():
|
||||||
with TemporaryDirectory() as tempdir:
|
with TemporaryDirectory() as tempdir:
|
||||||
infile = os.path.join(testdir, "foo.pdf")
|
infile = os.path.join(testdir, "foo.pdf")
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,8 @@ from .data import *
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
# ``pytest.mark.skip`` skips unconditionally (its argument is the reason),
# so ``skipif`` is needed to restrict the skip to Windows.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="ghostscript tests are skipped on Windows",
)
|
||||||
|
|
||||||
|
|
||||||
def test_version_generation():
|
def test_version_generation():
|
||||||
version = (0, 7, 3)
|
version = (0, 7, 3)
|
||||||
|
|
@ -32,6 +34,7 @@ def test_version_generation_with_prerelease_revision():
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_parsing_report():
|
def test_parsing_report():
|
||||||
parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
|
parsing_report = {"accuracy": 99.02, "whitespace": 12.24, "order": 1, "page": 1}
|
||||||
|
|
||||||
|
|
@ -61,10 +64,8 @@ def test_repr_poppler():
|
||||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_repr_ghostscript():
|
def test_repr_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
return True
|
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(
|
tables = camelot.read_pdf(
|
||||||
filename,
|
filename,
|
||||||
|
|
@ -85,10 +86,8 @@ def test_url_poppler():
|
||||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_url_ghostscript():
|
def test_url_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
return True
|
|
||||||
|
|
||||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||||
tables = camelot.read_pdf(
|
tables = camelot.read_pdf(
|
||||||
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
||||||
|
|
@ -126,10 +125,8 @@ def test_pages_poppler():
|
||||||
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_pages_ghostscript():
|
def test_pages_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
return True
|
|
||||||
|
|
||||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||||
tables = camelot.read_pdf(
|
tables = camelot.read_pdf(
|
||||||
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
url, backend=ImageConversionBackend(backend="ghostscript", use_fallback=False)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
|
|
||||||
|
# ``pytest.mark.skip`` skips unconditionally (its argument is the reason),
# so ``skipif`` is needed to restrict the skip to Windows.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="ghostscript tests are skipped on Windows",
)
|
||||||
|
|
||||||
|
|
||||||
def test_unknown_flavor():
|
def test_unknown_flavor():
|
||||||
message = "Unknown flavor specified." " Use either 'lattice' or 'stream'"
|
message = "Unknown flavor specified." " Use either 'lattice' or 'stream'"
|
||||||
|
|
@ -32,6 +34,7 @@ def test_unsupported_format():
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_no_tables_found_logs_suppressed():
|
def test_no_tables_found_logs_suppressed():
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
|
|
@ -133,3 +136,16 @@ def test_lattice_no_convert_method():
|
||||||
message = "must implement a 'convert' method"
|
message = "must implement a 'convert' method"
|
||||||
with pytest.raises(NotImplementedError, match=message):
|
with pytest.raises(NotImplementedError, match=message):
|
||||||
tables = camelot.read_pdf(filename, backend=ConversionBackend())
|
tables = camelot.read_pdf(filename, backend=ConversionBackend())
|
||||||
|
|
||||||
|
|
||||||
|
def test_lattice_ghostscript_deprecation_warning():
    """Check that ``read_pdf`` without an explicit backend reports the
    ghostscript deprecation message as a ``DeprecationWarning``."""
    ghostscript_deprecation_warning = (
        "'ghostscript' will be replaced by 'poppler' as the default image conversion"
        " backend in v0.12.0. You can try out 'poppler' with backend='poppler'."
    )

    with warnings.catch_warnings():
        # Promote warnings to exceptions so the deprecation can be captured
        # with ``pytest.raises`` before any image conversion is attempted.
        warnings.simplefilter("error")
        with pytest.raises(DeprecationWarning) as e:
            # The return value is irrelevant here; only the warning matters.
            camelot.read_pdf(filename)

    assert str(e.value) == ghostscript_deprecation_warning
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,10 @@ from .data import *
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
# ``pytest.mark.skip`` skips unconditionally (its argument is the reason),
# so ``skipif`` is needed to restrict the skip to Windows.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="ghostscript tests are skipped on Windows",
)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice():
|
def test_lattice():
|
||||||
df = pd.DataFrame(data_lattice)
|
df = pd.DataFrame(data_lattice)
|
||||||
|
|
||||||
|
|
@ -25,6 +28,7 @@ def test_lattice():
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_table_rotated():
|
def test_lattice_table_rotated():
|
||||||
df = pd.DataFrame(data_lattice_table_rotated)
|
df = pd.DataFrame(data_lattice_table_rotated)
|
||||||
|
|
||||||
|
|
@ -37,6 +41,7 @@ def test_lattice_table_rotated():
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_two_tables():
|
def test_lattice_two_tables():
|
||||||
df1 = pd.DataFrame(data_lattice_two_tables_1)
|
df1 = pd.DataFrame(data_lattice_two_tables_1)
|
||||||
df2 = pd.DataFrame(data_lattice_two_tables_2)
|
df2 = pd.DataFrame(data_lattice_two_tables_2)
|
||||||
|
|
@ -48,6 +53,7 @@ def test_lattice_two_tables():
|
||||||
assert df2.equals(tables[1].df)
|
assert df2.equals(tables[1].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_table_regions():
|
def test_lattice_table_regions():
|
||||||
df = pd.DataFrame(data_lattice_table_regions)
|
df = pd.DataFrame(data_lattice_table_regions)
|
||||||
|
|
||||||
|
|
@ -56,6 +62,7 @@ def test_lattice_table_regions():
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_table_areas():
|
def test_lattice_table_areas():
|
||||||
df = pd.DataFrame(data_lattice_table_areas)
|
df = pd.DataFrame(data_lattice_table_areas)
|
||||||
|
|
||||||
|
|
@ -64,6 +71,7 @@ def test_lattice_table_areas():
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_process_background():
|
def test_lattice_process_background():
|
||||||
df = pd.DataFrame(data_lattice_process_background)
|
df = pd.DataFrame(data_lattice_process_background)
|
||||||
|
|
||||||
|
|
@ -72,6 +80,7 @@ def test_lattice_process_background():
|
||||||
assert_frame_equal(df, tables[1].df)
|
assert_frame_equal(df, tables[1].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_copy_text():
|
def test_lattice_copy_text():
|
||||||
df = pd.DataFrame(data_lattice_copy_text)
|
df = pd.DataFrame(data_lattice_copy_text)
|
||||||
|
|
||||||
|
|
@ -80,6 +89,7 @@ def test_lattice_copy_text():
|
||||||
assert_frame_equal(df, tables[0].df)
|
assert_frame_equal(df, tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_shift_text():
|
def test_lattice_shift_text():
|
||||||
df_lt = pd.DataFrame(data_lattice_shift_text_left_top)
|
df_lt = pd.DataFrame(data_lattice_shift_text_left_top)
|
||||||
df_disable = pd.DataFrame(data_lattice_shift_text_disable)
|
df_disable = pd.DataFrame(data_lattice_shift_text_disable)
|
||||||
|
|
@ -96,6 +106,7 @@ def test_lattice_shift_text():
|
||||||
assert df_rb.equals(tables[0].df)
|
assert df_rb.equals(tables[0].df)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
def test_lattice_arabic():
|
def test_lattice_arabic():
|
||||||
df = pd.DataFrame(data_arabic)
|
df = pd.DataFrame(data_arabic)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,10 @@ import camelot
|
||||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||||
testdir = os.path.join(testdir, "files")
|
testdir = os.path.join(testdir, "files")
|
||||||
|
|
||||||
|
# ``pytest.mark.skip`` skips unconditionally (its argument is the reason),
# so ``skipif`` is needed to restrict the skip to Windows.
skip_on_windows = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="ghostscript tests are skipped on Windows",
)
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_text_plot():
|
def test_text_plot():
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
|
|
@ -32,11 +35,9 @@ def test_lattice_contour_plot_poppler():
|
||||||
return camelot.plot(tables[0], kind="contour")
|
return camelot.plot(tables[0], kind="contour")
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_lattice_contour_plot_ghostscript():
|
def test_lattice_contour_plot_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
pytest.skip("Skipping ghostscript test on Windows")
|
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||||
return camelot.plot(tables[0], kind="contour")
|
return camelot.plot(tables[0], kind="contour")
|
||||||
|
|
@ -56,11 +57,9 @@ def test_line_plot_poppler():
|
||||||
return camelot.plot(tables[0], kind="line")
|
return camelot.plot(tables[0], kind="line")
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_line_plot_ghostscript():
|
def test_line_plot_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
pytest.skip("Skipping ghostscript test on Windows")
|
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||||
return camelot.plot(tables[0], kind="line")
|
return camelot.plot(tables[0], kind="line")
|
||||||
|
|
@ -73,11 +72,9 @@ def test_joint_plot_poppler():
|
||||||
return camelot.plot(tables[0], kind="joint")
|
return camelot.plot(tables[0], kind="joint")
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_joint_plot_ghostscript():
|
def test_joint_plot_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
pytest.skip("Skipping ghostscript test on Windows")
|
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||||
return camelot.plot(tables[0], kind="joint")
|
return camelot.plot(tables[0], kind="joint")
|
||||||
|
|
@ -90,11 +87,9 @@ def test_grid_plot_poppler():
|
||||||
return camelot.plot(tables[0], kind="grid")
|
return camelot.plot(tables[0], kind="grid")
|
||||||
|
|
||||||
|
|
||||||
|
@skip_on_windows
|
||||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||||
def test_grid_plot_ghostscript():
|
def test_grid_plot_ghostscript():
|
||||||
if sys.platform not in ["linux", "darwin"]:
|
|
||||||
pytest.skip("Skipping ghostscript test on Windows")
|
|
||||||
|
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||||
return camelot.plot(tables[0], kind="grid")
|
return camelot.plot(tables[0], kind="grid")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue