Make ghostscript default backend and add support for string keywords
parent
f43235934b
commit
8abe02528b
|
|
@ -29,8 +29,8 @@ class GhostscriptBackend(object):
|
|||
def convert(self, pdf_path, png_path, resolution=300):
|
||||
if not self.installed():
|
||||
raise OSError(
|
||||
"Ghostscript is not installed. Please install it using the instructions"
|
||||
"here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
|
||||
"Ghostscript is not installed. You can install it using the instructions"
|
||||
" here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
|
||||
)
|
||||
|
||||
import ghostscript
|
||||
|
|
|
|||
|
|
@ -3,21 +3,21 @@
|
|||
from .poppler_backend import PopplerBackend
|
||||
from .ghostscript_backend import GhostscriptBackend
|
||||
|
||||
backends = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
|
||||
BACKENDS = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend}
|
||||
|
||||
|
||||
class ImageConversionBackend(object):
|
||||
def __init__(self, backend="poppler", use_fallback=True):
|
||||
if backend not in backends.keys():
|
||||
if backend not in BACKENDS.keys():
|
||||
raise ValueError(f"Image conversion backend '{backend}' not supported")
|
||||
|
||||
self.backend = backend
|
||||
self.use_fallback = use_fallback
|
||||
self.fallbacks = list(filter(lambda x: x != backend, backends.keys()))
|
||||
self.fallbacks = list(filter(lambda x: x != backend, BACKENDS.keys()))
|
||||
|
||||
def convert(self, pdf_path, png_path):
|
||||
try:
|
||||
converter = backends[self.backend]()
|
||||
converter = BACKENDS[self.backend]()
|
||||
converter.convert(pdf_path, png_path)
|
||||
except Exception as e:
|
||||
import sys
|
||||
|
|
@ -25,7 +25,7 @@ class ImageConversionBackend(object):
|
|||
if self.use_fallback:
|
||||
for fallback in self.fallbacks:
|
||||
try:
|
||||
converter = backends[fallback]()
|
||||
converter = BACKENDS[fallback]()
|
||||
converter.convert(pdf_path, png_path)
|
||||
except Exception as e:
|
||||
raise type(e)(
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ class PopplerBackend(object):
|
|||
pdftopng_executable = shutil.which("pdftopng")
|
||||
if pdftopng_executable is None:
|
||||
raise OSError(
|
||||
"pdftopng is not installed. Please install it using the `pip install pdftopng` command."
|
||||
"pdftopng is not installed. You can install it using the 'pip install pdftopng' command."
|
||||
)
|
||||
|
||||
pdftopng_command = [pdftopng_executable, pdf_path, png_path]
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from ..image_processing import (
|
|||
find_contours,
|
||||
find_joints,
|
||||
)
|
||||
from ..backends import ImageConversionBackend
|
||||
from ..backends.image_conversion import BACKENDS
|
||||
|
||||
|
||||
logger = logging.getLogger("camelot")
|
||||
|
|
@ -111,7 +111,7 @@ class Lattice(BaseParser):
|
|||
threshold_constant=-2,
|
||||
iterations=0,
|
||||
resolution=300,
|
||||
backend=ImageConversionBackend(),
|
||||
backend="ghostscript",
|
||||
**kwargs,
|
||||
):
|
||||
self.table_regions = table_regions
|
||||
|
|
@ -129,7 +129,30 @@ class Lattice(BaseParser):
|
|||
self.threshold_constant = threshold_constant
|
||||
self.iterations = iterations
|
||||
self.resolution = resolution
|
||||
self.backend = backend
|
||||
self.backend = Lattice._get_backend(backend)
|
||||
|
||||
@staticmethod
def _get_backend(backend):
    """Resolve the ``backend`` argument into an image conversion backend object.

    Parameters
    ----------
    backend : str or object
        Either a key of ``BACKENDS`` or an object exposing a callable
        ``convert`` attribute.

    Returns
    -------
    object
        A new instance of the registered backend class when a string is
        given, otherwise ``backend`` itself, unchanged.

    Raises
    ------
    NotImplementedError
        If the string is not a key of ``BACKENDS``, or if the object does
        not provide a callable ``convert``.

    """
    if isinstance(backend, str):
        if backend not in BACKENDS:
            raise NotImplementedError(
                f"Unknown backend '{backend}' specified. Please use either 'poppler' or 'ghostscript'."
            )
        return BACKENDS[backend]()

    # A non-callable ``convert`` attribute would only blow up later, at
    # conversion time, so it is rejected here together with a missing one.
    if not callable(getattr(backend, "convert", None)):
        raise NotImplementedError(f"'{backend}' must implement a 'convert' method")

    return backend
|
||||
|
||||
@staticmethod
|
||||
def _reduce_index(t, idx, shift_text):
|
||||
|
|
|
|||
|
|
@ -644,8 +644,5 @@ When using the :ref:`Lattice <lattice>` flavor, Camelot uses `pdftopng <https://
|
|||
|
||||
If you want to be explicit about the image conversion backend that Camelot should use, you can supply it like this::
|
||||
|
||||
>>> from camelot.backends.poppler_backend import PopplerBackend
|
||||
>>> from camelot.backends.ghostscript_backend import GhostscriptBackend
|
||||
>>>
|
||||
>>> tables = camelot.read_pdf(filename, backend=PopplerBackend())
|
||||
>>> tables = camelot.read_pdf(filename, backend=GhostscriptBackend())
|
||||
>>> tables = camelot.read_pdf(filename, backend="poppler")
|
||||
>>> tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
|
|
|
|||
|
|
@ -32,56 +32,6 @@ def test_unsupported_format():
|
|||
tables = camelot.read_pdf(filename)
|
||||
|
||||
|
||||
def test_stream_equal_length():
|
||||
message = "Length of table_areas and columns" " should be equal"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
tables = camelot.read_pdf(
|
||||
filename,
|
||||
flavor="stream",
|
||||
table_areas=["10,20,30,40"],
|
||||
columns=["10,20,30,40", "10,20,30,40"],
|
||||
)
|
||||
|
||||
|
||||
def test_image_warning():
|
||||
filename = os.path.join(testdir, "image.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename)
|
||||
assert (
|
||||
str(e.value)
|
||||
== "page-1 is image-based, camelot only works on text-based pages."
|
||||
)
|
||||
|
||||
|
||||
def test_lattice_no_tables_on_page():
|
||||
filename = os.path.join(testdir, "empty.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename, flavor="lattice")
|
||||
assert str(e.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_stream_no_tables_on_page():
|
||||
filename = os.path.join(testdir, "empty.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename, flavor="stream")
|
||||
assert str(e.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_stream_no_tables_in_area():
|
||||
filename = os.path.join(testdir, "only_page_number.pdf")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error")
|
||||
with pytest.raises(UserWarning) as e:
|
||||
tables = camelot.read_pdf(filename, flavor="stream")
|
||||
assert str(e.value) == "No tables found in table area 1"
|
||||
|
||||
|
||||
def test_no_tables_found_logs_suppressed():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
with warnings.catch_warnings():
|
||||
|
|
@ -118,3 +68,68 @@ def test_bad_password():
|
|||
message = "file has not been decrypted"
|
||||
with pytest.raises(Exception, match=message):
|
||||
tables = camelot.read_pdf(filename, password="wrongpass")
|
||||
|
||||
|
||||
def test_stream_equal_length():
    """Check that mismatched ``table_areas``/``columns`` lengths raise ``ValueError``."""
    # NOTE(review): ``filename`` is not defined in this function; presumably a
    # module-level name in the test file - confirm.
    message = "Length of table_areas and columns should be equal"
    with pytest.raises(ValueError, match=message):
        # One table area but two column specs: the lengths disagree on purpose.
        camelot.read_pdf(
            filename,
            flavor="stream",
            table_areas=["10,20,30,40"],
            columns=["10,20,30,40", "10,20,30,40"],
        )
|
||||
|
||||
|
||||
def test_image_warning():
    """Check the ``UserWarning`` text raised when reading ``image.pdf``."""
    filename = os.path.join(testdir, "image.pdf")
    with warnings.catch_warnings():
        # Promote warnings to exceptions so ``pytest.raises`` can capture them.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(filename)
    # Checked after the ``raises`` block has exited: statements placed after
    # the raising call inside that block would never execute.
    assert (
        str(excinfo.value)
        == "page-1 is image-based, camelot only works on text-based pages."
    )
|
||||
|
||||
|
||||
def test_stream_no_tables_on_page():
    """Check the ``UserWarning`` text for ``empty.pdf`` with the stream flavor."""
    filename = os.path.join(testdir, "empty.pdf")
    with warnings.catch_warnings():
        # Promote warnings to exceptions so ``pytest.raises`` can capture them.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(filename, flavor="stream")
    # Checked after the ``raises`` block has exited so the assertion always runs.
    assert str(excinfo.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_stream_no_tables_in_area():
    """Check the ``UserWarning`` text for ``only_page_number.pdf`` with the stream flavor."""
    filename = os.path.join(testdir, "only_page_number.pdf")
    with warnings.catch_warnings():
        # Promote warnings to exceptions so ``pytest.raises`` can capture them.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(filename, flavor="stream")
    # Checked after the ``raises`` block has exited so the assertion always runs.
    assert str(excinfo.value) == "No tables found in table area 1"
|
||||
|
||||
|
||||
def test_lattice_no_tables_on_page():
    """Check the ``UserWarning`` text for ``empty.pdf`` with the lattice flavor."""
    filename = os.path.join(testdir, "empty.pdf")
    with warnings.catch_warnings():
        # Promote warnings to exceptions so ``pytest.raises`` can capture them.
        warnings.simplefilter("error")
        with pytest.raises(UserWarning) as excinfo:
            camelot.read_pdf(filename, flavor="lattice")
    # Checked after the ``raises`` block has exited so the assertion always runs.
    assert str(excinfo.value) == "No tables found on page-1"
|
||||
|
||||
|
||||
def test_lattice_unknown_backend():
    """Check that an unregistered backend name raises ``NotImplementedError``."""
    import re

    # NOTE(review): ``filename`` is not defined in this function; presumably a
    # module-level name in the test file - confirm.
    message = "Unknown backend 'mupdf' specified. Please use either 'poppler' or 'ghostscript'."
    # ``match`` is applied as a regular expression, so escape the message to
    # make its dots match literally instead of matching any character.
    with pytest.raises(NotImplementedError, match=re.escape(message)):
        camelot.read_pdf(filename, backend="mupdf")
|
||||
|
||||
|
||||
def test_lattice_no_convert_method():
    """Check that a backend object without ``convert`` raises ``NotImplementedError``."""

    # Deliberately empty: it provides no ``convert`` attribute at all.
    class ConversionBackend(object):
        pass

    # NOTE(review): ``filename`` is not defined in this function; presumably a
    # module-level name in the test file - confirm.
    message = "must implement a 'convert' method"
    with pytest.raises(NotImplementedError, match=message):
        camelot.read_pdf(filename, backend=ConversionBackend())
|
||||
|
|
|
|||
|
|
@ -4,18 +4,16 @@ import pytest
|
|||
|
||||
import camelot.backends.image_conversion
|
||||
from camelot.backends import ImageConversionBackend
|
||||
from camelot.backends.poppler_backend import PopplerBackend
|
||||
from camelot.backends.ghostscript_backend import GhostscriptBackend
|
||||
|
||||
|
||||
class PopplerBackendError(object):
|
||||
def convert(self, pdf_path, png_path):
|
||||
raise ValueError('conversion failed')
|
||||
raise ValueError("Image conversion failed")
|
||||
|
||||
|
||||
class GhostscriptBackendError(object):
|
||||
def convert(self, pdf_path, png_path):
|
||||
raise ValueError('conversion failed')
|
||||
raise ValueError("Image conversion failed")
|
||||
|
||||
|
||||
class GhostscriptBackendNoError(object):
|
||||
|
|
@ -24,26 +22,39 @@ class GhostscriptBackendNoError(object):
|
|||
|
||||
|
||||
def test_poppler_backend_error_when_no_use_fallback(monkeypatch):
|
||||
backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendNoError}
|
||||
monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True)
|
||||
BACKENDS = {
|
||||
"poppler": PopplerBackendError,
|
||||
"ghostscript": GhostscriptBackendNoError,
|
||||
}
|
||||
monkeypatch.setattr(
|
||||
"camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True
|
||||
)
|
||||
backend = ImageConversionBackend(use_fallback=False)
|
||||
|
||||
message = "conversion failed with image conversion backend 'poppler'"
|
||||
message = "Image conversion failed with image conversion backend 'poppler'"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
backend.convert('foo', 'bar')
|
||||
backend.convert("foo", "bar")
|
||||
|
||||
|
||||
def test_ghostscript_backend_when_use_fallback(monkeypatch):
|
||||
backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendNoError}
|
||||
monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True)
|
||||
BACKENDS = {
|
||||
"poppler": PopplerBackendError,
|
||||
"ghostscript": GhostscriptBackendNoError,
|
||||
}
|
||||
monkeypatch.setattr(
|
||||
"camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True
|
||||
)
|
||||
backend = ImageConversionBackend()
|
||||
backend.convert('foo', 'bar')
|
||||
backend.convert("foo", "bar")
|
||||
|
||||
|
||||
def test_ghostscript_backend_error_when_use_fallback(monkeypatch):
|
||||
backends = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendError}
|
||||
monkeypatch.setattr("camelot.backends.image_conversion.backends", backends, raising=True)
|
||||
BACKENDS = {"poppler": PopplerBackendError, "ghostscript": GhostscriptBackendError}
|
||||
monkeypatch.setattr(
|
||||
"camelot.backends.image_conversion.BACKENDS", BACKENDS, raising=True
|
||||
)
|
||||
backend = ImageConversionBackend()
|
||||
|
||||
message = "conversion failed with image conversion backend 'ghostscript'"
|
||||
message = "Image conversion failed with image conversion backend 'ghostscript'"
|
||||
with pytest.raises(ValueError, match=message):
|
||||
backend.convert('foo', 'bar')
|
||||
backend.convert("foo", "bar")
|
||||
|
|
|
|||
|
|
@ -6,9 +6,6 @@ import sys
|
|||
import pytest
|
||||
|
||||
import camelot
|
||||
from camelot.backends.poppler_backend import PopplerBackend
|
||||
from camelot.backends.ghostscript_backend import GhostscriptBackend
|
||||
|
||||
|
||||
testdir = os.path.dirname(os.path.abspath(__file__))
|
||||
testdir = os.path.join(testdir, "files")
|
||||
|
|
@ -31,7 +28,7 @@ def test_textedge_plot():
|
|||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_lattice_contour_plot_poppler():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=PopplerBackend())
|
||||
tables = camelot.read_pdf(filename, backend="poppler")
|
||||
return camelot.plot(tables[0], kind="contour")
|
||||
|
||||
|
||||
|
|
@ -41,7 +38,7 @@ def test_lattice_contour_plot_ghostscript():
|
|||
pytest.skip("Skipping ghostscript test on Windows")
|
||||
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=GhostscriptBackend())
|
||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
return camelot.plot(tables[0], kind="contour")
|
||||
|
||||
|
||||
|
|
@ -55,7 +52,7 @@ def test_stream_contour_plot():
|
|||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_line_plot_poppler():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=PopplerBackend())
|
||||
tables = camelot.read_pdf(filename, backend="poppler")
|
||||
return camelot.plot(tables[0], kind="line")
|
||||
|
||||
|
||||
|
|
@ -65,14 +62,14 @@ def test_line_plot_ghostscript():
|
|||
pytest.skip("Skipping ghostscript test on Windows")
|
||||
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=GhostscriptBackend())
|
||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
return camelot.plot(tables[0], kind="line")
|
||||
|
||||
|
||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_joint_plot_poppler():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=PopplerBackend())
|
||||
tables = camelot.read_pdf(filename, backend="poppler")
|
||||
return camelot.plot(tables[0], kind="joint")
|
||||
|
||||
|
||||
|
|
@ -82,14 +79,14 @@ def test_joint_plot_ghostscript():
|
|||
pytest.skip("Skipping ghostscript test on Windows")
|
||||
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=GhostscriptBackend())
|
||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
return camelot.plot(tables[0], kind="joint")
|
||||
|
||||
|
||||
@pytest.mark.mpl_image_compare(baseline_dir="files/baseline_plots", remove_text=True)
|
||||
def test_grid_plot_poppler():
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=PopplerBackend())
|
||||
tables = camelot.read_pdf(filename, backend="poppler")
|
||||
return camelot.plot(tables[0], kind="grid")
|
||||
|
||||
|
||||
|
|
@ -99,5 +96,5 @@ def test_grid_plot_ghostscript():
|
|||
pytest.skip("Skipping ghostscript test on Windows")
|
||||
|
||||
filename = os.path.join(testdir, "foo.pdf")
|
||||
tables = camelot.read_pdf(filename, backend=GhostscriptBackend())
|
||||
tables = camelot.read_pdf(filename, backend="ghostscript")
|
||||
return camelot.plot(tables[0], kind="grid")
|
||||
|
|
|
|||
Loading…
Reference in New Issue