From f43235934b9356bd42e5cbf033a6f1953a2cf26b Mon Sep 17 00:00:00 2001 From: Vinayak Mehta Date: Wed, 7 Jul 2021 04:29:23 +0530 Subject: [PATCH] Bump version and update docs --- HISTORY.md | 1 + camelot/__version__.py | 2 +- docs/user/advanced.rst | 26 ++++++++++++++++++++++++++ docs/user/faq.rst | 14 ++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 0b3c011..f8ac1cf 100755 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ master **Improvements** +- Add pdftopng for image conversion and use ghostscript as fallback. [#198](https://github.com/camelot-dev/camelot/pull/198) by Vinayak Mehta. - Add markdown export format. [#222](https://github.com/camelot-dev/camelot/pull/222/) by [Lucas Cimon](https://github.com/Lucas-C). **Documentation** diff --git a/camelot/__version__.py b/camelot/__version__.py index ae0cab1..fc20419 100644 --- a/camelot/__version__.py +++ b/camelot/__version__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -VERSION = (0, 9, 0) +VERSION = (0, 10, 0) PRERELEASE = None # alpha, beta or rc REVISION = None diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index 662a7b1..09bfc4e 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -623,3 +623,29 @@ To deal with such cases, you can tweak PDFMiner's `LAParams kwargs >> tables = camelot.read_pdf('foo.pdf', layout_kwargs={'detect_vertical': False}) + +.. _image-conversion-backend: + +Use alternate image conversion backends +--------------------------------------- + +When using the :ref:`Lattice ` flavor, Camelot uses `pdftopng `_ to convert PDF pages to images for line recognition. This should work out of the box on most operating systems. 
However, if you get an error, you can supply your own image conversion backend to Camelot:: + + >>> class ConversionBackend(object): + >>> def convert(self, pdf_path, png_path): + >>> # read pdf page from pdf_path + >>> # convert pdf page to image + >>> # write image to png_path + >>> pass + >>> + >>> tables = camelot.read_pdf(filename, backend=ConversionBackend()) + +.. note:: If image conversion using ``pdftopng`` fails, Camelot falls back to ``ghostscript`` to try image conversion again, and if that fails, it raises an error. + +In case you want to be explicit about the image conversion backend that Camelot should use, you can supply it like this:: + + >>> from camelot.backends.poppler_backend import PopplerBackend + >>> from camelot.backends.ghostscript_backend import GhostscriptBackend + >>> + >>> tables = camelot.read_pdf(filename, backend=PopplerBackend()) + >>> tables = camelot.read_pdf(filename, backend=GhostscriptBackend()) diff --git a/docs/user/faq.rst b/docs/user/faq.rst index 29bbdad..71babbd 100644 --- a/docs/user/faq.rst +++ b/docs/user/faq.rst @@ -54,3 +54,17 @@ For more details, check out this code snippet from `@anakin87 ` flavor, you can supply your own :ref:`image conversion backend ` by creating a class with a ``convert`` method as follows:: + + >>> class ConversionBackend(object): + >>> def convert(self, pdf_path, png_path): + >>> # read pdf page from pdf_path + >>> # convert pdf page to image + >>> # write image to png_path + >>> pass + >>> + >>> tables = camelot.read_pdf(filename, backend=ConversionBackend())