Call pdftopng in subprocess

pull/198/head
Vinayak Mehta 2021-07-04 18:52:38 +05:30
parent 4c32c45534
commit 4dd1e7fb15
No known key found for this signature in database
GPG Key ID: 2DE013537A15A9A4
4 changed files with 27 additions and 37 deletions

View File

@ -35,7 +35,7 @@ class GhostscriptBackend(object):
"here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html" "here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
) )
gs_args = [ gs_command = [
"gs", "gs",
"-q", "-q",
"-sDEVICE=png16m", "-sDEVICE=png16m",
@ -44,4 +44,4 @@ class GhostscriptBackend(object):
f"-r{resolution}", f"-r{resolution}",
pdf_path, pdf_path,
] ]
ghostscript.Ghostscript(*gs_args) ghostscript.Ghostscript(*gs_command)

View File

@ -1,8 +1,15 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from pdftopng import pdftopng import subprocess
class PopplerBackend(object):
    """Image conversion backend that runs the ``pdftopng`` executable."""

    def convert(self, pdf_path, png_path):
        """Convert a PDF to PNG by invoking ``pdftopng`` as a child process.

        Parameters
        ----------
        pdf_path : str
            Path of the PDF file, passed as the first argument to
            ``pdftopng``.
        png_path : str
            Output path, passed as the second argument to ``pdftopng``.

        Raises
        ------
        ValueError
            If ``pdftopng`` cannot be started or exits with a non-zero
            status. For a non-zero exit the message is the captured
            stdout/stderr of the process.

        """
        # Hand the argument vector straight to the OS instead of joining it
        # into a shell string: paths containing spaces stay a single argument
        # and shell metacharacters in a path are never interpreted.
        pdftopng_command = ["pdftopng", pdf_path, png_path]
        try:
            subprocess.check_output(pdftopng_command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise ValueError(e.output) from e
        except OSError as e:
            # Without a shell, a missing or non-executable binary surfaces as
            # OSError rather than a non-zero exit status; report it through
            # the same exception type callers already handle.
            raise ValueError(str(e)) from e

View File

@ -6,7 +6,6 @@ import copy
import locale import locale
import logging import logging
import warnings import warnings
import subprocess
import numpy as np import numpy as np
import pandas as pd import pandas as pd

View File

@ -55,19 +55,17 @@ def test_repr_poppler():
tables = camelot.read_pdf(filename) tables = camelot.read_pdf(filename)
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
def test_repr_ghostscript():
    """Check the reprs produced for ``foo.pdf`` read with the ghostscript backend."""
    pdf_file = os.path.join(testdir, "foo.pdf")
    gs_backend = ImageConversionBackend(backend="ghostscript")
    extracted = camelot.read_pdf(pdf_file, backend=gs_backend)

    assert repr(extracted) == "<TableList n=1>"
    first_table = extracted[0]
    assert repr(first_table) == "<Table shape=(7, 7)>"
    top_left_cell = extracted[0].cells[0][0]
    assert repr(top_left_cell) == "<Cell x1=120 y1=218 x2=165 y2=234>"
def test_url_poppler(): def test_url_poppler():
@ -75,19 +73,17 @@ def test_url_poppler():
tables = camelot.read_pdf(url) tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
def test_url_ghostscript():
    """Check the reprs produced for a PDF given by URL and read with the ghostscript backend."""
    pdf_url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
    gs_backend = ImageConversionBackend(backend="ghostscript")
    extracted = camelot.read_pdf(pdf_url, backend=gs_backend)

    assert repr(extracted) == "<TableList n=1>"
    first_table = extracted[0]
    assert repr(first_table) == "<Table shape=(7, 7)>"
    top_left_cell = extracted[0].cells[0][0]
    assert repr(top_left_cell) == "<Cell x1=120 y1=218 x2=165 y2=234>"
def test_pages_poppler(): def test_pages_poppler():
@ -95,23 +91,17 @@ def test_pages_poppler():
tables = camelot.read_pdf(url) tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
tables = camelot.read_pdf(url, pages="1-end") tables = camelot.read_pdf(url, pages="1-end")
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
tables = camelot.read_pdf(url, pages="all") tables = camelot.read_pdf(url, pages="all")
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
def test_pages_ghostscript(): def test_pages_ghostscript():
@ -119,23 +109,17 @@ def test_pages_ghostscript():
tables = camelot.read_pdf(url) tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
tables = camelot.read_pdf(url, pages="1-end") tables = camelot.read_pdf(url, pages="1-end")
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
tables = camelot.read_pdf(url, pages="all") tables = camelot.read_pdf(url, pages="all")
assert repr(tables) == "<TableList n=1>" assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>" assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert ( assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
def test_table_order(): def test_table_order():