Call pdftopng in subprocess

pull/198/head
Vinayak Mehta 2021-07-04 18:52:38 +05:30
parent 4c32c45534
commit 4dd1e7fb15
No known key found for this signature in database
GPG Key ID: 2DE013537A15A9A4
4 changed files with 27 additions and 37 deletions

View File

@@ -35,7 +35,7 @@ class GhostscriptBackend(object):
"here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
)
gs_args = [
gs_command = [
"gs",
"-q",
"-sDEVICE=png16m",
@@ -44,4 +44,4 @@ class GhostscriptBackend(object):
f"-r{resolution}",
pdf_path,
]
ghostscript.Ghostscript(*gs_args)
ghostscript.Ghostscript(*gs_command)

View File

@@ -1,8 +1,15 @@
# -*- coding: utf-8 -*-
from pdftopng import pdftopng
import subprocess
class PopplerBackend(object):
def convert(self, pdf_path, png_path):
pdftopng.convert(pdf_path, png_path)
pdftopng_command = ["pdftopng", pdf_path, png_path]
try:
subprocess.check_output(
" ".join(pdftopng_command), stderr=subprocess.STDOUT, shell=True
)
except subprocess.CalledProcessError as e:
raise ValueError(e.output)

View File

@@ -6,7 +6,6 @@ import copy
import locale
import logging
import warnings
import subprocess
import numpy as np
import pandas as pd

View File

@@ -55,19 +55,17 @@ def test_repr_poppler():
tables = camelot.read_pdf(filename)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
def test_repr_ghostscript():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename, backend=ImageConversionBackend(backend="ghostscript"))
tables = camelot.read_pdf(
filename, backend=ImageConversionBackend(backend="ghostscript")
)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
def test_url_poppler():
@@ -75,19 +73,17 @@ def test_url_poppler():
tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
def test_url_ghostscript():
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
tables = camelot.read_pdf(url, backend=ImageConversionBackend(backend="ghostscript"))
tables = camelot.read_pdf(
url, backend=ImageConversionBackend(backend="ghostscript")
)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
def test_pages_poppler():
@@ -95,23 +91,17 @@ def test_pages_poppler():
tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
tables = camelot.read_pdf(url, pages="1-end")
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
tables = camelot.read_pdf(url, pages="all")
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
def test_pages_ghostscript():
@@ -119,23 +109,17 @@ def test_pages_ghostscript():
tables = camelot.read_pdf(url)
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
tables = camelot.read_pdf(url, pages="1-end")
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
tables = camelot.read_pdf(url, pages="all")
assert repr(tables) == "<TableList n=1>"
assert repr(tables[0]) == "<Table shape=(7, 7)>"
assert (
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
)
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
def test_table_order():