Call pdftopng in subprocess
parent
4c32c45534
commit
4dd1e7fb15
|
|
@ -35,7 +35,7 @@ class GhostscriptBackend(object):
|
||||||
"here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
|
"here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html"
|
||||||
)
|
)
|
||||||
|
|
||||||
gs_args = [
|
gs_command = [
|
||||||
"gs",
|
"gs",
|
||||||
"-q",
|
"-q",
|
||||||
"-sDEVICE=png16m",
|
"-sDEVICE=png16m",
|
||||||
|
|
@ -44,4 +44,4 @@ class GhostscriptBackend(object):
|
||||||
f"-r{resolution}",
|
f"-r{resolution}",
|
||||||
pdf_path,
|
pdf_path,
|
||||||
]
|
]
|
||||||
ghostscript.Ghostscript(*gs_args)
|
ghostscript.Ghostscript(*gs_command)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,15 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from pdftopng import pdftopng
|
import subprocess
|
||||||
|
|
||||||
|
|
||||||
class PopplerBackend(object):
|
class PopplerBackend(object):
|
||||||
def convert(self, pdf_path, png_path):
|
def convert(self, pdf_path, png_path):
|
||||||
pdftopng.convert(pdf_path, png_path)
|
pdftopng_command = ["pdftopng", pdf_path, png_path]
|
||||||
|
|
||||||
|
try:
|
||||||
|
subprocess.check_output(
|
||||||
|
" ".join(pdftopng_command), stderr=subprocess.STDOUT, shell=True
|
||||||
|
)
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
raise ValueError(e.output)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import copy
|
||||||
import locale
|
import locale
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
import subprocess
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
|
||||||
|
|
@ -55,19 +55,17 @@ def test_repr_poppler():
|
||||||
tables = camelot.read_pdf(filename)
|
tables = camelot.read_pdf(filename)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_repr_ghostscript():
|
def test_repr_ghostscript():
|
||||||
filename = os.path.join(testdir, "foo.pdf")
|
filename = os.path.join(testdir, "foo.pdf")
|
||||||
tables = camelot.read_pdf(filename, backend=ImageConversionBackend(backend="ghostscript"))
|
tables = camelot.read_pdf(
|
||||||
|
filename, backend=ImageConversionBackend(backend="ghostscript")
|
||||||
|
)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_url_poppler():
|
def test_url_poppler():
|
||||||
|
|
@ -75,19 +73,17 @@ def test_url_poppler():
|
||||||
tables = camelot.read_pdf(url)
|
tables = camelot.read_pdf(url)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_url_ghostscript():
|
def test_url_ghostscript():
|
||||||
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
url = "https://camelot-py.readthedocs.io/en/master/_static/pdf/foo.pdf"
|
||||||
tables = camelot.read_pdf(url, backend=ImageConversionBackend(backend="ghostscript"))
|
tables = camelot.read_pdf(
|
||||||
|
url, backend=ImageConversionBackend(backend="ghostscript")
|
||||||
|
)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=218 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_pages_poppler():
|
def test_pages_poppler():
|
||||||
|
|
@ -95,23 +91,17 @@ def test_pages_poppler():
|
||||||
tables = camelot.read_pdf(url)
|
tables = camelot.read_pdf(url)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="1-end")
|
tables = camelot.read_pdf(url, pages="1-end")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="all")
|
tables = camelot.read_pdf(url, pages="all")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_pages_ghostscript():
|
def test_pages_ghostscript():
|
||||||
|
|
@ -119,23 +109,17 @@ def test_pages_ghostscript():
|
||||||
tables = camelot.read_pdf(url)
|
tables = camelot.read_pdf(url)
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="1-end")
|
tables = camelot.read_pdf(url, pages="1-end")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
tables = camelot.read_pdf(url, pages="all")
|
tables = camelot.read_pdf(url, pages="all")
|
||||||
assert repr(tables) == "<TableList n=1>"
|
assert repr(tables) == "<TableList n=1>"
|
||||||
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
assert repr(tables[0]) == "<Table shape=(7, 7)>"
|
||||||
assert (
|
assert repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
||||||
repr(tables[0].cells[0][0]) == "<Cell x1=120 y1=219 x2=165 y2=234>"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_table_order():
|
def test_table_order():
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue