* Remove unnecessary kwargs

* Direct Ghostscript call output to /dev/null

* Change char_margin's default value
pull/2/head
Vinayak Mehta 2017-01-07 15:55:27 +05:30
parent bd1d57a561
commit 70f626373b
3 changed files with 4 additions and 4 deletions

View File

@@ -222,7 +222,8 @@ class Lattice:
gs_call.insert(0, "gs") gs_call.insert(0, "gs")
else: else:
gs_call.insert(0, "gsc") gs_call.insert(0, "gsc")
subprocess.call(gs_call) subprocess.call(gs_call, stdout=open(os.devnull, 'w'),
stderr=subprocess.STDOUT)
img, threshold = adaptive_threshold(imagename, invert=self.invert) img, threshold = adaptive_threshold(imagename, invert=self.invert)
pdf_x = width pdf_x = width

View File

@@ -89,8 +89,7 @@ class Pdf:
outfile.addPage(page) outfile.addPage(page)
with open(sp_path, 'wb') as f: with open(sp_path, 'wb') as f:
outfile.write(f) outfile.write(f)
layout, dim = get_page_layout(sp_path, char_margin=1.0, layout, dim = get_page_layout(sp_path)
line_margin=0.5, word_margin=0.1)
lttextlh = get_text_objects(layout, ltype="lh") lttextlh = get_text_objects(layout, ltype="lh")
lttextlv = get_text_objects(layout, ltype="lv") lttextlv = get_text_objects(layout, ltype="lv")
ltchar = get_text_objects(layout, ltype="char") ltchar = get_text_objects(layout, ltype="char")

View File

@@ -671,7 +671,7 @@ def get_text_objects(layout, ltype="char", t=None):
return t return t
def get_page_layout(pname, char_margin=2.0, line_margin=0.5, word_margin=0.1, def get_page_layout(pname, char_margin=1.0, line_margin=0.5, word_margin=0.1,
detect_vertical=True, all_texts=True): detect_vertical=True, all_texts=True):
"""Returns a PDFMiner LTPage object and page dimension of a single """Returns a PDFMiner LTPage object and page dimension of a single
page pdf. See https://euske.github.io/pdfminer/ to get definitions page pdf. See https://euske.github.io/pdfminer/ to get definitions