Fix current logging

pull/2/head
Vinayak Mehta 2018-09-07 05:53:19 +05:30
parent 09ac8f4640
commit 20acda2259
3 changed files with 39 additions and 41 deletions

View File

@ -2,7 +2,6 @@ from __future__ import division
import os import os
import copy import copy
import logging import logging
import warnings
import subprocess import subprocess
import numpy as np import numpy as np
@ -12,12 +11,12 @@ from .base import BaseParser
from ..core import Table from ..core import Table
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox, from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
merge_close_values, get_table_index, compute_accuracy, merge_close_values, get_table_index, compute_accuracy,
count_empty, encode_) count_empty, encode_, setup_logging)
from ..image_processing import (adaptive_threshold, find_lines, from ..image_processing import (adaptive_threshold, find_lines,
find_table_contours, find_table_joints) find_table_contours, find_table_joints)
logger = logging.getLogger('camelot') logger = setup_logging(__name__)
class Lattice(BaseParser): class Lattice(BaseParser):
@ -218,7 +217,7 @@ class Lattice(BaseParser):
self._generate_layout(filename) self._generate_layout(filename)
if not self.horizontal_text: if not self.horizontal_text:
warnings.warn("No tables found on {}".format( logger.info("No tables found on {}".format(
os.path.basename(self.rootname))) os.path.basename(self.rootname)))
return [], self.g return [], self.g

View File

@ -1,7 +1,6 @@
from __future__ import division from __future__ import division
import os import os
import logging import logging
import warnings
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -12,7 +11,7 @@ from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
count_empty, encode_) count_empty, encode_)
logger = logging.getLogger('camelot') logger = setup_logging(__name__)
class Stream(BaseParser): class Stream(BaseParser):
@ -167,8 +166,7 @@ class Stream(BaseParser):
else: else:
ncols = max(set(elements), key=elements.count) ncols = max(set(elements), key=elements.count)
if ncols == 1: if ncols == 1:
# no tables condition logger.info("No tables found on {}".format(
warnings.warn("No tables found on {}".format(
os.path.basename(self.rootname))) os.path.basename(self.rootname)))
cols = [(t.x0, t.x1) cols = [(t.x0, t.x1)
for r in rows_grouped if len(r) == ncols for t in r] for r in rows_grouped if len(r) == ncols for t in r]
@ -232,7 +230,7 @@ class Stream(BaseParser):
self._generate_layout(filename) self._generate_layout(filename)
if not self.horizontal_text: if not self.horizontal_text:
warnings.warn("No tables found on {}".format( logger.info("No tables found on {}".format(
os.path.basename(self.rootname))) os.path.basename(self.rootname)))
return [], self.g return [], self.g

View File

@ -18,6 +18,34 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
LTTextLineVertical) LTTextLineVertical)
def setup_logging(name):
    """Return the logger called ``name`` with an INFO console handler.

    Parameters
    ----------
    name : str
        Name of the logger to fetch, typically the calling module's
        ``__name__``.

    Returns
    -------
    logger : logging.Logger
        The logger registered under ``name``. It carries one console
        ``StreamHandler`` at INFO level that formats records as
        ``time - level - function - message``.

    """
    logger = logging.getLogger(name)

    # A logger left at NOTSET inherits the root logger's level (WARNING
    # by default), which would drop INFO records before they ever reach
    # the handler below. An explicitly configured level is respected.
    if logger.level == logging.NOTSET:
        logger.setLevel(logging.INFO)

    # Attach the console handler only once so that calling this function
    # again for the same name does not duplicate every message.
    has_console = any(
        isinstance(h, logging.StreamHandler)
        and not isinstance(h, logging.FileHandler)
        for h in logger.handlers)
    if not has_console:
        format_string = (
            '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
        formatter = logging.Formatter(
            format_string, datefmt='%Y-%m-%dT%H:%M:%S')
        handler = logging.StreamHandler()
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
logger = setup_logging(__name__)
def translate(x1, x2): def translate(x1, x2):
""" """
@ -143,37 +171,6 @@ def scale_image(tables, v_segments, h_segments, factors):
return tables_new, v_segments_new, h_segments_new return tables_new, v_segments_new, h_segments_new
def setup_logging(log_filepath):
    """Configure the "camelot" logger with a log file and console output.

    Parameters
    ----------
    log_filepath : str
        Path of the log file. Records are appended to it, UTF-8 encoded.

    Returns
    -------
    logger : logging.Logger
        The "camelot" logger, set to DEBUG, with a DEBUG-level file
        handler for ``log_filepath`` and an INFO-level console handler.

    """
    import os

    logger = logging.getLogger("camelot")
    logger.setLevel(logging.DEBUG)

    # Both handlers share one format, so build the formatter once.
    format_string = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S')

    # The "camelot" logger is process-wide; without these checks every
    # call would attach two more handlers and duplicate each record.
    target = os.path.abspath(log_filepath)
    has_file = False
    has_console = False
    for existing in logger.handlers:
        if isinstance(existing, logging.FileHandler):
            # FileHandler stores the absolute path it writes to.
            if existing.baseFilename == target:
                has_file = True
        elif isinstance(existing, logging.StreamHandler):
            has_console = True

    # Log File Handler (Associating one log file per webservice run)
    if not has_file:
        log_file_handler = logging.FileHandler(log_filepath,
                                               mode='a',
                                               encoding='utf-8')
        log_file_handler.setLevel(logging.DEBUG)
        log_file_handler.setFormatter(formatter)
        logger.addHandler(log_file_handler)

    # Stream Log Handler (For console)
    if not has_console:
        stream_log_handler = logging.StreamHandler()
        stream_log_handler.setLevel(logging.INFO)
        stream_log_handler.setFormatter(formatter)
        logger.addHandler(stream_log_handler)

    return logger
def get_rotation(lttextlh, lttextlv, ltchar): def get_rotation(lttextlh, lttextlv, ltchar):
""" """
@ -419,7 +416,11 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
else: else:
lt_col_overlap.append(-1) lt_col_overlap.append(-1)
if len(filter(lambda x: x != -1, lt_col_overlap)) == 0: if len(filter(lambda x: x != -1, lt_col_overlap)) == 0:
logging.warning("Text did not fit any column.") text = t.get_text().strip('\n')
text_range = (t.x0, t.x1)
col_range = (table.cols[0][0], table.cols[-1][1])
logger.info("{} {} does not lie in column range {}".format(
text, text_range, col_range))
r_idx = r r_idx = r
c_idx = lt_col_overlap.index(max(lt_col_overlap)) c_idx = lt_col_overlap.index(max(lt_col_overlap))
break break