Fix current logging
parent
09ac8f4640
commit
20acda2259
|
|
@ -2,7 +2,6 @@ from __future__ import division
|
|||
import os
|
||||
import copy
|
||||
import logging
|
||||
import warnings
|
||||
import subprocess
|
||||
|
||||
import numpy as np
|
||||
|
|
@ -12,12 +11,12 @@ from .base import BaseParser
|
|||
from ..core import Table
|
||||
from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
|
||||
merge_close_values, get_table_index, compute_accuracy,
|
||||
count_empty, encode_)
|
||||
count_empty, encode_, setup_logging)
|
||||
from ..image_processing import (adaptive_threshold, find_lines,
|
||||
find_table_contours, find_table_joints)
|
||||
|
||||
|
||||
logger = logging.getLogger('camelot')
|
||||
logger = setup_logging(__name__)
|
||||
|
||||
|
||||
class Lattice(BaseParser):
|
||||
|
|
@ -218,7 +217,7 @@ class Lattice(BaseParser):
|
|||
self._generate_layout(filename)
|
||||
|
||||
if not self.horizontal_text:
|
||||
warnings.warn("No tables found on {}".format(
|
||||
logger.info("No tables found on {}".format(
|
||||
os.path.basename(self.rootname)))
|
||||
return [], self.g
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from __future__ import division
|
||||
import os
|
||||
import logging
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
|
@ -12,7 +11,7 @@ from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
|
|||
count_empty, encode_)
|
||||
|
||||
|
||||
logger = logging.getLogger('camelot')
|
||||
logger = setup_logging(__name__)
|
||||
|
||||
|
||||
class Stream(BaseParser):
|
||||
|
|
@ -167,8 +166,7 @@ class Stream(BaseParser):
|
|||
else:
|
||||
ncols = max(set(elements), key=elements.count)
|
||||
if ncols == 1:
|
||||
# no tables condition
|
||||
warnings.warn("No tables found on {}".format(
|
||||
logger.info("No tables found on {}".format(
|
||||
os.path.basename(self.rootname)))
|
||||
cols = [(t.x0, t.x1)
|
||||
for r in rows_grouped if len(r) == ncols for t in r]
|
||||
|
|
@ -232,7 +230,7 @@ class Stream(BaseParser):
|
|||
self._generate_layout(filename)
|
||||
|
||||
if not self.horizontal_text:
|
||||
warnings.warn("No tables found on {}".format(
|
||||
logger.info("No tables found on {}".format(
|
||||
os.path.basename(self.rootname)))
|
||||
return [], self.g
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,34 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
|
|||
LTTextLineVertical)
|
||||
|
||||
|
||||
def setup_logging(name):
    """Return the logger called `name`, set up to print to the console.

    The logger is given an INFO threshold (unless a level was already
    configured on it) and a single stream handler that formats records
    as ``timestamp - level - function - message``.  Calling this again
    for the same name reuses the existing handler instead of stacking
    another one.

    Parameters
    ----------
    name : str
        Name passed to `logging.getLogger`, typically the calling
        module's ``__name__``.

    Returns
    -------
    logger : logging.Logger
        The configured logger.

    """
    logger = logging.getLogger(name)

    # A logger left at NOTSET defers to its ancestors, and the root
    # logger defaults to WARNING, so INFO records would be discarded
    # before they ever reach the handler.  A level that was set
    # explicitly elsewhere is left untouched.
    if logger.level == logging.NOTSET:
        logger.setLevel(logging.INFO)

    # Handlers accumulate on the shared logger object; adding one on
    # every call would print each record once per call.
    if not logger.handlers:
        format_string = (
            '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
        formatter = logging.Formatter(
            format_string, datefmt='%Y-%m-%dT%H:%M:%S')

        handler = logging.StreamHandler()
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)

        logger.addHandler(handler)

    return logger
|
||||
|
||||
|
||||
logger = setup_logging(__name__)
|
||||
|
||||
|
||||
def translate(x1, x2):
|
||||
"""
|
||||
|
||||
|
|
@ -143,37 +171,6 @@ def scale_image(tables, v_segments, h_segments, factors):
|
|||
return tables_new, v_segments_new, h_segments_new
|
||||
|
||||
|
||||
def setup_logging(log_filepath):
    """Attach a log-file handler and a console handler to "camelot".

    Parameters
    ----------
    log_filepath : str
        Path of the log file; it is opened in append mode as UTF-8.

    Returns
    -------
    logger : logging.Logger
        The "camelot" logger, set to DEBUG, with both handlers added.

    """
    record_layout = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
    timestamp_layout = '%Y-%m-%dT%H:%M:%S'

    camelot_logger = logging.getLogger("camelot")
    camelot_logger.setLevel(logging.DEBUG)

    # The file handler records everything from DEBUG up and is added
    # first; the console handler only shows INFO and above.
    file_sink = logging.FileHandler(log_filepath, mode='a', encoding='utf-8')
    console_sink = logging.StreamHandler()
    sinks = ((file_sink, logging.DEBUG), (console_sink, logging.INFO))

    for sink, threshold in sinks:
        sink.setLevel(threshold)
        sink.setFormatter(
            logging.Formatter(record_layout, datefmt=timestamp_layout))
        camelot_logger.addHandler(sink)

    return camelot_logger
|
||||
|
||||
|
||||
def get_rotation(lttextlh, lttextlv, ltchar):
|
||||
"""
|
||||
|
||||
|
|
@ -419,7 +416,11 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
|
|||
else:
|
||||
lt_col_overlap.append(-1)
|
||||
if len(filter(lambda x: x != -1, lt_col_overlap)) == 0:
|
||||
logging.warning("Text did not fit any column.")
|
||||
text = t.get_text().strip('\n')
|
||||
text_range = (t.x0, t.x1)
|
||||
col_range = (table.cols[0][0], table.cols[-1][1])
|
||||
logger.info("{} {} does not lie in column range {}".format(
|
||||
text, text_range, col_range))
|
||||
r_idx = r
|
||||
c_idx = lt_col_overlap.index(max(lt_col_overlap))
|
||||
break
|
||||
|
|
|
|||
Loading…
Reference in New Issue