diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
index a758af7..8b0fc2b 100644
--- a/camelot/parsers/lattice.py
+++ b/camelot/parsers/lattice.py
@@ -2,7 +2,6 @@ from __future__ import division
 import os
 import copy
 import logging
-import warnings
 import subprocess
 
 import numpy as np
@@ -12,12 +11,12 @@ from .base import BaseParser
 from ..core import Table
 from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
                      merge_close_values, get_table_index, compute_accuracy,
-                     count_empty, encode_)
+                     count_empty, encode_, setup_logging)
 from ..image_processing import (adaptive_threshold, find_lines,
                                 find_table_contours, find_table_joints)
 
 
-logger = logging.getLogger('camelot')
+logger = setup_logging(__name__)
 
 
 class Lattice(BaseParser):
@@ -218,7 +217,7 @@ class Lattice(BaseParser):
         self._generate_layout(filename)
 
         if not self.horizontal_text:
-            warnings.warn("No tables found on {}".format(
+            logger.info("No tables found on {}".format(
                 os.path.basename(self.rootname)))
             return [], self.g
 
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index fe3a3e8..37e9bcf 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -1,7 +1,6 @@
 from __future__ import division
 import os
 import logging
-import warnings
 
 import numpy as np
 import pandas as pd
@@ -12,7 +11,7 @@ from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
-                     count_empty, encode_)
+                     count_empty, encode_, setup_logging)
 
 
-logger = logging.getLogger('camelot')
+logger = setup_logging(__name__)
 
 
 class Stream(BaseParser):
@@ -167,8 +166,7 @@ class Stream(BaseParser):
         else:
             ncols = max(set(elements), key=elements.count)
             if ncols == 1:
-                # no tables condition
-                warnings.warn("No tables found on {}".format(
+                logger.info("No tables found on {}".format(
                     os.path.basename(self.rootname)))
             cols = [(t.x0, t.x1)
                 for r in rows_grouped if len(r) == ncols for t in r]
@@ -232,7 +230,7 @@ class Stream(BaseParser):
         self._generate_layout(filename)
 
         if not self.horizontal_text:
-            warnings.warn("No tables found on {}".format(
+            logger.info("No tables found on {}".format(
                 os.path.basename(self.rootname)))
             return [], self.g
 
diff --git a/camelot/utils.py b/camelot/utils.py
index 39ab918..7eaad84 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -18,6 +18,45 @@ from pdfminer.layout import (LAParams, LTAnno, LTChar, LTTextLineHorizontal,
                              LTTextLineVertical)
 
 
+def setup_logging(name):
+    """Return the logger called ``name``, set up to log to the console.
+
+    Parameters
+    ----------
+    name : str
+        Name of the logger to fetch, e.g. the caller's ``__name__``.
+
+    Returns
+    -------
+    logger : logging.Logger
+        Logger with level INFO and one INFO-level stream handler.
+
+    """
+    logger = logging.getLogger(name)
+    if logger.handlers:
+        # Already configured: adding another handler here would make
+        # every record for this logger be printed more than once.
+        return logger
+
+    # Without an explicit level the logger inherits WARNING from the root
+    # logger and every logger.info() call would be silently discarded.
+    logger.setLevel(logging.INFO)
+
+    format_string = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
+    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S')
+
+    handler = logging.StreamHandler()
+    handler.setLevel(logging.INFO)
+    handler.setFormatter(formatter)
+
+    logger.addHandler(handler)
+
+    return logger
+
+
+logger = setup_logging(__name__)
+
+
 def translate(x1, x2):
     """
 
@@ -143,37 +182,6 @@ def scale_image(tables, v_segments, h_segments, factors):
     return tables_new, v_segments_new, h_segments_new
 
 
-def setup_logging(log_filepath):
-    """
-
-    Parameters
-    ----------
-    log_filepath
-
-    Returns
-    -------
-
-    """
-    logger = logging.getLogger("camelot")
-    logger.setLevel(logging.DEBUG)
-    # Log File Handler (Associating one log file per webservice run)
-    log_file_handler = logging.FileHandler(log_filepath,
-                                           mode='a',
-                                           encoding='utf-8')
-    log_file_handler.setLevel(logging.DEBUG)
-    format_string = '%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'
-    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S')
-    log_file_handler.setFormatter(formatter)
-    logger.addHandler(log_file_handler)
-    # Stream Log Handler (For console)
-    stream_log_handler = logging.StreamHandler()
-    stream_log_handler.setLevel(logging.INFO)
-    formatter = logging.Formatter(format_string, datefmt='%Y-%m-%dT%H:%M:%S')
-    stream_log_handler.setFormatter(formatter)
-    logger.addHandler(stream_log_handler)
-    return logger
-
-
 def get_rotation(lttextlh, lttextlv, ltchar):
     """
 
@@ -419,7 +427,11 @@ def get_table_index(table, t, direction, split_text=False, flag_size=True):
                 else:
                     lt_col_overlap.append(-1)
             if len(filter(lambda x: x != -1, lt_col_overlap)) == 0:
-                logging.warning("Text did not fit any column.")
+                text = t.get_text().strip('\n')
+                text_range = (t.x0, t.x1)
+                col_range = (table.cols[0][0], table.cols[-1][1])
+                logger.info("{} {} does not lie in column range {}".format(
+                    text, text_range, col_range))
             r_idx = r
             c_idx = lt_col_overlap.index(max(lt_col_overlap))
             break