diff --git a/camelot/core.py b/camelot/core.py index 276840c..2173774 100644 --- a/camelot/core.py +++ b/camelot/core.py @@ -99,7 +99,9 @@ class TextEdges(object): x0 = area[0] - TABLE_AREA_PADDING y0 = area[1] - TABLE_AREA_PADDING x1 = area[2] + TABLE_AREA_PADDING - y1 = area[3] + TABLE_AREA_PADDING + # TODO: deal in percentages instead of absolutes + # add a constant to include table headers + y1 = area[3] + TABLE_AREA_PADDING + 10 return (x0, y0, x1, y1) # sort relevant textedges in reading order @@ -149,6 +151,41 @@ class TextEdges(object): for area in table_areas: table_areas_padded[pad(area)] = None + # debug + import matplotlib.pyplot as plt + import matplotlib.patches as patches + + fig = plt.figure() + ax = fig.add_subplot(111, aspect='equal') + xs, ys = [], [] + for t in textlines: + xs.extend([t.x0, t.x1]) + ys.extend([t.y0, t.y1]) + ax.add_patch( + patches.Rectangle( + (t.x0, t.y0), + t.x1 - t.x0, + t.y1 - t.y0, + color='blue' + ) + ) + for area in table_areas_padded: + xs.extend([area[0], area[2]]) + ys.extend([area[1], area[3]]) + ax.add_patch( + patches.Rectangle( + (area[0], area[1]), + area[2] - area[0], + area[3] - area[1], + fill=False, + color='red' + ) + ) + + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + plt.show() + return table_areas_padded