From a1e1fd781d7cdf39707f825a2851ff376e9ff5dd Mon Sep 17 00:00:00 2001
From: Vinayak Mehta <vmehta94@gmail.com>
Date: Fri, 23 Nov 2018 02:51:22 +0530
Subject: [PATCH] Fix comments

---
 camelot/core.py           | 17 ++++++++++-------
 camelot/parsers/stream.py |  8 ++++----
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/camelot/core.py b/camelot/core.py
index 44aff2b..e0687d2 100644
--- a/camelot/core.py
+++ b/camelot/core.py
@@ -10,10 +10,12 @@ import numpy as np
 import pandas as pd
 
 
-# minimum number of textlines to be considered a textedge
+# number of vertical textline intersections that a textedge
+# must exceed to be considered valid
 TEXTEDGE_REQUIRED_ELEMENTS = 4
-# y coordinate tolerance for extending text edge
+# y coordinate tolerance for extending textedge
 TEXTEDGE_EXTEND_TOLERANCE = 50
+# TODO: deal in percentages instead of absolutes
 # padding added to table area's lt and rb
 TABLE_AREA_PADDING = 10
 
@@ -36,7 +38,8 @@ class TextEdge(object):
             self.x = (self.intersections * self.x + x) / float(self.intersections + 1)
             self.y0 = y0
             self.intersections += 1
-            # a textedge is valid if it extends uninterrupted over required_elements
+            # a textedge is valid only if it extends uninterrupted
+            # over more than the required number of textlines
             if self.intersections > TEXTEDGE_REQUIRED_ELEMENTS:
                 self.is_valid = True
 
@@ -89,8 +92,8 @@ class TextEdges(object):
         }
 
         # TODO: naive
-        # get the vertical textedges that intersect maximum number of
-        # times with horizontal text rows
+        # get the vertical textedges that intersect the maximum
+        # number of times with horizontal textlines
         relevant_align = max(intersections_sum.items(), key=itemgetter(1))[0]
         return self._textedges[relevant_align]
 
@@ -130,8 +133,8 @@ class TextEdges(object):
         # extend table areas based on textlines that overlap
         # vertically. it's possible that these textlines were
         # eliminated during textedges generation since numbers and
-        # sentences/chars are often aligned differently.
-        # drawback: table areas that have paragraphs to their sides
+        # chars/words/sentences are often aligned differently.
+        # drawback: table areas that have paragraphs on their sides
         # will include the paragraphs too.
         for tl in textlines:
             found = None
diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
index 2aa5fc4..8f86dbd 100644
--- a/camelot/parsers/stream.py
+++ b/camelot/parsers/stream.py
@@ -247,10 +247,10 @@ class Stream(BaseParser):
                                  " should be equal")
 
     def _nurminen_table_detection(self, textlines):
-        # an general heuristic implementation of the table detection
+        # a general heuristic implementation of the table detection
         # algorithm described by Anssi Nurminen's master's thesis:
         # https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3
-        # assumes that tables vertically separated by some distance
+        # assumes that tables are separated vertically by some distance
 
         # TODO: add support for arabic text #141
         # sort textlines in reading order
@@ -263,9 +263,9 @@ class Stream(BaseParser):
         textedges.generate(text_grouped)
         # select relevant edges
         relevant_textedges = textedges.get_relevant()
-        # guess table areas using relevant edges
+        # guess table areas using textlines and relevant edges
         table_bbox = textedges.get_table_areas(textlines, relevant_textedges)
-        # treat whole page as table if not table areas found
+        # treat whole page as table area if no table areas found
         if not len(table_bbox):
             table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None}