diff --git a/camelot/lattice.py b/camelot/lattice.py
index 079e2a0..57b4c12 100644
--- a/camelot/lattice.py
+++ b/camelot/lattice.py
@@ -162,6 +162,12 @@ class Lattice:
         different cells.
         (optional, default: False)
 
+    flag_size : bool
+        Whether or not to highlight a substring using <s></s>
+        if its size is different from rest of the string, useful for
+        super and subscripts.
+        (optional, default: True)
+
     shift_text : list
         {'l', 'r', 't', 'b'}
         Select one or more from above and pass them as a list to
@@ -176,17 +182,19 @@ class Lattice:
     """
     def __init__(self, table_area=None, fill=None, headers=None, mtol=[2],
                  scale=15, invert=False, margins=(1.0, 0.5, 0.1),
-                 split_text=False, shift_text=['l', 't'], debug=None):
+                 split_text=False, flag_size=True, shift_text=['l', 't'],
+                 debug=None):
 
         self.method = 'lattice'
         self.table_area = table_area
         self.fill = fill
-        self.headers = [h.split(',') for h in headers]
+        self.headers = headers
         self.mtol = mtol
         self.scale = scale
         self.invert = invert
         self.char_margin, self.line_margin, self.word_margin = margins
         self.split_text = split_text
+        self.flag_size = flag_size
         self.shift_text = shift_text
         self.debug = debug
 
@@ -248,6 +256,7 @@ class Lattice:
             if self.headers is not None:
                 if len(self.table_area) != len(self.headers):
                     raise ValueError("Length of headers should be equal to table_area.")
+                self.headers = [h.split(',') if hasattr(h, 'split') else h for h in self.headers]
 
             areas = []
             for area in self.table_area:
@@ -329,13 +338,20 @@ class Lattice:
                 self.debug_tables.append(table)
 
             assignment_errors = []
+            table_data['split_text'] = []
+            table_data['superscript'] = []
             for direction in t_bbox:
                 for t in t_bbox[direction]:
                     indices, error = get_table_index(
-                        table, t, direction, split_text=self.split_text)
+                        table, t, direction, split_text=self.split_text,
+                        flag_size=self.flag_size)
                     assignment_errors.append(error)
-                    indices = _reduce_index(table, indices, shift_text=self.shift_text)
+                    indices = _reduce_index(table, indices, shift_text=self.shift_text,)
+                    if len(indices) > 1:
+                        table_data['split_text'].append(indices)
                     for r_idx, c_idx, text in indices:
+                        if all(s in text for s in ['<s>', '</s>']):
+                            table_data['superscript'].append((r_idx, c_idx, text))
                         table.cells[r_idx][c_idx].add_text(text)
             score = get_score([[100, assignment_errors]])
             table_data['score'] = score
diff --git a/camelot/stream.py b/camelot/stream.py
index 04c3f06..d1ab29b 100644
--- a/camelot/stream.py
+++ b/camelot/stream.py
@@ -259,6 +259,12 @@ class Stream:
         different cells.
         (optional, default: False)
 
+    flag_size : bool
+        Whether or not to highlight a substring using <s></s>
+        if its size is different from rest of the string, useful for
+        super and subscripts.
+        (optional, default: True)
+
     debug : bool
         Set to True to generate a matplotlib plot of
         LTTextLineHorizontals in order to select table_area, columns.
@@ -266,7 +272,7 @@ class Stream:
     """
     def __init__(self, table_area=None, columns=None, ncolumns=None,
                  headers=None, ytol=[2], mtol=[0], margins=(1.0, 0.5, 0.1),
-                 split_text=False, debug=False):
+                 split_text=False, flag_size=True, debug=False):
 
         self.method = 'stream'
         self.table_area = table_area
@@ -274,9 +280,10 @@ class Stream:
         self.ncolumns = ncolumns
         self.ytol = ytol
         self.mtol = mtol
-        self.headers = [h.split(',') for h in headers]
+        self.headers = headers
         self.char_margin, self.line_margin, self.word_margin = margins
         self.split_text = split_text
+        self.flag_size = flag_size
         self.debug = debug
 
     def get_tables(self, pdfname):
@@ -318,6 +325,7 @@ class Stream:
             if self.headers is not None:
                 if len(self.table_area) != len(self.headers):
                     raise ValueError("Length of headers should be equal to table_area.")
+                self.headers = [h.split(',') if hasattr(h, 'split') else h for h in self.headers]
 
             table_bbox = {}
             for area in self.table_area:
@@ -418,12 +426,19 @@ class Stream:
             table = Table(cols, rows)
             table = table.set_all_edges()
             assignment_errors = []
+            table_data['split_text'] = []
+            table_data['superscript'] = []
             for direction in t_bbox:
                 for t in t_bbox[direction]:
                     indices, error = get_table_index(
-                        table, t, direction, split_text=self.split_text)
+                        table, t, direction, split_text=self.split_text,
+                        flag_size=self.flag_size)
                     assignment_errors.append(error)
+                    if len(indices) > 1:
+                        table_data['split_text'].append(indices)
                     for r_idx, c_idx, text in indices:
+                        if all(s in text for s in ['<s>', '</s>']):
+                            table_data['superscript'].append((r_idx, c_idx, text))
                         table.cells[r_idx][c_idx].add_text(text)
             if guess:
                 score = get_score([[66, assignment_errors], [34, [len_non_mode / len(elements)]]])
diff --git a/camelot/utils.py b/camelot/utils.py
index e798dd8..91e19bc 100644
--- a/camelot/utils.py
+++ b/camelot/utils.py
@@ -1,6 +1,8 @@
 from __future__ import division
 import os
 import logging
+from itertools import groupby
+from operator import itemgetter
 
 import numpy as np
 
@@ -500,7 +502,49 @@ def merge_close_values(ar, mtol=2):
     return ret
 
 
-def split_textline(table, textline, direction):
+def flag_on_size(textline, direction):
+    """Flags a super/subscript by enclosing it with <s></s>. May give
+    false positives.
+
+    When the characters have more than one size, runs at the smallest
+    size are wrapped in <s></s> and whitespace-only runs are dropped.
+
+    Parameters
+    ----------
+    textline : list
+        List of PDFMiner LTChar/LTAnno objects.
+
+    direction : string
+        {'horizontal', 'vertical'}
+        Direction of the PDFMiner LTTextLine object.
+
+    Returns
+    -------
+    fstring : string
+        Text of the textline with the flagged runs marked up.
+    """
+    if direction not in ('horizontal', 'vertical'):
+        raise ValueError("direction should be 'horizontal' or 'vertical'.")
+    # Size is the glyph extent perpendicular to the text direction.
+    attr = 'height' if direction == 'horizontal' else 'width'
+    d = [(t.get_text(), np.round(getattr(t, attr), decimals=6))
+         for t in textline if not isinstance(t, LTAnno)]
+    sizes = set(size for _, size in d)
+    if len(sizes) <= 1:
+        # Uniform size (or no sized characters): nothing to flag.
+        return ''.join([t.get_text() for t in textline]).strip('\n')
+    min_size = min(sizes)
+    flist = []
+    # d excludes LTAnno objects, so their text is not part of any run.
+    for size, chars in groupby(d, itemgetter(1)):
+        run = ''.join(text for text, _ in chars)
+        if not run.strip():
+            continue
+        flist.append(('<s>' + run + '</s>') if size == min_size else run)
+    return ''.join(flist).strip('\n')
+
+
+def split_textline(table, textline, direction, flag_size=True):
     """Splits PDFMiner LTTextLine into substrings if it spans across
     multiple rows/columns.
 
@@ -516,9 +560,15 @@ def split_textline(table, textline, direction):
         {'horizontal', 'vertical'}
         Direction of the PDFMiner LTTextLine object.
 
+    flag_size : bool
+        Whether or not to highlight a substring using <s></s>
+        if its size is different from rest of the string, useful for
+        super and subscripts.
+        (optional, default: True)
+
     Returns
     -------
-    cut_text : list
+    grouped_chars : list
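-        List of tuples of the form (idx, text) where idx is the index
-        of row/column and text is the an lttextline substring.
+        List of tuples of the form (r_idx, c_idx, text) where r_idx and
+        c_idx are row/column indices and text is an lttextline substring.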
     """
@@ -538,10 +588,10 @@ def split_textline(table, textline, direction):
                 if isinstance(obj, LTChar):
                     if (row[1] <= (obj.y0 + obj.y1) / 2 <= row[0] and
                             (obj.x0 + obj.x1) / 2 <= cut[1]):
-                        cut_text.append((r, cut[0], obj.get_text().strip('\n')))
+                        cut_text.append((r, cut[0], obj))
                         break
                 elif isinstance(obj, LTAnno):
-                    cut_text.append((r, cut[0], obj.get_text().strip('\n')))
+                    cut_text.append((r, cut[0], obj))
     elif direction == 'vertical' and not textline.is_empty():
         y_overlap = [j for j, y in enumerate(table.rows) if y[1] <= bbox[3] and bbox[1] <= y[0]]
         c_idx = [i for i, c in enumerate(table.cols) if c[0] <= (bbox[0] + bbox[2]) / 2 <= c[1]]
@@ -558,11 +608,18 @@ def split_textline(table, textline, direction):
                         cut_text.append((cut[0], c, obj.get_text()))
                         break
                 elif isinstance(obj, LTAnno):
-                    cut_text.append((cut[0], c, obj.get_text().strip('\n')))
-    return cut_text
+                    cut_text.append((cut[0], c, obj))
+    grouped_chars = []
+    for key, chars in groupby(cut_text, itemgetter(0, 1)):
+        if flag_size:
+            grouped_chars.append((key[0], key[1], flag_on_size([t[2] for t in chars], direction)))
+        else:
+            gchars = [t[2].get_text() for t in chars]
+            grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n')))
+    return grouped_chars
 
 
-def get_table_index(table, t, direction, split_text=False):
+def get_table_index(table, t, direction, split_text=False, flag_size=True):
     """Gets indices of the cell where given text object lies by
     comparing their y and x-coordinates.
 
@@ -583,6 +640,12 @@ def get_table_index(table, t, direction, split_text=False):
         multiple cells.
         (optional, default: False)
 
+    flag_size : bool
+        Whether or not to highlight a substring using <s></s>
+        if its size is different from rest of the string, useful for
+        super and subscripts.
+        (optional, default: True)
+
     Returns
     -------
     indices : list
@@ -632,9 +695,12 @@ def get_table_index(table, t, direction, split_text=False):
     error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea
 
     if split_text:
-        return split_textline(table, t, direction), error
+        return split_textline(table, t, direction, flag_size=flag_size), error
     else:
-        return [(r_idx, c_idx, t.get_text().strip('\n'))], error
+        if flag_size:
+            return [(r_idx, c_idx, flag_on_size(t._objs, direction))], error
+        else:
+            return [(r_idx, c_idx, t.get_text().strip('\n'))], error
 
 
 def get_score(error_weights):
diff --git a/tests/test_stream.py b/tests/test_stream.py
index a23ad08..aea1dbd 100644
--- a/tests/test_stream.py
+++ b/tests/test_stream.py
@@ -85,7 +85,7 @@ def test_stream_missing_value():
         ["4","","","",""]
     ]
     pdfname = os.path.join(testdir, "missing_values.pdf")
-    manager = Pdf(Stream(), pdfname, clean=True)
+    manager = Pdf(Stream(flag_size=False), pdfname, clean=True)
     tables = manager.extract()
     assert_equal(tables["page-1"]["table-1"]["data"], data)
 
@@ -210,11 +210,11 @@ def test_stream_table_rotation():
         ["","","","","","","","","54","","","","","","","","",""]
     ]
     pdfname = os.path.join(testdir, "left_rotated_table_2.pdf")
-    manager = Pdf(Stream(), pdfname, clean=True)
+    manager = Pdf(Stream(flag_size=False), pdfname, clean=True)
     tables = manager.extract()
     assert_equal(tables["page-1"]["table-1"]["data"], data)
 
     pdfname = os.path.join(testdir, "right_rotated_table_2.pdf")
-    manager = Pdf(Stream(), pdfname, clean=True)
+    manager = Pdf(Stream(flag_size=False), pdfname, clean=True)
     tables = manager.extract()
     assert_equal(tables["page-1"]["table-1"]["data"], data)
\ No newline at end of file