From 5c3a686ebedc933e9055ba9ea947d5931fdfde47 Mon Sep 17 00:00:00 2001
From: anakin87 <44616784+anakin87@users.noreply.github.com>
Date: Tue, 8 Dec 2020 18:57:41 +0100
Subject: [PATCH 1/2] Introduce Faq

Introduced faq.
Started with reducing memory usage.
---
 docs/user/faq.rst | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 docs/user/faq.rst

diff --git a/docs/user/faq.rst b/docs/user/faq.rst
new file mode 100644
index 0000000..8f3b59e
--- /dev/null
+++ b/docs/user/faq.rst
@@ -0,0 +1,46 @@
+.. _faq:
+
+FAQ
+===
+
+This part of the documentation answers some common questions. If you want to add some questions you can simply open an issue `here `_.
+
+
+How to reduce memory usage for long PDFs?
+---------------------------------------------------
+
+During table extraction from long PDF documents, RAM usage can grow significantly.
+
+A simple workaround is to divide the extraction into some chunks (for example, chunks of 50 pages); at the end of every chunk extraction, data are saved to disk.
+
+For more information, refer to this code snippet from `@anakin87 `_:
+
+.. code-block:: python3
+
+    import camelot
+
+    def chunks(l, n):
+        """Yield successive n-sized chunks from l."""
+        for i in range(0, len(l), n):
+            yield l[i:i + n]
+
+    def extract_tables_with_less_memory_usage(filepath, pages, params={},
+                                              export_path='.', chunks_length=50):
+        """
+        Control page number
+        and subdivide the extraction work into n-pages parts (chunks_length).
+        At the end of every part, save the data on disk and free ram
+        """
+
+        # get list of document pages from Camelot handler
+        handler=camelot.handlers.PDFHandler(filepath)
+        pages_list=handler._get_pages(filepath,pages=pages)
+
+        # chunk pages list
+        pages_chunks=list(chunks(pages_list,chunks_length))
+
+        # extraction and export
+        for chunk in pages_chunks:
+            pages_string=str(chunk).replace('[','').replace(']','')
+            tables = camelot.read_pdf(filepath, pages=pages_string,**params)
+            tables.export(f'{export_path}/tables.json',f='json')

From 2aaa913c401e9d2dd7f599aa9e4c2b1c19e943d7 Mon Sep 17 00:00:00 2001
From: Vinayak Mehta
Date: Mon, 28 Jun 2021 00:15:43 +0530
Subject: [PATCH 2/2] Update faq

---
 docs/index.rst    |  1 +
 docs/user/faq.rst | 61 +++++++++++++++++++++++++++++++++----------------------------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index c3e1de4..b15fe33 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -109,6 +109,7 @@ This part of the documentation begins with some background information about why
    user/install
    user/how-it-works
    user/quickstart
+   user/faq
    user/advanced
    user/cli
 
diff --git a/docs/user/faq.rst b/docs/user/faq.rst
index 8f3b59e..8081083 100644
--- a/docs/user/faq.rst
+++ b/docs/user/faq.rst
@@ -1,46 +1,51 @@
 .. _faq:
 
-FAQ
-===
-
-This part of the documentation answers some common questions. If you want to add some questions you can simply open an issue `here `_.
+Frequently Asked Questions
+==========================
 
+This part of the documentation answers some common questions. To add questions, please open an issue `here `_.
 
 How to reduce memory usage for long PDFs?
----------------------------------------------------
+-----------------------------------------
 
 During table extraction from long PDF documents, RAM usage can grow significantly.
-
-A simple workaround is to divide the extraction into some chunks (for example, chunks of 50 pages); at the end of every chunk extraction, data are saved to disk.
 
-For more information, refer to this code snippet from `@anakin87 `_:
+A simple workaround is to divide the extraction into chunks, and save extracted data to disk at the end of every chunk.
 
-.. code-block:: python3
+For more details, check out this code snippet from `@anakin87 `_:
+
+::
 
     import camelot
-
+
+
     def chunks(l, n):
         """Yield successive n-sized chunks from l."""
         for i in range(0, len(l), n):
-            yield l[i:i + n]
-
-    def extract_tables_with_less_memory_usage(filepath, pages, params={},
-                                              export_path='.', chunks_length=50):
+            yield l[i : i + n]
+
+
+    def extract_tables(filepath, pages, chunk_size=50, export_path=".", params={}):
         """
-        Control page number
-        and subdivide the extraction work into n-pages parts (chunks_length).
-        At the end of every part, save the data on disk and free ram
+        Divide the extraction work into chunks of chunk_size pages.
+        At the end of every chunk, save data on disk and free RAM.
+
+        filepath : str
+            Filepath or URL of the PDF file.
+        pages : str, optional (default: '1')
+            Comma-separated page numbers.
+            Example: '1,3,4' or '1,4-end' or 'all'.
         """
-
-        # get list of document pages from Camelot handler
-        handler=camelot.handlers.PDFHandler(filepath)
-        pages_list=handler._get_pages(filepath,pages=pages)
-
+
+        # get list of pages from camelot.handlers.PDFHandler
+        handler = camelot.handlers.PDFHandler(filepath)
+        page_list = handler._get_pages(filepath, pages=pages)
+
         # chunk pages list
-        pages_chunks=list(chunks(pages_list,chunks_length))
-
+        page_chunks = list(chunks(page_list, chunk_size))
+
         # extraction and export
-        for chunk in pages_chunks:
-            pages_string=str(chunk).replace('[','').replace(']','')
-            tables = camelot.read_pdf(filepath, pages=pages_string,**params)
-            tables.export(f'{export_path}/tables.json',f='json')
+        for chunk in page_chunks:
+            pages_string = str(chunk).replace("[", "").replace("]", "")
+            tables = camelot.read_pdf(filepath, pages=pages_string, **params)
+            tables.export(f"{export_path}/tables.csv")
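
For reference, here is a minimal usage sketch of the ``extract_tables`` helper defined in the snippet above; the file name, output directory, and the ``flavor`` setting are illustrative placeholders rather than part of the patch::

    # extract_tables is the helper defined in the FAQ snippet above.
    # Process a long PDF in 50-page chunks; CSV files are written into an
    # existing "out" directory. "long_report.pdf" is a placeholder file name.
    extract_tables(
        "long_report.pdf",
        pages="all",
        chunk_size=50,
        export_path="out",
        params={"flavor": "lattice"},  # extra keyword arguments forwarded to camelot.read_pdf
    )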