diff --git a/camelot/utils.py b/camelot/utils.py index e7ad848..11d28dd 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -29,16 +29,9 @@ from pdfminer.layout import ( LTImage, ) - -PY3 = sys.version_info[0] >= 3 -if PY3: - from urllib.request import urlopen - from urllib.parse import urlparse as parse_url - from urllib.parse import uses_relative, uses_netloc, uses_params -else: - from urllib2 import urlopen - from urlparse import urlparse as parse_url - from urlparse import uses_relative, uses_netloc, uses_params +from urllib.request import Request, urlopen +from urllib.parse import urlparse as parse_url +from urllib.parse import uses_relative, uses_netloc, uses_params _VALID_URLS = set(uses_relative + uses_netloc + uses_params) @@ -90,11 +83,10 @@ def download_url(url): """ filename = "{}.pdf".format(random_string(6)) with tempfile.NamedTemporaryFile("wb", delete=False) as f: - obj = urlopen(url) - if PY3: - content_type = obj.info().get_content_type() - else: - content_type = obj.info().getheader("Content-Type") + headers = {"User-Agent": "Mozilla/5.0"} + request = Request(url, None, headers) + obj = urlopen(request) + content_type = obj.info().get_content_type() if content_type != "application/pdf": raise NotImplementedError("File format not supported") f.write(obj.read()) diff --git a/tests/data.py b/tests/data.py index 3338a81..2eeddb4 100755 --- a/tests/data.py +++ b/tests/data.py @@ -4,16 +4,6 @@ from __future__ import unicode_literals data_stream = [ - [ - "", - "Table: 5 Public Health Outlay 2012-13 (Budget Estimates) (Rs. in 000)", - "", - "", - "", - "", - "", - "", - ], ["States-A", "Revenue", "", "Capital", "", "Total", "Others(1)", "Total"], ["", "", "", "", "", "Revenue &", "", ""], ["", "Medical &", "Family", "Medical &", "Family", "", "", ""], @@ -829,18 +819,6 @@ data_stream_table_rotated = [ ] data_stream_two_tables_1 = [ - [ - "[In thousands (11,062.6 represents 11,062,600) For year ending December 31. Based on Uniform Crime Reporting (UCR)", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ], [ "Program. Represents arrests reported (not charged) by 12,910 agencies with a total population of 247,526,916 as estimated", "", @@ -915,7 +893,7 @@ data_stream_two_tables_1 = [ "2,330 .9", ], [ - "Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n . .", + "Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n . .", "467 .9", "69 .1", "398 .8", @@ -1300,29 +1278,10 @@ data_stream_two_tables_1 = [ "", "", ], - [ - "", - "Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.", - "", - "", - "", - "", - "", - "", - "", - "", - ], ] + data_stream_two_tables_2 = [ - [ - "", - "Source: U.S. Department of Justice, Federal Bureau of Investigation, Uniform Crime Reports, Arrests Master Files.", - "", - "", - "", - "", - ], ["Table 325. Arrests by Race: 2009", "", "", "", "", ""], [ "[Based on Uniform Crime Reporting (UCR) Program. Represents arrests reported (not charged) by 12,371 agencies", @@ -1352,7 +1311,7 @@ data_stream_two_tables_2 = [ "123,656", ], [ - "Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .", + "Violent crime . . . . . . . .\n . .\n . .\n . .\n . .\n .\n .\n . .\n . .\n .\n .\n .\n .\n . .", "456,965", "268,346", "177,766", @@ -1600,16 +1559,9 @@ data_stream_two_tables_2 = [ "3,950", ], ["1 Except forcible rape and prostitution.", "", "", "", "", ""], - [ - "", - "Source: U.S. Department of Justice, Federal Bureau of Investigation, “Crime in the United States, Arrests,” September 2010,", - "", - "", - "", - "", - ], ] + data_stream_table_areas = [ ["", "One Withholding"], ["Payroll Period", "Allowance"], @@ -1776,18 +1728,7 @@ data_stream_columns = [ ] data_stream_split_text = [ - [ - "FEB", - "RUAR", - "Y 2014 M27 (BUS)", - "", - "ALPHABETIC LISTING BY T", - "YPE", - "", - "", - "", - "ABLPDM27", - ], + ["FEB", "RUAR", "Y 2014 M27 (BUS)", "", "", "", "", "", "", ""], ["", "", "", "", "OF ACTIVE LICENSES", "", "", "", "", "3/19/2014"], ["", "", "", "", "OKLAHOMA ABLE COMMIS", "SION", "", "", "", ""], ["LICENSE", "", "", "", "PREMISE", "", "", "", "", ""], @@ -2121,6 +2062,7 @@ data_stream_split_text = [ ], ] + data_stream_flag_size = [ [ "States", @@ -2820,7 +2762,7 @@ data_arabic = [ ] data_stream_layout_kwargs = [ - ["V i n s a u Ve r r e", ""], + ["V i n s a u V e r r e", ""], ["Les Blancs", "12.5CL"], ["A.O.P Côtes du Rhône", ""], ["Domaine de la Guicharde « Autour de la chapelle » 2016", "8 €"], diff --git a/tests/files/baseline_plots/test_joint_plot.png b/tests/files/baseline_plots/test_joint_plot.png index e9e40ec..61df900 100644 Binary files a/tests/files/baseline_plots/test_joint_plot.png and b/tests/files/baseline_plots/test_joint_plot.png differ diff --git a/tests/files/baseline_plots/test_line_plot.png b/tests/files/baseline_plots/test_line_plot.png index e8099ce..12c44c0 100644 Binary files a/tests/files/baseline_plots/test_line_plot.png and b/tests/files/baseline_plots/test_line_plot.png differ diff --git a/tests/files/baseline_plots/test_stream_contour_plot.png b/tests/files/baseline_plots/test_stream_contour_plot.png index a6e77f7..958ea0a 100644 Binary files a/tests/files/baseline_plots/test_stream_contour_plot.png and b/tests/files/baseline_plots/test_stream_contour_plot.png differ diff --git a/tests/files/baseline_plots/test_text_plot.png b/tests/files/baseline_plots/test_text_plot.png index 8cc3825..63b5520 100644 Binary files a/tests/files/baseline_plots/test_text_plot.png and b/tests/files/baseline_plots/test_text_plot.png differ diff --git a/tests/files/baseline_plots/test_textedge_plot.png b/tests/files/baseline_plots/test_textedge_plot.png index 63fc236..1de4e9c 100644 Binary files a/tests/files/baseline_plots/test_textedge_plot.png and b/tests/files/baseline_plots/test_textedge_plot.png differ