Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Helpers for Internet
4"""
5import sys
7try:
8 import urllib.request as urllib_request
9 from urllib.error import HTTPError
10except ImportError: # pragma: no cover
11 import urllib2 as urllib_request
12 from urllib2 import HTTPError
class CannotDownloadException(Exception):
    """
    Error signalling that a download failed.

    Function @see fn get_url_content raises it when the requested
    url cannot be downloaded.
    """
def get_url_content(url, use_mozilla=False):
    """
    Retrieve the content of an url and return it as text.

    @param      url             (str) url to download
    @param      use_mozilla     (bool) if True, the request is sent with a
                                Mozilla-like ``User-agent`` header
                                (a Windows flavoured one when running on Windows)
    @return                     (str) downloaded content decoded as UTF-8

    Raises @see cl CannotDownloadException when opening the url fails
    with an ``HTTPError``. Other errors (e.g. an invalid url or a
    decoding failure) are propagated unchanged.
    """
    if use_mozilla:
        if sys.platform.startswith("win"):
            agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
        else:
            agent = 'Mozilla/5.0'
        target = urllib_request.Request(url, headers={'User-agent': agent})
    else:
        # urlopen accepts a plain url as well as a Request object.
        target = url

    try:
        response = urllib_request.urlopen(target)
    except HTTPError as e:  # pragma: no cover
        raise CannotDownloadException(
            "Unable to download from url '{0}'".format(url)) from e

    # Always release the connection, even if reading fails midway.
    try:
        raw = response.read()
    finally:
        response.close()
    return raw.decode("utf8")