Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# -*- coding: utf-8 -*-
"""
@file
@brief Link to data from `Gutenberg <http://www.gutenberg.org/>`_,
provides an automated way to get the data from this website.
Some data may be replicated here to unit test notebooks.
"""

8import os 

9import urllib.request 

10from urllib.error import URLError 

11 

12 

def gutenberg_name(name="condamne", local=False, load=False):
    """
    Retrieves data from `Gutenberg <http://www.gutenberg.org/>`_.

    @param      name        name of the requested data
    @param      local       use the local copy (file ``data_gutenberg/pg6838.txt``
                            located next to this module) instead of the website
    @param      load        load the data and return its content instead of
                            its location
    @return                 content or filename or url

    The returned value depends on *local* and *load*:

    * ``load=False, local=False``: the url of the text
    * ``load=False, local=True``: the filename of the local copy
    * ``load=True, local=False``: the text downloaded from the url, decoded
      as utf-8; if the download raises ``URLError``, the function falls back
      to the local copy
    * ``load=True, local=True``: the content of the local copy

    Raises ``ValueError`` if *name* is not a known dataset and
    ``FileNotFoundError`` if the local copy is needed but does not exist.

    List of available datasets:

    * ``condamne``: `Le dernier jour d'un condamné <http://www.gutenberg.org/ebooks/6838>`_, Victor Hugo
    """
    if name != "condamne":
        raise ValueError(
            "unknown name '{0}', check the code of the function".format(name))

    url = "http://www.gutenberg.org/cache/epub/6838/pg6838.txt"
    this = os.path.abspath(os.path.dirname(__file__))
    loc = os.path.join(this, "data_gutenberg", "pg6838.txt")

    if not load:
        # Only the location is requested, nothing is read or downloaded.
        if not local:
            return url
        if not os.path.exists(loc):
            raise FileNotFoundError(loc)
        return loc

    if not local:
        try:
            # The context manager closes the connection, no explicit close().
            with urllib.request.urlopen(url) as u:
                raw = u.read()
        except URLError:
            # The website cannot be reached, we switch to the local copy.
            pass
        else:
            return raw.decode("utf8")

    if not os.path.exists(loc):
        raise FileNotFoundError(loc)
    with open(loc, "r", encoding="utf8") as f:
        return f.read()