Coverage for src/ensae_teaching_cs/data/dataweb.py: 82%

17 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-04-28 06:23 +0200

1""" 

2@file 

3@brief Data from the web 

4""" 

5from io import StringIO 

6import pandas 

7from .data_helper import any_local_file 

8 

9 

def anyfile(name, local=True, cache_folder=".", filename=True, unzip=False, encoding=None):
    """
    Fetches a file stored in sub folder
    `data_web <https://github.com/sdpython/ensae_teaching_cs/tree/master/src/ensae_teaching_cs/data/data_web>`_
    by delegating to ``any_local_file``.

    @param      name            file to retrieve
    @param      local           local data or web (accepted for API compatibility,
                                this function does not forward it to ``any_local_file``)
    @param      cache_folder    where to cache the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the file
    @param      encoding        encoding
    @return                     whatever ``any_local_file`` returns, presumably the
                                file name or its text content depending on *filename*
    """
    # Gather the forwarded options once; *local* is not part of them.
    options = dict(cache_folder=cache_folder, filename=filename,
                   unzip=unzip, encoding=encoding)
    return any_local_file(name, "data_web", **options)

24 

25 

def google_trends(name="macron", local=True, cache_folder=".", filename=True):
    """
    Returns a Google Trends example stored as ``google_trends_<name>.csv``.
    See :func:`ensae_teaching_cs.data.dataweb.anyfile` to
    directly download it.

    @param      name            expression, inserted in the CSV file name
    @param      local           local data or web
    @param      cache_folder    where to cache the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @return                     the result of *anyfile* for that CSV file
    """
    csv_name = "google_trends_{}.csv".format(name)
    return anyfile(csv_name, local=local,
                   cache_folder=cache_folder, filename=filename)

39 

40 

def twitter_zip(name="tweets_macron_sijetaispresident_201609", local=True, cache_folder=".",
                filename=False, unzip=True, as_df=True, encoding="utf-8"):
    """
    Returns zipped tweets, parsed as a dataframe or as returned by
    :func:`ensae_teaching_cs.data.dataweb.anyfile`.
    See :func:`ensae_teaching_cs.data.dataweb.anyfile` to
    directly download it.

    @param      name            archive name without the ``.zip`` extension
    @param      local           local data or web
    @param      cache_folder    where to cache or unzip the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the file
    @param      as_df           if True, parses the data as tab-separated values
                                and returns a dataframe
    @param      encoding        encoding, forwarded to *anyfile* and used to read
                                the file when *filename* and *as_df* are both True
    @return                     a ``pandas.DataFrame`` if *as_df* is True, otherwise
                                the single value returned by *anyfile*
                                (filename or content depending on *filename*)
    @raise      ValueError      if *anyfile* returns more than one file
    """
    res = anyfile(name + ".zip", local=local, cache_folder=cache_folder,
                  filename=filename, unzip=unzip, encoding=encoding)

    # *anyfile* may hand back a list (presumably one entry per extracted
    # file -- confirm against any_local_file). Reduce it to its single entry
    # before branching so that both the dataframe and the raw paths agree.
    if isinstance(res, list):
        if len(res) > 1:
            raise ValueError(f"too many files: {res}")
        res = res[0]

    if not as_df:
        return res

    if filename:
        # *res* is a file name here, not the data itself: let pandas open it
        # instead of parsing the path string as if it were CSV text.
        return pandas.read_csv(res, sep="\t", encoding=encoding)

    # *res* holds the text content: expose it to pandas as a file-like object.
    return pandas.read_csv(StringIO(res), sep="\t")