Coverage for src/ensae_teaching_cs/data/dataweb.py: 82%
17 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Data from the web
4"""
5from io import StringIO
6import pandas
7from .data_helper import any_local_file
def anyfile(name, local=True, cache_folder=".", filename=True, unzip=False, encoding=None):
    """
    Fetches a file stored in sub folder
    `data_web <https://github.com/sdpython/ensae_teaching_cs/tree/master/src/ensae_teaching_cs/data/data_web>`_.

    The work is delegated to ``any_local_file`` with the sub folder
    fixed to ``"data_web"``.

    @param      name            file to download
    @param      local           local data or web (accepted for signature
                                compatibility, this function does not use it)
    @param      cache_folder    where to cache the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the file
    @param      encoding        encoding
    @return                     whatever ``any_local_file`` returns, the filename
                                or the text content depending on *filename*
    """
    # Every option except *local* is forwarded unchanged.
    options = dict(cache_folder=cache_folder, filename=filename,
                   unzip=unzip, encoding=encoding)
    return any_local_file(name, "data_web", **options)
def google_trends(name="macron", local=True, cache_folder=".", filename=True):
    """
    Returns a google trends example stored as ``google_trends_<name>.csv``.
    See :func:`ensae_teaching_cs.data.dataweb.anyfile` to
    directly download it.

    @param      name            expression, inserted into the CSV file name
    @param      local           local data or web
    @param      cache_folder    where to cache the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @return                     result of :func:`anyfile`, the filename or the
                                text content depending on *filename*
    """
    csv_name = f"google_trends_{name}.csv"
    return anyfile(csv_name, local=local, cache_folder=cache_folder, filename=filename)
def twitter_zip(name="tweets_macron_sijetaispresident_201609", local=True, cache_folder=".",
                filename=False, unzip=True, as_df=True, encoding="utf-8"):
    """
    Returns zipped twitter data.
    See :func:`ensae_teaching_cs.data.dataweb.anyfile` to
    directly download it.

    @param      name            filename without the ``.zip`` extension
    @param      local           local data or web
    @param      cache_folder    where to cache or unzip the data if downloaded a second time
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the file
    @param      as_df           if True, parse the data as a tab-separated table
                                and return a :epkg:`pandas:DataFrame`
    @param      encoding        encoding forwarded to :func:`anyfile`, also used to
                                read the file when *filename* and *as_df* are both True
    @return                     a dataframe if *as_df* is True, otherwise the value
                                returned by :func:`anyfile` (a one-element list is
                                replaced by its single element)
    @raise      ValueError      if :func:`anyfile` returns a list with more than one element
    """
    res = anyfile(name + ".zip", local=local,
                  cache_folder=cache_folder, filename=filename, unzip=unzip, encoding=encoding)

    # A list is reduced to its single element before any further processing,
    # so that the dataframe branch never receives a list.
    if isinstance(res, list):
        if len(res) > 1:
            raise ValueError(f"too many files: {res}")
        res = res[0]

    if not as_df:
        return res

    if filename:
        # *res* is a file name here, not text: let pandas open the file
        # rather than parsing the path itself as CSV content.
        # NOTE(review): assumes anyfile returns a readable path when
        # filename=True -- confirm against any_local_file.
        return pandas.read_csv(res, sep="\t", encoding=encoding)

    # *res* holds the text content, parsed from an in-memory buffer.
    return pandas.read_csv(StringIO(res), sep="\t")