Coverage for src/ensae_projects/datainc/data_bikes.py: 89%
56 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-07-20 04:37 +0200
1"""
2@file
3@brief Data related to a challenge, bike sharing stations and trips in Chicago (Divvy)
4"""
5import os
6from datetime import time
7import pandas
8from pyensae.datasource import download_data
9from pyensae.notebookhelper import folium_html_map
def get_chicago_stations(folder=".", as_df=False):
    """
    Downloads the processed archive ``Divvy_Trips_2016_Q3Q4.zip`` coming from
    `Divvy Data <https://www.divvybikes.com/system-data>`_
    and optionally loads its content.

    @param      folder      temporary folder where to download files
    @param      as_df       if True, reads the stations file and the two
                            trips files from *folder* and returns dataframes
    @return                 filename (the value returned by ``download_data``)
                            or 2 dataframes (`as_df=True`): the stations,
                            then the Q3 trips followed by the Q4 trips
    """
    downloaded = download_data("Divvy_Trips_2016_Q3Q4.zip",
                               # url="https://s3.amazonaws.com/divvy-data/tripdata/",
                               whereTo=folder)
    if not as_df:
        return downloaded

    def load(name):
        # Every CSV file is looked up in the download folder.
        return pandas.read_csv(os.path.join(folder, name))

    stations = load("Divvy_Stations_2016_Q3.csv")
    trips_q3 = load("Divvy_Trips_2016_Q3.csv")
    trips_q4 = load("Divvy_Trips_2016_Q4.csv")
    return stations, pandas.concat([trips_q3, trips_q4])
def df_crossjoin(df1, df2, **kwargs):
    """
    Makes a cross join (cartesian product) between two dataframes by using a constant temporary key.
    Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes.
    Source: `Cross join / cartesian product between pandas DataFrames
    <https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_.

    The input dataframes are left untouched: the temporary key is added
    to copies, so the function also works when *df1* and *df2* are the
    same object and never leaves the key behind if the merge fails.

    @param      df1     dataframe 1
    @param      df2     dataframe 2
    @param      kwargs  keyword arguments that will be passed to pd.merge()
    @return             cross join of df1 and df2
    """
    # assign() returns a new dataframe with the key appended as the last
    # column, exactly where an in-place assignment would have put it.
    left = df1.assign(_tmpkey=1)
    right = df2.assign(_tmpkey=1)
    res = pandas.merge(left, right, on='_tmpkey', **kwargs)
    res = res.drop('_tmpkey', axis=1)
    res.index = pandas.MultiIndex.from_product((df1.index, df2.index))
    return res
def add_missing_time(df, column, values, delay=10):
    """
    After aggregation, it usually happens that the series is sparse.
    This function adds rows for missing time.

    @param      df      dataframe to extend
    @param      column  column with time
    @param      values  column name or list of column names which contain
                        the values, the others are considered as the keys
    @param      delay   populate every *delay* minutes
    @return             new dataframe, sorted by *column*

    Raises ``ValueError`` if *values* is empty.
    """
    if isinstance(values, str):
        values = [values]
    if len(values) == 0:
        raise ValueError("values cannot be empty")

    # One time per slot of *delay* minutes over a whole day.
    all_times = [time(i // 60, i % 60, 0) for i in range(0, 24 * 60, delay)]
    keys = [c for c in df.columns if c not in values and c != column]
    allkeys = keys + [column]

    # Distinct combinations of keys found in df, crossed with every time slot.
    only = df[allkeys].groupby(
        keys, as_index=False).count().drop(column, axis=1)
    grid = df_crossjoin(only, pandas.DataFrame({column: all_times}))

    # The right join keeps every (keys, time) pair of the grid,
    # values are missing where df had no row.
    dfj = df.merge(grid, on=allkeys, how="right")

    # Missing values become 0 in every non-object column. The column is
    # reassigned instead of calling fillna(inplace=True) on dfj[name]:
    # that chained in-place update is not guaranteed to reach dfj.
    for name, dtype in dfj.dtypes.items():
        if dtype != object:
            dfj[name] = dfj[name].fillna(0)
    return dfj.sort_values(column)
def folium_html_stations_map(stations, html_width=None, html_height=None, radius=5, **kwargs):
    """
    Returns a :epkg:`folium` map which shows stations in different colors.

    The map is created when the first station is met: *zoom_start* defaults
    to 11 and the map is centered on that first station unless *location*
    is given in *kwargs*.

    @param      stations        list ``[ (lat, lon), color ]`` or ``[ (lat, lon), (name, color) ]``
    @param      kwargs          extra parameters for `Map <https://github.com/python-visualization/folium/blob/master/folium/folium.py#L19>`_
    @param      html_width      sent to function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    @param      html_height     sent to function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    @param      radius          size of the circles
    @return                     see function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    """
    import folium
    map_osm = None
    for (x, y), value in stations:
        if map_osm is None:
            kwargs.setdefault("zoom_start", 11)
            kwargs.setdefault("location", [x, y])
            # location is only passed as a keyword: sending it positionally
            # as well raises TypeError (multiple values for 'location').
            map_osm = folium.Map(**kwargs)
        marker_args = dict(radius=radius, fill_color=value, color=value)
        if isinstance(value, tuple):
            # (name, color): the name is displayed in a popup
            name, color = value
            marker_args.update(popup=name, fill_color=color, color=color)
        map_osm.add_child(folium.CircleMarker([x, y], **marker_args))
    # map_osm is still None here when stations is empty
    return folium_html_map(map_osm, width=html_width, height=html_height)