Coverage for src/ensae_projects/datainc/data_bikes.py: 89%

56 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-07-20 04:37 +0200

1""" 

2@file 

3@brief Data related to a challenge, bike sharing stations and trips in Chicago (Divvy)

4""" 

5import os 

6from datetime import time 

7import pandas 

8from pyensae.datasource import download_data 

9from pyensae.notebookhelper import folium_html_map 

10 

11 

def get_chicago_stations(folder=".", as_df=False):
    """
    Downloads the processed archive ``Divvy_Trips_2016_Q3Q4.zip`` coming from
    `Divvy Data <https://www.divvybikes.com/system-data>`_.

    @param      folder      temporary folder where to download files
    @param      as_df       if True, reads the CSV files expected in *folder*
                            after the download and returns dataframes
    @return                 the value returned by ``download_data``
                            (documented as the filename) or 2 dataframes
                            (`as_df=True`): the stations, then the trips of
                            Q3 and Q4 concatenated in that order
    """
    downloaded = download_data("Divvy_Trips_2016_Q3Q4.zip",
                               # url="https://s3.amazonaws.com/divvy-data/tripdata/",
                               whereTo=folder)
    if not as_df:
        return downloaded

    stations = pandas.read_csv(
        os.path.join(folder, "Divvy_Stations_2016_Q3.csv"))
    # Both quarters are read in chronological order and stacked
    # (the original row indices are kept, so index values may repeat).
    trips = pandas.concat([
        pandas.read_csv(os.path.join(folder, name))
        for name in ("Divvy_Trips_2016_Q3.csv", "Divvy_Trips_2016_Q4.csv")
    ])
    return stations, trips

33 

34 

def df_crossjoin(df1, df2, **kwargs):
    """
    Makes a cross join (cartesian product) between two dataframes by using a constant temporary key.
    Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes.
    Source: `Cross join / cartesian product between pandas DataFrames
    <https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_.

    The input dataframes are not modified: the temporary key ``_tmpkey``
    is added to copies, which also makes ``df_crossjoin(df, df)`` work.

    @param      df1     dataframe 1
    @param      df2     dataframe 2
    @param      kwargs  keyword arguments that will be passed to pd.merge()
    @return             cross join of df1 and df2
    """
    # assign() returns a new dataframe, the caller's objects never see
    # the temporary column, even if the merge below raises.
    left = df1.assign(_tmpkey=1)
    right = df2.assign(_tmpkey=1)
    res = pandas.merge(left, right, on='_tmpkey',
                       **kwargs).drop('_tmpkey', axis=1)
    # Every row shares the same key, so the merge yields each row of df1
    # followed by every row of df2, the same order from_product produces.
    res.index = pandas.MultiIndex.from_product((df1.index, df2.index))
    return res

55 

56 

def add_missing_time(df, column, values, delay=10):
    """
    After aggregation, it usually happens that the series is sparse.
    This function adds rows for missing time.

    Every combination of keys found in *df* is associated with every time
    of the day (one every *delay* minutes, starting at midnight). Missing
    values in every column whose dtype is not ``object`` are replaced by 0.

    @param      df      dataframe to extend
    @param      column  column with time (compared to ``datetime.time`` values)
    @param      values  columns which contain the values, the others are considered as the keys
    @param      delay   populate every *delay* minutes
    @return             new dataframe, sorted by *column*
    """
    if isinstance(values, str):
        values = [values]
    if len(values) == 0:
        raise ValueError("values cannot be empty")
    all_times = [time(i // 60, i % 60, 0) for i in range(0, 24 * 60, delay)]
    keys = [c for c in df.columns if c not in values and c != column]
    dfti = pandas.DataFrame({column: all_times})
    allkeys = keys + [column]
    # Distinct key combinations: the count is only a means to group,
    # the counted time column is dropped right away.
    only = df[allkeys].groupby(
        keys, as_index=False).count().drop(column, axis=1)
    dfti = df_crossjoin(only, dfti)
    # The right join keeps every (keys, time) pair, rows absent from df
    # get missing values.
    dfj = df.merge(dfti, on=allkeys, how="right")
    # Columns are addressed by label and reassigned: positional access on
    # dtypes is deprecated and an in-place fillna on a selected column
    # does not update the dataframe when copy-on-write is enabled.
    for name, dtype in dfj.dtypes.items():
        if dtype != object:
            dfj[name] = dfj[name].fillna(0)
    return dfj.sort_values(column)

84 

85 

def folium_html_stations_map(stations, html_width=None, html_height=None, radius=5, **kwargs):
    """
    Returns a :epkg:`folium` map which shows stations in different colors.

    The map is created when the first station is read. It is centered on
    that station unless *location* is specified in *kwargs*, and
    *zoom_start* defaults to 11.

    @param      stations        list ``[ (lat, lon), color ]`` or ``[ (lat, lon), (name, color) ]``
    @param      kwargs          extra parameters for `Map <https://github.com/python-visualization/folium/blob/master/folium/folium.py#L19>`_
    @param      html_width      sent to function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    @param      html_height     sent to function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    @param      radius          size of the circles
    @return                     see function
                                `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html
                                #pyensae.notebookhelper.folium_helper.folium_html_map>`_
    """
    import folium
    map_osm = None
    for (lat, lon), value in stations:
        if map_osm is None:
            kwargs.setdefault("zoom_start", 11)
            # location is passed only once, as a keyword: giving it both
            # positionally and through kwargs raises a TypeError.
            kwargs.setdefault("location", [lat, lon])
            map_osm = folium.Map(**kwargs)
        if isinstance(value, tuple):
            name, color = value
        else:
            # No name, the marker gets no popup.
            name, color = None, value
        marker = folium.CircleMarker([lat, lon], popup=name, radius=radius,
                                     fill_color=color, color=color)
        map_osm.add_child(marker)
    # NOTE(review): map_osm is still None when stations is empty,
    # confirm how folium_html_map handles that case.
    return folium_html_map(map_osm, width=html_width, height=html_height)