import mermaid from ''; mermaid.initialize({ startOnLoad: true });
This notebook displays some of the data available at Divvy Data. We assume the data has already been downloaded.
from jyquickhelper import add_notebook_menu
add_notebook_menu()
%matplotlib inline
from pyensae.datasource import download_data
file = download_data("Divvy_Trips_2016_Q3Q4.zip", url="https://s3.amazonaws.com/divvy-data/tripdata/")
import pandas

# Station table for 2016 Q3. ``df`` is bound to the same DataFrame object
# as ``stations`` so the cells below can keep using the short name.
stations = pandas.read_csv("Divvy_Stations_2016_Q3.csv")
df = stations
df.head()
| | id | name | latitude | longitude | dpcapacity | online_date |
---|---|---|---|---|---|---|
0 | 456 | 2112 W Peterson Ave | 41.991178 | -87.683593 | 15 | 5/12/2015 |
1 | 101 | 63rd St Beach | 41.781016 | -87.576120 | 23 | 4/20/2015 |
2 | 109 | 900 W Harrison St | 41.874675 | -87.650019 | 19 | 8/6/2013 |
3 | 21 | Aberdeen St & Jackson Blvd | 41.877726 | -87.654787 | 15 | 6/21/2013 |
4 | 80 | Aberdeen St & Monroe St | 41.880420 | -87.655599 | 19 | 6/26/2013 |
import folium
from pyensae.notebookhelper import folium_html_map

# Bounding box of the station coordinates (x = latitude, y = longitude).
minx, maxx = df.latitude.min(), df.latitude.max()
miny, maxy = df.longitude.min(), df.longitude.max()

# Centre the map on the middle of the bounding box.
center = [(minx + maxx)/2, (miny + maxy)/2]
map_osm = folium.Map(
    location=center,
    min_lat=minx,
    max_lat=maxx,
    min_lon=miny,
    max_lon=maxy,
    zoom_start=11,
)

# One small yellow circle per station; the popup shows the station name.
for station in df.to_dict("records"):
    marker = folium.CircleMarker(
        [station["latitude"], station["longitude"]],
        popup=station["name"],
        radius=4,
        fill_color="yellow",
    )
    map_osm.add_child(marker)

folium_html_map(map_osm, width="80%")
# Trip table for 2016 Q3. ``df`` is rebound here and now refers to the
# same DataFrame object as ``bikes`` (no longer to the stations table).
bikes = pandas.read_csv("Divvy_Trips_2016_Q3.csv")
df = bikes
df.head()
| | trip_id | starttime | stoptime | bikeid | tripduration | from_station_id | from_station_name | to_station_id | to_station_name | usertype | gender | birthyear |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 12150160 | 9/30/2016 23:59:58 | 10/1/2016 00:04:03 | 4959 | 245 | 69 | Damen Ave & Pierce Ave | 17 | Wood St & Division St | Subscriber | Male | 1988.0 |
1 | 12150159 | 9/30/2016 23:59:58 | 10/1/2016 00:04:09 | 2589 | 251 | 383 | Ashland Ave & Harrison St | 320 | Loomis St & Lexington St | Subscriber | Female | 1990.0 |
2 | 12150158 | 9/30/2016 23:59:51 | 10/1/2016 00:24:51 | 3656 | 1500 | 302 | Sheffield Ave & Wrightwood Ave | 334 | Lake Shore Dr & Belmont Ave | Customer | NaN | NaN |
3 | 12150157 | 9/30/2016 23:59:51 | 10/1/2016 00:03:56 | 3570 | 245 | 475 | Washtenaw Ave & Lawrence Ave | 471 | Francisco Ave & Foster Ave | Subscriber | Female | 1988.0 |
4 | 12150156 | 9/30/2016 23:59:32 | 10/1/2016 00:26:50 | 3158 | 1638 | 302 | Sheffield Ave & Wrightwood Ave | 492 | Leavitt St & Addison St | Customer | NaN | NaN |
# (number of rows, number of columns) of the trips DataFrame loaded above.
df.shape
(1441811, 12)
# The raw timestamps look like "9/30/2016 23:59:58": month/day/year with a
# 24-hour clock and no zero padding on month or day. Passing the format
# explicitly replaces the deprecated ``infer_datetime_format`` flag, pins
# month-first parsing, and makes any row in another layout fail loudly
# instead of being guessed at.
TRIP_TIME_FORMAT = "%m/%d/%Y %H:%M:%S"
df["dtstart"] = pandas.to_datetime(df.starttime, format=TRIP_TIME_FORMAT)
df["dtstop"] = pandas.to_datetime(df.stoptime, format=TRIP_TIME_FORMAT)
from datetime import datetime, time

# Vectorised ``.dt`` accessors replace the per-row Python lambdas, which are
# slow on a table of this size; the resulting values are the same.

# Day of the year (1-based) on which each trip started.
df["day"] = df.dtstart.dt.dayofyear

# Start time truncated to the minute, stored as ``datetime.time`` objects
# because the station histograms further down read ``.hour`` / ``.minute``
# from this column.
df["time"] = df.dtstart.dt.floor("min").dt.time

# Trips per day of the year.
df.day.hist(figsize=(14,4), bins=92);

# Trips per minute of the day (0 = midnight), computed straight from the
# timestamps rather than by unpacking the ``time`` objects row by row.
(df.dtstart.dt.hour * 60 + df.dtstart.dt.minute).hist(figsize=(14,4), bins=100);
# Keep the trips that started in September 2016; the upper bound
# (1 October) is exclusive.
sept_start, sept_end = datetime(2016, 9, 1), datetime(2016, 10, 1)
started_in_sept = (df.dtstart >= sept_start) & (df.dtstart < sept_end)
sept = df[started_in_sept]

# Trips per day across the month.
sept.day.hist(figsize=(14,4), bins=30);
# Trips started from 2016-09-12 up to, but not including, 2016-09-17
# (Monday through Friday). The copy detaches the result from ``sept``.
week_start, week_end = datetime(2016, 9, 12), datetime(2016, 9, 17)
started_in_week = (sept.dtstart >= week_start) & (sept.dtstart < week_end)
sept16 = sept[started_in_week].copy()

# Number of trips leaving each station, busiest stations first (top 20).
departures = sept16.groupby("from_station_id")[["trip_id"]].count()
departures.sort_values("trip_id", ascending=False).head(n=20)
| | trip_id |
---|---|
from_station_id | |
35 | 1674 |
91 | 1298 |
77 | 1110 |
174 | 991 |
192 | 898 |
177 | 892 |
75 | 844 |
76 | 814 |
133 | 718 |
81 | 705 |
90 | 705 |
85 | 700 |
195 | 686 |
268 | 658 |
287 | 624 |
36 | 614 |
283 | 588 |
49 | 561 |
43 | 550 |
52 | 532 |
import matplotlib.pyplot as plt


def plot_station_activity(trips, station_id, bins=100, figsize=(14, 4)):
    """Draw two side-by-side histograms of trip start times for one station.

    The left panel uses the rows whose ``from_station_id`` equals
    *station_id*, the right panel the rows whose ``to_station_id`` equals
    it. In both panels the x value is the minute of the day
    (``hour * 60 + minute``) read from the ``time`` column, i.e. the moment
    the trip started.

    :param trips: DataFrame with ``from_station_id``, ``to_station_id`` and
        a ``time`` column holding objects with ``hour`` and ``minute``
    :param station_id: station identifier to select
    :param bins: number of histogram bins
    :param figsize: size of the matplotlib figure
    :return: the ``(fig, ax)`` pair created by ``plt.subplots``
    """
    fig, ax = plt.subplots(1, 2, figsize=figsize)
    for axis, column in zip(ax, ("from_station_id", "to_station_id")):
        selected = trips[trips[column] == station_id]
        minutes = selected["time"].apply(lambda t: t.minute + t.hour*60)
        minutes.hist(bins=bins, ax=axis)
        # Label each panel so the figures for different stations can be
        # told apart.
        axis.set_title("{0} = {1}".format(column, station_id))
    return fig, ax


# One figure per station, drawn in the same order as before. After the loop
# ``fig`` and ``ax`` refer to the last figure created.
for station_id in (35, 36, 49):
    fig, ax = plot_station_activity(sept16, station_id)