Seattle#

Links: notebook, html, PDF, python, slides, GitHub

This notebook displays some of the data available at Pronto Open Data. We assume the data has already been downloaded.

# Notebook scaffolding: navigation menu plus inline plotting.
from jyquickhelper import add_notebook_menu
# presumably inserts a table of contents for the notebook -- confirm against jyquickhelper
add_notebook_menu()
# IPython magic (not plain Python): render matplotlib figures inside the notebook.
%matplotlib inline

Data#

from pyensae.datasource import download_data

# Fetch the Pronto open-data archive for December 2016.
# NOTE(review): the cells below read the CSV files by bare name, so this call
# presumably unpacks the archive into the working directory -- confirm.
file = download_data(
    "open_data_2016-12.zip",
    url="https://s3.amazonaws.com/pronto-data/",
)

Stations#

import pandas

# Station table; `stations` keeps a stable handle on it while `df` is the
# scratch name reused by the following cells.
stations = pandas.read_csv("2016-12_station_data.csv")
df = stations
df.head()
station_id name lat long install_date install_dockcount modification_date current_dockcount decommission_date
0 CH-06 12th Ave & E Denny Way 47.618549 -122.317017 10/13/2014 16 NaN 16 NaN
1 CH-15 12th Ave & E Mercer St 47.624142 -122.316811 10/13/2014 16 NaN 16 NaN
2 CD-01 12th Ave & E Yesler Way 47.602103 -122.316923 5/22/2015 16 8/9/2016 0 8/9/2016
3 UD-04 12th Ave & NE Campus Pkwy 47.656395 -122.315620 10/13/2014 16 NaN 16 NaN
4 CH-05 15th Ave E & E Thomas St 47.620712 -122.312805 10/13/2014 16 NaN 16 NaN
import folium
from pyensae.notebookhelper import folium_html_map

# Bounding box of the stations (here x holds latitudes, y holds longitudes).
minx, maxx = df.lat.min(), df.lat.max()
miny, maxy = df.long.min(), df.long.max()
# The map is centred on the coordinates of the first station in the table.
# NOTE(review): folium documents min_lat/max_lat/min_lon/max_lon as only
# taking effect together with max_bounds=True -- confirm whether restricting
# the view to the bounding box was intended.
map_osm = folium.Map(location=[47.618549, -122.317017],
                     min_lat=minx, max_lat=maxx, min_lon=miny, max_lon=maxy, zoom_start=12)
for station in df.to_dict("records"):
    x, y = station["lat"], station["long"]
    # folium does not like quotes in strings: "/" becomes "-", apostrophes are
    # dropped, and the popup label is capped at 15 characters (slicing is a
    # no-op on shorter names, so no length check is needed).
    name = station["name"].replace("/", "-").replace("'", '')[:15]
    map_osm.add_child(folium.CircleMarker([x, y], popup=name, radius=5))
folium_html_map(map_osm, width="80%")

Trips#

# Trip table; `bikes` keeps a stable handle on it while `df` is rebound from
# the stations table to the trips for the following cells.
bikes = pandas.read_csv("2016-12_trip_data.csv")
df = bikes
df.head()
trip_id starttime stoptime bikeid tripduration from_station_name to_station_name from_station_id to_station_id usertype gender birthyear
0 431 10/13/2014 10:31 10/13/2014 10:48 SEA00298 985.935 2nd Ave & Spring St Occidental Park / Occidental Ave S & S Washing... CBD-06 PS-04 Member Male 1960.0
1 432 10/13/2014 10:32 10/13/2014 10:48 SEA00195 926.375 2nd Ave & Spring St Occidental Park / Occidental Ave S & S Washing... CBD-06 PS-04 Member Male 1970.0
2 433 10/13/2014 10:33 10/13/2014 10:48 SEA00486 883.831 2nd Ave & Spring St Occidental Park / Occidental Ave S & S Washing... CBD-06 PS-04 Member Female 1988.0
3 434 10/13/2014 10:34 10/13/2014 10:48 SEA00333 865.937 2nd Ave & Spring St Occidental Park / Occidental Ave S & S Washing... CBD-06 PS-04 Member Female 1977.0
4 435 10/13/2014 10:34 10/13/2014 10:49 SEA00202 923.923 2nd Ave & Spring St Occidental Park / Occidental Ave S & S Washing... CBD-06 PS-04 Member Male 1971.0
# Size of the trips table as (rows, columns).
df.shape
(263136, 12)
from datetime import datetime, time

# starttime/stoptime are text such as "10/13/2014 10:31". An explicit format
# replaces `infer_datetime_format=True`, which is deprecated since pandas 2.0,
# and keeps parsing fast and strict on every pandas version.
# NOTE(review): assumes every row follows month/day/year hour:minute as in the
# rows displayed above -- a differently shaped timestamp raises ValueError.
ts_format = "%m/%d/%Y %H:%M"
df["dtstart"] = pandas.to_datetime(df.starttime, format=ts_format)
df["dtstop"] = pandas.to_datetime(df.stoptime, format=ts_format)

# Day of the year (1-366) of the trip start; trips from different years that
# fall on the same calendar day share the same value.
df["day"] = df.dtstart.dt.dayofyear
# Time of day of the trip start truncated to the minute, as datetime.time
# objects (vectorised equivalent of time(hour, minute, 0) per row).
df["time"] = df.dtstart.dt.floor("min").dt.time
df.day.hist(figsize=(14,4), bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x228fa5cd080>
../_images/bike_seatle_13_1.png
def _minutes_since_midnight(t):
    """Return the number of whole minutes between midnight and time *t*."""
    return t.hour * 60 + t.minute


# Trip start expressed in minutes since midnight; the displayed pair is the
# observed (min, max).
icro = df.time.apply(_minutes_since_midnight)
icro.min(), icro.max()
(0, 1439)
# Distribution of trip start times over the day, in minutes since midnight.
icro.hist(figsize=(14,4), bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x228f5e2b208>
../_images/bike_seatle_15_1.png
# Trips that started in June 2016 (lower bound inclusive, upper bound exclusive).
june_first = datetime(2016, 6, 1)
july_first = datetime(2016, 7, 1)
started_in_june = (df.dtstart >= june_first) & (df.dtstart < july_first)
june = df[started_in_june]
# One bin per day of the month.
june.day.hist(figsize=(14, 4), bins=30)
<matplotlib.axes._subplots.AxesSubplot at 0x228f5f9f198>
../_images/bike_seatle_16_1.png
# Trips that started on 11 June 2016; copied so the subset is an independent
# DataFrame rather than a view on `june`.
day_begin = datetime(2016, 6, 11)
day_end = datetime(2016, 6, 12)
june11 = june[(june.dtstart >= day_begin) & (june.dtstart < day_end)].copy()
# Number of departures per origin station, busiest first, top 20.
departures = june11[["from_station_id", "trip_id"]].groupby("from_station_id").count()
departures.sort_values("trip_id", ascending=False).head(n=20)
trip_id
from_station_id
WF-01 28
SLU-02 25
EL-01 22
SLU-19 21
WF-04 19
PS-04 19
BT-01 18
BT-05 14
CBD-13 13
UD-01 12
SLU-18 12
CBD-06 11
CH-16 11
CH-02 11
SLU-23 11
CH-08 11
SLU-01 10
PS-05 10
UW-07 9
UW-10 8

Too few values…