{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# 2018-09-18 - Rappels sur pandas et matplotlib\n", "\n", "Manipulation de donn\u00e9es autour du jeu des passagers du Titanic qu'on peut r\u00e9cup\u00e9rer sur [opendatasoft](https://public.opendatasoft.com/explore/dataset/titanic-passengers/?flg=fr) ou [awesome-public-datasets](https://github.com/awesomedata/awesome-public-datasets/tree/master/Datasets)."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": ["import pandas"]}, {"cell_type": "code", "execution_count": 2, "metadata": {"scrolled": false}, "outputs": [], "source": ["df = pandas.read_csv(\"titanic.csv/titanic.csv\")"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [{"data": {"text/plain": ["pandas.core.frame.DataFrame"]}, "execution_count": 4, "metadata": {}, "output_type": "execute_result"}], "source": ["type(df)"]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "
\n", " \n", " \n", " | \n", " PassengerId | \n", " Survived | \n", " Pclass | \n", " Name | \n", " Sex | \n", " Age | \n", " SibSp | \n", " Parch | \n", " Ticket | \n", " Fare | \n", " Cabin | \n", " Embarked | \n", "
\n", " \n", " \n", " \n", " 0 | \n", " 1 | \n", " 0 | \n", " 3 | \n", " Braund, Mr. Owen Harris | \n", " male | \n", " 22.0 | \n", " 1 | \n", " 0 | \n", " A/5 21171 | \n", " 7.2500 | \n", " NaN | \n", " S | \n", "
\n", " \n", " 1 | \n", " 2 | \n", " 1 | \n", " 1 | \n", " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", " female | \n", " 38.0 | \n", " 1 | \n", " 0 | \n", " PC 17599 | \n", " 71.2833 | \n", " C85 | \n", " C | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C "]}, "execution_count": 5, "metadata": {}, "output_type": "execute_result"}], "source": ["df.head(n=2)"]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " PassengerId | \n", " Survived | \n", " Pclass | \n", "
\n", " \n", " \n", " \n", " 0 | \n", " 1 | \n", " 0 | \n", " 3 | \n", "
\n", " \n", " 1 | \n", " 2 | \n", " 1 | \n", " 1 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" PassengerId Survived Pclass\n", "0 1 0 3\n", "1 2 1 1"]}, "execution_count": 6, "metadata": {}, "output_type": "execute_result"}], "source": ["subset = df[ [\"PassengerId\", \"Survived\", \"Pclass\"] ]\n", "subset.head(n=2)"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": ["survived = subset[ [\"Survived\", \"Pclass\"] ].groupby(\"Pclass\").sum()\n", "compte = (\n", "    subset[ [\"Survived\", \"Pclass\"] ]\n", "    .groupby(\"Pclass\")\n", "    .count()\n", "    .rename(columns={\"Survived\": \"total\"})\n", ")"]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " Survived | \n", "
\n", " \n", " Pclass | \n", " | \n", "
\n", " \n", " \n", " \n", " 1 | \n", " 136 | \n", "
\n", " \n", " 2 | \n", " 87 | \n", "
\n", " \n", " 3 | \n", " 119 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" Survived\n", "Pclass \n", "1 136\n", "2 87\n", "3 119"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["survived"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " total | \n", "
\n", " \n", " Pclass | \n", " | \n", "
\n", " \n", " \n", " \n", " 1 | \n", " 216 | \n", "
\n", " \n", " 2 | \n", " 184 | \n", "
\n", " \n", " 3 | \n", " 491 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" total\n", "Pclass \n", "1 216\n", "2 184\n", "3 491"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["compte"]}, {"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " Survived | \n", " total | \n", "
\n", " \n", " Pclass | \n", " | \n", " | \n", "
\n", " \n", " \n", " \n", " 1 | \n", " 136 | \n", " 216 | \n", "
\n", " \n", " 2 | \n", " 87 | \n", " 184 | \n", "
\n", " \n", " 3 | \n", " 119 | \n", " 491 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" Survived total\n", "Pclass \n", "1 136 216\n", "2 87 184\n", "3 119 491"]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}], "source": ["jointure = survived.join(compte)\n", "jointure"]}, {"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " Survived | \n", " total | \n", " survie | \n", "
\n", " \n", " Pclass | \n", " | \n", " | \n", " | \n", "
\n", " \n", " \n", " \n", " 1 | \n", " 136 | \n", " 216 | \n", " 0.629630 | \n", "
\n", " \n", " 2 | \n", " 87 | \n", " 184 | \n", " 0.472826 | \n", "
\n", " \n", " 3 | \n", " 119 | \n", " 491 | \n", " 0.242363 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" Survived total survie\n", "Pclass \n", "1 136 216 0.629630\n", "2 87 184 0.472826\n", "3 119 491 0.242363"]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}], "source": ["jointure[\"survie\"] = jointure['Survived'] / jointure.total\n", "jointure"]}, {"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": ["%matplotlib inline"]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [{"data": {"image/png": "iVBORw0KGgoAAAANSUhEUgAAAKYAAACmCAYAAABQiPR3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADOFJREFUeJzt3X2MVfWdx/H3p8y0A0IxHdxtZKAXFbJSwGkdQGNop60daenO2oizbJdSN0W2SUmplkZtbaEPpFSbNkWbiOkDjasrT2ZBZTVLqSulSJmrPKkhpRbtFKwzUx5EpQLz7R/nDF4ulxkZ7rnnd+d+X4nh3nN/99zvmXw8j7/zOzIznAvNO9IuwLlCPJguSB5MFyQPpguSB9MFyYPpguTBTJikr0n6aULzniJpdxLzTpv8POa5kXQk5+0g4G/Aifj9f5rZ/TltM8AfgWozO16qGstRVdoFlDszG9z9WtJeYLaZrU+vov7BN+UJk7RQ0n/Fb5+M/z0o6YikKyVdLGmDpE5JHZLul3R+zvf3SpovaYekQ5KWS6qJP2uU1JbTdoSkhyS1x/O7u3RLWlwezNL6UPzv+WY22Mw2AwK+B1wIXAqMABbmfa8FmAqMAiYAN+TPWNIA4BHgRSADDAceLPYClIpvylNmZnuAPfHbdkk/BBbkNVtiZvsAJD0M1BeY1SSicH81Z//1NwmUXBIezJRJ+gdgCTAFGEK0FTuQ1+zlnNevEwUw3wjgxf5yUOWb8tIqdArke/H0CWb2bmAm0eb9bP0JGCmpX6xsPJil1Q50ARflTBsCHCE6IBoOfLWP8/4dsB9YLOk8STWSrjqnalPkwSwhM3sdWARsknRQ0hXAt4APAoeAR4GH+jjvE8A/A5cALwFtwL8Wo+40+Al2FyRfY7ogeTBdkDyYLkgeTBckD6YLUtmdjB02bJhlMpm0y3B9lM1mO8zsgt7alV0wM5kMra2taZfh+kjSi2+nnW/KXZA8mC5IHkwXpLLbx3w7Mrc+WrR57V087azaHzt2jLa2No4ePVq0GspRTU0NdXV1VFdX9+n7iQZT0lTgx8AA4KdmtrhAmxaiHtsGbDezzyRZU9La2toYMmQImUwGqS+918qfmdHZ2UlbWxujRo3q0zwSC2bc1f8nwMeJerpslbTWzJ7LaTMauA24yswOxJ1my9rRo0crOpQAkqitraW9vb3P80hyH3MSsMfMXjCzN4nuP/mXvDY3Aj8xswMAZvZKgvWUTCWHstu5/g2SDOZwol7V3driabnGAGMkbZL0VLzpdwFau3YtixeftieWmCT3MQv9L5Pf+bMKGA00AnXARknjzOzgKTOS5gBzAEaOHFn8ShNUzAMxOPuDsbNx/PhxqqoKR6K5uZnm5ubEfjtfkmvMNqIbpLrVAfsKtFljZsfM7I/AbqKgnsLM7jWzBjNruOCCXq9mVbzXXnuNadOmcdlllzFu3DiWL19OJpOho6MD
gNbWVhobGwFYuHAhc+bMoampiVmzZjF58mSeffbZk/NqbGwkm82ybNky5s6dC0B7ezvXXXcdEydOZOLEiWzatKnoy5BkMLcCoyWNkvROYAawNq/N/wAfAZA0jGjT/kKCNVWExx57jAsvvJDt27eza9cupk7teQ8pm82yZs0aHnjgAWbMmMGKFSsA2L9/P/v27ePyyy8/pf28efO46aab2Lp1K6tXr2b27NlFX4bEghnfRjoXeBx4HlhhZs9K+rak7m3C40CnpOeAXxPdE92ZVE2VYvz48axfv55bbrmFjRs3MnTo0B7bNzc3M3DgQABaWlpYuXIlACtWrOD6668/rf369euZO3cu9fX1NDc3c/jwYV599dWiLkOi5zHNbB2wLm/aN3NeG3Bz/J8rkjFjxpDNZlm3bh233XYbTU1NVFVV0dXVBXDayf/zzjvv5Ovhw4dTW1vLjh07WL58OUuXLj1t/l1dXWzevPlkmJPglyT7oX379jFo0CBmzpzJ/Pnzefrpp8lkMmSzWQBWr17d4/dnzJjBHXfcwaFDhxg/fvxpnzc1NXH33W8Ni7Rt27biLgAezH5p586dTJo0ifr6ehYtWsTtt9/OggULmDdvHlOmTGHAgAE9fn/69Ok8+OCDtLS0FPx8yZIltLa2MmHCBMaOHcs999xT9GUou9t3GxoarLf+mGleK3/++ee59NJLi/b75azQ30JS1swaevuurzFdkDyYLkgeTBckD2YCym2/PQnn+jfwYBZZTU0NnZ2dFR3O7v6YNTU1fZ5Hv+zBnqa6ujra2trOqS9if9Ddg72vPJhFVl1d3ede2+4tvil3QfJguiB5MF2QPJguSB5MFyQPpguSny4qoWL1ekryhrRQ+BrTBcmD6YLkwXRB8mC6IHkwXZA8mC5IHkwXpESDKWmqpN2S9ki6tYd20yWZpF7vnnOVIbFg5gzc+glgLPBvksYWaDcE+BKwJalaXPlJe+BWgO8AdwCVPWi5O0WqA7dK+gAwwsweSbAOV4aSDGaPA7dKegfwI+Arvc5ImiOpVVJrpd9LUynSHLh1CDAOeELSXuAKYG2hAyAfuLXypDZwq5kdMrNhZpYxswzwFNBsZv6gSJf6wK3OFZTqwK150xuTrMWVF7/y44LkwXRB8mC6IJ1VMCVdIWlD/CSza5MqyrkeD34kvdfMXs6ZdDPQTHTy/LdEz+lxruh6Oyq/R1IWuNPMjgIHgc8AXcDhpItzlavHTbmZXQtsAx6R9Fngy0ShHAT4ptwlptd9TDN7GLgGOB94CNhtZkvMzC9au8T0GExJzZJ+A2wAdhFdVvy0pP+WdHEpCnSVqbd9zO8CVwIDgXVmNgm4WdJoYBFRUJ0rut6CeYgofAOBV7onmtnv8VC6BPW2j/lpogOd40RH486VRI9rTDPrAO4qUS3OneSXJF2QPJguSB5MFyQPpguSB9MFyYPpguTBdEHyYLog+VMrKlyoT9LwNaYLkgfTBSnVgVsl3SzpOUk7JP1K0vuSrMeVj7QHbn0GaDCzCcAqonEynUt34FYz+7WZvR6/fYpoRDjn0h24Nc/ngf9NsB5XRpI8XdTjwK2nNJRmAg3Ah8/w+RxgDsDIkSOLVZ8LWJoDtwIg6Wrg60RjY/6t0Ix84NbKk9rArXByDPalRKF8pcA8XIVKe+DWO4HBwEpJ2yStPcPsXIVJdeBWM7s6yd935cuv/LggeTBdkDyYLkgeTBckD6YLkgfTBcmD6YLkwXRB8mC6IHkwXZA8mC5IHkwXJA+mC5IH0wXJg+mC5MF0QfJguiB5MF2QPJguSB5MFyQPpguSB9MFyYPpguTBdEFKe+DWd0laHn++RVImyXpc+Uh74NbPAwfM7BLgR8D3k6rHlZdUB26N3/8yfr0K+JikQsMXugqT5NhFhQZunXymNmZ2XNIhoBboyG2UOz4mcETS7iLVOCz/t/Kp9Ovw/l7T2xpnP+2BW9/W4K5mdi9wbzGKOuXHpVYzayj2fM+F
1xRJe+DWk20kVQFDgb8mWJMrE6kO3Bq//1z8ejqwwcwKDoftKktim/J4n7F74NYBwM+7B24FWs1sLfAz4D5Je4jWlDOSqucMir57UAReEyBfQbkQ+ZUfFyQPpguSB9MFyYOZMkn/JOljkgbnTZ+aYk2TJE2MX4+NH0b7yZLW4Ac/IOk/zOwXKfzul4AvEj1uph6YZ2Zr4s+eNrMPplDTAqL+DVXA/xFdrXsCuBp43MwWlaQODyZIesnMSv4sQEk7gSvN7Ejcs2oVcJ+Z/VjSM2b2gZRqqgfeBbwM1JnZYUkDgS3xk5ITl+hzfkIiaceZPgL+sZS15BhgZkcAzGyvpEZgVfzc9rQ6sxw3sxPA65L+YGaH4/rekNRVqiIqJphE4bsGOJA3XcBvS18OAC9LqjezbQDxmvNTwM+B8SnV9KakQfHjui/vnihpKODBTMAjwODuEOSS9ETpywFgFnA8d0L8qMNZkpamUxIf6n7YrJnlBrGaty4fJ873MV2Q/HSRC5IH0wXJg5kASSfix1zvkrRS0qAe2i6UNL+U9ZUDD2Yy3jCzejMbB7wJfCHtgsqNBzN5G4FLACTNkrRD0nZJ9+U3lHSjpK3x56u717SSro/XvtslPRlPe7+k38Vr5h2SRpd0qRLmR+UJkHTEzAbHt4usBh4DngQeAq4ysw5J7zGzv0paCBwxsx9IqjWzznge3wX+YmZ3xVdjpprZnyWdb2YHJd0FPGVm98d3CAwwszdSWeAE+BozGQMlbQNagZeIeup/FFhlZh0AZlbo3qZxkjbGQfx34P3x9E3AMkk3Et0NALAZ+JqkW4D39adQQmWdYC+lN8ysPndCfL98b5unZcC1ZrZd0g1AI4CZfUHSZGAasC2+WvSApC3xtMclzTazDUVejtT4GrN0fgW0SKoFkPSeAm2GAPslVROtMYnbXmxmW8zsm0T3d4+QdBHwgpktIbqprySdK0rF15glEt+Itwj4f0kngGeAG/KafQPYArwI7CQKKsCd8cGNiAK+HbgVmCnpGFEvoG8nvhAl5Ac/Lki+KXdB8mC6IHkwXZA8mC5IHkwXJA+mC5IH0wXJg+mC9Hcm6VEEaJMIwQAAAABJRU5ErkJggg==\n", "text/plain": [""]}, "metadata": {}, "output_type": "display_data"}], "source": ["ax = jointure[['survie']].plot(kind=\"bar\", figsize=(2, 2))\n", "ax.set_title(\"Titanic\")\n", "ax.set_ylabel(\"%\");"]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [{"data": {"image/png": "\n", "text/plain": [""]}, "metadata": {}, "output_type": "display_data"}], "source": ["import matplotlib.pyplot as plt\n", "fig, ax = plt.subplots(1, 2, figsize=(8,3))\n", "jointure[['survie']].plot(kind=\"bar\", ax=ax[0])\n", "ax[0].set_title(\"Titanic\")\n", "ax[0].set_ylabel(\"%\");\n", "jointure.drop('survie', axis=1).plot(kind=\"bar\", ax=ax[1])\n", "ax[1].set_title(\"Titanic\")\n", "ax[1].set_ylabel(\"%\");"]}, {"cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": ["jointure.to_excel(\"titanic.xlsx\")"]}, {"cell_type": "code", 
"execution_count": 15, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " PassengerId | \n", " Survived | \n", " Pclass | \n", " Name | \n", " Sex | \n", " Age | \n", " SibSp | \n", " Parch | \n", " Ticket | \n", " Fare | \n", " Cabin | \n", " Embarked | \n", "
\n", " \n", " \n", " \n", " 0 | \n", " 1 | \n", " 0 | \n", " 3 | \n", " Braund, Mr. Owen Harris | \n", " male | \n", " 22.0 | \n", " 1 | \n", " 0 | \n", " A/5 21171 | \n", " 7.2500 | \n", " NaN | \n", " S | \n", "
\n", " \n", " 1 | \n", " 2 | \n", " 1 | \n", " 1 | \n", " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", " female | \n", " 38.0 | \n", " 1 | \n", " 0 | \n", " PC 17599 | \n", " 71.2833 | \n", " C85 | \n", " C | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C "]}, "execution_count": 16, "metadata": {}, "output_type": "execute_result"}], "source": ["df.head(n=2)"]}, {"cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": ["mat = df[['Survived', 'Age']].values"]}, {"cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([[ 0., 1., 1., ..., 0., 1., 0.],\n", " [22., 38., 26., ..., nan, 26., 32.]])"]}, "execution_count": 18, "metadata": {}, "output_type": "execute_result"}], "source": ["mat.T"]}, {"cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([[342., nan],\n", " [ nan, nan]])"]}, "execution_count": 19, "metadata": {}, "output_type": "execute_result"}], "source": ["mat.T @ mat"]}, {"cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([[ 484., 836., 572., ..., nan, 572., 704.],\n", " [ 836., 1445., 989., ..., nan, 989., 1216.],\n", " [ 572., 989., 677., ..., nan, 677., 832.],\n", " ...,\n", " [ nan, nan, nan, ..., nan, nan, nan],\n", " [ 572., 989., 677., ..., nan, 677., 832.],\n", " [ 704., 1216., 832., ..., nan, 832., 1024.]])"]}, "execution_count": 20, "metadata": {}, "output_type": "execute_result"}], "source": ["mat @ mat.T"]}, {"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " PassengerId | \n", " Survived | \n", " Pclass | \n", " Name | \n", " Sex | \n", " Age | \n", " SibSp | \n", " Parch | \n", " Ticket | \n", " Fare | \n", " Cabin | \n", " Embarked | \n", "
\n", " \n", " \n", " \n", " 889 | \n", " 890 | \n", " 1 | \n", " 1 | \n", " Behr, Mr. Karl Howell | \n", " male | \n", " 26.0 | \n", " 0 | \n", " 0 | \n", " 111369 | \n", " 30.00 | \n", " C148 | \n", " C | \n", "
\n", " \n", " 890 | \n", " 891 | \n", " 0 | \n", " 3 | \n", " Dooley, Mr. Patrick | \n", " male | \n", " 32.0 | \n", " 0 | \n", " 0 | \n", " 370376 | \n", " 7.75 | \n", " NaN | \n", " Q | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" PassengerId Survived Pclass Name Sex Age SibSp \\\n", "889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 \n", "890 891 0 3 Dooley, Mr. Patrick male 32.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "889 0 111369 30.00 C148 C \n", "890 0 370376 7.75 NaN Q "]}, "execution_count": 21, "metadata": {}, "output_type": "execute_result"}], "source": ["df.tail(n=2)"]}, {"cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [{"data": {"text/plain": ["'Braund, Mr. Owen Harris'"]}, "execution_count": 22, "metadata": {}, "output_type": "execute_result"}], "source": ["names = list(df['Name'])\n", "nom = names[0]\n", "nom"]}, {"cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [{"data": {"text/plain": ["'mr'"]}, "execution_count": 23, "metadata": {}, "output_type": "execute_result"}], "source": ["nom.split(',')[1].split('.')[0].strip().lower()"]}, {"cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": ["mr = [nom.split(',')[1].split('.')[0].strip().lower() for nom in names]"]}, {"cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": ["df['mr'] = mr"]}, {"cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": ["gr = df[ ['Sex', \"mr\", \"PassengerId\"] ].groupby(['Sex', \"mr\"], as_index=False).count()"]}, {"cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " Sex | \n", " mr | \n", " PassengerId | \n", "
\n", " \n", " \n", " \n", " 0 | \n", " female | \n", " dr | \n", " 1 | \n", "
\n", " \n", " 1 | \n", " female | \n", " lady | \n", " 1 | \n", "
\n", " \n", " 2 | \n", " female | \n", " miss | \n", " 182 | \n", "
\n", " \n", " 3 | \n", " female | \n", " mlle | \n", " 2 | \n", "
\n", " \n", " 4 | \n", " female | \n", " mme | \n", " 1 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" Sex mr PassengerId\n", "0 female dr 1\n", "1 female lady 1\n", "2 female miss 182\n", "3 female mlle 2\n", "4 female mme 1"]}, "execution_count": 27, "metadata": {}, "output_type": "execute_result"}], "source": ["gr.head()"]}, {"cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " Sex | \n", " female | \n", " male | \n", "
\n", " \n", " mr | \n", " | \n", " | \n", "
\n", " \n", " \n", " \n", " capt | \n", " NaN | \n", " 1.0 | \n", "
\n", " \n", " col | \n", " NaN | \n", " 2.0 | \n", "
\n", " \n", " don | \n", " NaN | \n", " 1.0 | \n", "
\n", " \n", " dr | \n", " 1.0 | \n", " 6.0 | \n", "
\n", " \n", " jonkheer | \n", " NaN | \n", " 1.0 | \n", "
\n", " \n", " lady | \n", " 1.0 | \n", " NaN | \n", "
\n", " \n", " major | \n", " NaN | \n", " 2.0 | \n", "
\n", " \n", " master | \n", " NaN | \n", " 40.0 | \n", "
\n", " \n", " miss | \n", " 182.0 | \n", " NaN | \n", "
\n", " \n", " mlle | \n", " 2.0 | \n", " NaN | \n", "
\n", " \n", " mme | \n", " 1.0 | \n", " NaN | \n", "
\n", " \n", " mr | \n", " NaN | \n", " 517.0 | \n", "
\n", " \n", " mrs | \n", " 125.0 | \n", " NaN | \n", "
\n", " \n", " ms | \n", " 1.0 | \n", " NaN | \n", "
\n", " \n", " rev | \n", " NaN | \n", " 6.0 | \n", "
\n", " \n", " sir | \n", " NaN | \n", " 1.0 | \n", "
\n", " \n", " the countess | \n", " 1.0 | \n", " NaN | \n", "
\n", " \n", "
\n", "
"], "text/plain": ["Sex female male\n", "mr \n", "capt NaN 1.0\n", "col NaN 2.0\n", "don NaN 1.0\n", "dr 1.0 6.0\n", "jonkheer NaN 1.0\n", "lady 1.0 NaN\n", "major NaN 2.0\n", "master NaN 40.0\n", "miss 182.0 NaN\n", "mlle 2.0 NaN\n", "mme 1.0 NaN\n", "mr NaN 517.0\n", "mrs 125.0 NaN\n", "ms 1.0 NaN\n", "rev NaN 6.0\n", "sir NaN 1.0\n", "the countess 1.0 NaN"]}, "execution_count": 28, "metadata": {}, "output_type": "execute_result"}], "source": ["gr.pivot(index=\"mr\", columns=\"Sex\", values=\"PassengerId\")"]}, {"cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": []}, {"cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0"}}, "nbformat": 4, "nbformat_minor": 2}