{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Graphes en machine learning - correction\n", "\n", "Correction (en cours de r\u00e9daction) des exercices autour des graphes courants en machine learning."]}, {"cell_type": "code", "execution_count": 1, "metadata": {"collapsed": true}, "outputs": [], "source": ["%matplotlib inline\n", "%load_ext pyensae"]}, {"cell_type": "code", "execution_count": 2, "metadata": {"collapsed": true}, "outputs": [], "source": ["import matplotlib.pyplot as plt\n", "plt.style.use('ggplot')"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [{"data": {"text/html": ["
run previous cell, wait for 2 seconds
\n", ""], "text/plain": [""]}, "execution_count": 4, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Le module utilise des donn\u00e9es issues du [Wine Quality Data Set](https://archive.ics.uci.edu/ml/datasets/Wine+Quality) pour lequel on essaye de pr\u00e9dire la qualit\u00e9 du vin en fonction de ses caract\u00e9ristiques chimiques."]}, {"cell_type": "code", "execution_count": 4, "metadata": {"collapsed": true}, "outputs": [], "source": ["from pyensae.datasource import download_data, DownloadDataException\n", "uci = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/\"\n", "try:\n", " download_data(\"winequality-red.csv\", url=uci)\n", " download_data(\"winequality-white.csv\", url=uci)\n", "except DownloadDataException:\n", " print(\"backup\")\n", " download_data(\"winequality-red.csv\", website=\"xd\")\n", " download_data(\"winequality-white.csv\", website=\"xd\")"]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\"fixed acidity\";\"volatile acidity\";\"citric acid\";\"residual sugar\";\"chlorides\";\"free sulfur dioxide\";\"total sulfur dioxide\";\"density\";\"pH\";\"sulphates\";\"alcohol\";\"quality\"\n", "7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5\n", "7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5\n", "7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5\n", "11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6\n", "7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5\n", "7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5\n", "7.9;0.6;0.06;1.6;0.069;15;59;0.9964;3.3;0.46;9.4;5\n", "7.3;0.65;0;1.2;0.065;15;21;0.9946;3.39;0.47;10;7\n", "7.8;0.58;0.02;2;0.073;9;18;0.9968;3.36;0.57;9.5;7\n", "\n", "
"], "text/plain": [""]}, "execution_count": 6, "metadata": {}, "output_type": "execute_result"}], "source": ["%head winequality-red.csv"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholqualityred
07.40.700.001.90.07611.034.00.99783.510.569.451
17.80.880.002.60.09825.067.00.99683.200.689.851
27.80.760.042.30.09215.054.00.99703.260.659.851
311.20.280.561.90.07517.060.00.99803.160.589.861
47.40.700.001.90.07611.034.00.99783.510.569.451
\n", "
"], "text/plain": [" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.70 0.00 1.9 0.076 \n", "1 7.8 0.88 0.00 2.6 0.098 \n", "2 7.8 0.76 0.04 2.3 0.092 \n", "3 11.2 0.28 0.56 1.9 0.075 \n", "4 7.4 0.70 0.00 1.9 0.076 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.9978 3.51 0.56 \n", "1 25.0 67.0 0.9968 3.20 0.68 \n", "2 15.0 54.0 0.9970 3.26 0.65 \n", "3 17.0 60.0 0.9980 3.16 0.58 \n", "4 11.0 34.0 0.9978 3.51 0.56 \n", "\n", " alcohol quality red \n", "0 9.4 5 1 \n", "1 9.8 5 1 \n", "2 9.8 5 1 \n", "3 9.8 6 1 \n", "4 9.4 5 1 "]}, "execution_count": 7, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "red_wine = pandas.read_csv(\"winequality-red.csv\", sep=\";\")\n", "red_wine[\"red\"] = 1\n", "white_wine = pandas.read_csv(\"winequality-white.csv\", sep=\";\")\n", "white_wine[\"red\"] = 0\n", "wines = pandas.concat([red_wine, white_wine])\n", "wines.head()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On d\u00e9coupe en base d'apprentissage, base de test :"]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/plain": ["(pandas.core.frame.DataFrame, pandas.core.series.Series)"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.model_selection import train_test_split\n", "X = wines[[c for c in wines.columns if c != \"quality\"]]\n", "Y = wines[\"quality\"]\n", "x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)\n", "type(x_train), type(y_train)"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"data": {"text/plain": ["((6497, 13), (4352, 12), (4352,))"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["wines.shape, x_train.shape, y_train.shape"]}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["## Exercice 1 : cr\u00e9er une 
fonction pour automatiser la cr\u00e9ation de ce graphe"]}, {"cell_type": "code", "execution_count": 9, "metadata": {"collapsed": true}, "outputs": [], "source": []}, {"cell_type": "markdown", "metadata": {"collapsed": true}, "source": ["## Exercice 2 : simplifier l'apprentissage de chaque mod\u00e8le"]}, {"cell_type": "code", "execution_count": 10, "metadata": {"collapsed": true}, "outputs": [], "source": []}, {"cell_type": "markdown", "metadata": {}, "source": ["## Exercice 3 : grid_search\n", "\n", "Consid\u00e9rer un mod\u00e8le et estimer au mieux ses param\u00e8tres."]}, {"cell_type": "code", "execution_count": 11, "metadata": {"collapsed": true}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1"}}, "nbformat": 4, "nbformat_minor": 2}