Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Jeux de données reliés aux vins.
5"""
6import os
7import pandas
8from numpy.random import permutation
9from .data_helper import get_data_folder
# Public API of this module: both dataset loaders defined below.
__all__ = ['load_wines_dataset', 'load_wine_dataset']
def load_wines_dataset(download=False, shuffle=False):
    """
    Returns the
    `wines quality <https://archive.ics.uci.edu/ml/datasets/wine+quality>`_
    dataset.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'wines')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    @param      download    if True, downloads the red and white files from
                            the UCI repository and merges them, otherwise
                            reads the local copy *wines-quality.csv* found in
                            the data folder
    @param      shuffle     if True, randomly permutes the rows
                            (the stored data is not shuffled)
    @return                 :epkg:`pandas:DataFrame`
    """
    if download:
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/"
        red = pandas.read_csv(url + "winequality-red.csv", sep=';')
        white = pandas.read_csv(url + "winequality-white.csv", sep=';')
        # Keep track of which file each row comes from.
        red['color'] = 'red'
        white['color'] = 'white'
        # Both frames are indexed from 0: without ignore_index=True the
        # merged frame would carry duplicated index labels.
        df = pandas.concat([red, white], ignore_index=True)
        # Downloaded headers contain spaces; use underscores instead.
        df.columns = [name.replace(' ', '_') for name in df.columns]
    else:
        fold = get_data_folder()
        data = os.path.join(fold, 'wines-quality.csv')
        df = pandas.read_csv(data)
    if shuffle:
        # After the reset, index labels equal row positions, so the permuted
        # labels can be used positionally with iloc.
        df = df.reset_index(drop=True)
        ind = permutation(df.index)
        df = df.iloc[ind, :].reset_index(drop=True)
    return df
def load_wine_dataset(download=False, shuffle=False):
    """
    Returns the
    `wine <https://archive.ics.uci.edu/ml/datasets/wine>`_
    dataset.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('encours', 'linreg')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    @param      download    if True, downloads the file from the UCI
                            repository, otherwise reads the local copy
                            *wine.data.txt* found in the data folder
    @param      shuffle     if True, randomly permutes the rows
                            (the stored data is not shuffled)
    @return                 :epkg:`pandas:DataFrame`
    """
    if download:
        source = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
    else:
        source = os.path.join(get_data_folder(), 'wine.data.txt')

    # The file has no header row, the 14 column names are set by hand.
    df = pandas.read_csv(source, header=None)
    # NOTE(review): 'Malica_cid' looks like a typo for 'Malic_acid'; it is
    # kept as is because callers may rely on the current column name.
    column_names = (
        "index Alcohol Malica_cid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids"
        " Nonflavanoid_phenols Proanthocyanins Color_intensity Hue"
        " OD280_OD315_diluted_wine Proline"
    )
    df.columns = column_names.split()

    if not shuffle:
        return df

    # After the reset, index labels equal row positions, so the permuted
    # labels can be used positionally with iloc.
    df = df.reset_index(drop=True)  # pylint: disable=E1101
    order = permutation(df.index)
    return df.iloc[order, :].reset_index(drop=True)