Coverage for src/papierstat/datasets/titanic.py: 100%
14 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 06:49 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 06:49 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Jeu de données Titanic.
5"""
6import os
7import pandas
8from .data_helper import get_data_folder
11__all__ = ['load_titanic_dataset']
def load_titanic_dataset(download=False, subset="A"):
    """
    Returns the Titanic dataset.
    Data obtained from `biostat.mc.vanderbilt.edu/DataSets
    <http://biostat.mc.vanderbilt.edu/DataSets>`_.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('encours', contains='titanic')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    @param      download    if True, the data is read directly from its URL,
                            otherwise a local copy is read from the folder
                            returned by ``get_data_folder()``
    @param      subset      ``"A"`` or ``"B"``, selects which of the two
                            files is loaded
    @return                 :epkg:`pandas:DataFrame`

    Raises ``KeyError`` if *subset* is not one of the supported values.
    """
    urls = dict(
        A='https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic3.csv',
        B='https://biostat.app.vumc.org/wiki/pub/Main/DataSets/titanic.txt',
    )
    # Validate explicitly so the error names the accepted values; KeyError is
    # kept because it is what the plain dictionary lookup used to raise.
    if subset not in urls:
        raise KeyError(
            "Unknown subset %r, expected one of %r." % (subset, sorted(urls)))
    url = urls[subset]
    if download:
        return pandas.read_csv(url)
    # The local copy is looked up under the same file name as in the URL.
    local_path = os.path.join(get_data_folder(), url.split('/')[-1])
    return pandas.read_csv(local_path)