Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Loads data from :epkg:`CSSE Johns Hopkins`. 

3""" 

4import numpy 

5import pandas 

6from ..preprocess import ( 

7 ts_normalise_negative_values, ts_moving_average, 

8 ts_remove_decreasing_values) 

9 

10 

# Approximate number of inhabitants per country, indexed by the
# country name given to the functions of this module.
# It is read by extract_hopkins_data to compute the 'safe' column.
population = {
    'Belgium': 11.5e6,
    'France': 67e6,
    'Germany': 83e6,
    'Spain': 47e6,
    'Italy': 60e6,
    'UK': 67e6,
}

19 

20 

# Country names used by this package which differ from the spelling
# found in column ``Country/Region`` of the CSSE files.
_HOPKINS_COUNTRY_ALIASES = {'UK': 'United Kingdom'}


def download_hopkins_data(kind='deaths', country='France'):
    """
    Downloads data from :epkg:`CSSE Johns Hopkins`
    for a particular country.

    :param kind: `'deaths'`, `'confirmed'` or `'recovered'`
    :param country: `'France'`, `'UK'`, ..., short names such as
        `'UK'` are translated into the spelling used by the data source
    :return: dataframe with one row per date and a single
        column named after *kind*
    :raises ValueError: if the country cannot be found in the
        downloaded file or if it matches more than one
        country-level row

    .. runpython::
        :showcode:

        from aftercovid.data import download_hopkins_data
        df = download_hopkins_data()
        print(df.tail())
    """
    url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/"
        "master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_%s_global.csv" %
        kind)
    df = pandas.read_csv(url)

    # Accepts the name given by the user and its known alias, the
    # original spelling still matches if the file happens to use it.
    names = [country]
    alias = _HOPKINS_COUNTRY_ALIASES.get(country)
    if alias is not None:
        names.append(alias)

    # Rows with a province are not the country-level series
    # (presumably regions or territories), they are ignored.
    eur = df[df['Country/Region'].isin(names)
             & df['Province/State'].isna()]
    if eur.shape[0] != 1:
        # Without this check, the column renaming below fails with
        # an obscure "Length mismatch" error.
        raise ValueError(
            "Expected exactly one country-level row for country %r "
            "(kind=%r), found %d." % (country, kind, eur.shape[0]))

    # The first four rows of the transposed frame are metadata
    # (the columns preceding the dates), the remaining ones are the dates.
    tf = eur.T.iloc[4:]
    tf.columns = [kind]
    return tf

48 

49 

def extract_hopkins_data(kinds=('deaths', 'confirmed', 'recovered'),
                         country='France', delay=21, raw=False):
    """
    Downloads data from :epkg:`CSSE Johns Hopkins` and infers
    the number of current positive cases in a very simple way.

    :param kinds: series to extract, by default
        `('deaths', 'confirmed', 'recovered')`
    :param country: `'France'`, `'UK'`, ...
    :param delay: number of days after which a confirmed case
        is assumed not to be positive anymore (21 by default)
    :param raw: if True, returns the raw data as well
    :return: dataframe, or the tuple *(dataframe, raw dataframe)*
        if *raw* is True

    .. runpython::
        :showcode:

        from aftercovid.data import extract_hopkins_data
        df = extract_hopkins_data()
        print(df.tail())
    """
    inhabitants = population[country]

    # One single-column dataframe per kind, merged side by side.
    frames = [download_hopkins_data(kind, country) for kind in kinds]
    raw_data = pandas.concat(frames, axis=1)
    for name in raw_data:
        raw_data[name] = ts_remove_decreasing_values(
            raw_data[name].astype(numpy.int64))

    data = raw_data.copy()
    confirmed = data['confirmed']
    deaths = data['deaths']
    confirmed_values = confirmed.values

    # Cases still open according to the raw series: those older than
    # *delay* days are added to the recovered ones.
    still_open = confirmed - (deaths + data['recovered'])
    expired = still_open[:-delay]
    recovered = data['recovered'].values.copy()
    recovered[delay:] += expired

    # Daily increase of confirmed cases.
    new_cases = confirmed_values[1:] - confirmed_values[:-1]

    infected = numpy.zeros_like(confirmed_values)
    infected[:] = confirmed - (deaths + recovered)
    # At least one infected person and at least as many as
    # the new cases of the day.
    at_least_new = numpy.maximum(infected[1:], new_cases)
    infected[1:] = numpy.maximum(1, at_least_new)
    # Minimum number of infected people after 20 and 60 days.
    for start, floor in ((20, 10), (60, 100)):
        infected[start:] = numpy.maximum(floor, infected[start:])

    data['recovered'] = recovered
    data['infected'] = infected
    not_safe = data.drop('confirmed', axis=1).sum(axis=1)
    data['safe'] = inhabitants - not_safe

    return (data, raw_data) if raw else data

96 

97 

def preprocess_hopkins_data(df):
    """
    Cleans and smoothes the daily variations of the series:
    negative values are normalised, then a centered moving
    average over 7 days is applied.

    :param df: dataframe returned by :func:`extract_hopkins_data
        <aftercovid.data.extract_hopkins_data>`
    :return: (smoothed differentiated series,
        preprocessed dataframe)
    """
    # Sum of all columns but 'confirmed' on the first row,
    # used as the constant total when rebuilding column 'safe'.
    first_total = df.drop('confirmed', axis=1).sum(axis=1).iloc[0]

    daily = df.diff()
    for column in ('deaths', 'recovered', 'confirmed'):
        daily[column] = ts_normalise_negative_values(
            daily[column], extreme=2)

    smoothed = ts_moving_average(daily, n=7, center=True)

    # Cumulated series rebuilt from the smoothed variations.
    rebuilt = smoothed.cumsum()
    others = rebuilt.drop(['confirmed', 'safe'], axis=1).sum(axis=1)
    rebuilt['safe'] = first_total - others
    return smoothed, rebuilt