Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Timeseries data manipulations. 

4""" 

5import numpy 

6from sklearn import get_config 

7 

8 

def build_ts_X_y(model, X, y, weights=None, same_rows=False):
    """
    Builds standard *X, y* based on the given ones.

    Every output row concatenates rows of *X* with ``model.past``
    consecutive values of *y* (the lags) and is paired with
    the following value(s) of *y* (the targets).

    @param      model       a timeseries model (@see cl BaseTimeSeries),
                            attributes *past*, *delay1*, *delay2*,
                            *use_all_past* are read
    @param      X           times series, used as features, [n_obs, n_features],
                            X may be empty (None)
    @param      y           timeseries (one single vector), [n_obs]
    @param      weights     weights None or array [n_obs]
    @param      same_rows   keep the same number of rows
                            as the original datasets, use nan when no value is
                            available; integer or boolean *y* is then
                            promoted to float64 since these types cannot
                            store nan
    @return                 *(X, y, weights)*:
                            X is an array of features
                            [nrows, n_features + past], or
                            [nrows, n_features * past + past]
                            if ``model.use_all_past`` is True,
                            where `nrows = n_obs - model.delay2 - model.past + 2`
                            (or `n_obs` if *same_rows* is True),
                            y is an array of targets
                            [nrows, model.delay2 - model.delay1],
                            weights is None or array [nrows]
                            (returned unchanged if *same_rows* is True)
    @raises     TypeError   if *model* has no attribute *use_all_past*
                            or *past*

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.timeseries import build_ts_X_y
        from mlinsights.timeseries.base import BaseTimeSeries

        X = numpy.arange(10).reshape(5, 2)
        y = numpy.arange(5) * 100
        weights = numpy.arange(5) * 1000
        bs = BaseTimeSeries(past=2)
        nx, ny, nw = build_ts_X_y(bs, X, y, weights)
        print('X=', X)
        print('y=', y)
        print('nx=', nx)
        print('ny=', ny)

    With ``use_all_past=True``:

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.timeseries.base import BaseTimeSeries
        from mlinsights.timeseries import build_ts_X_y

        X = numpy.arange(10).reshape(5, 2)
        y = numpy.arange(5) * 100
        weights = numpy.arange(5) * 1000
        bs = BaseTimeSeries(past=2, use_all_past=True)
        nx, ny, nw = build_ts_X_y(bs, X, y, weights)
        print('X=', X)
        print('y=', y)
        print('nx=', nx)
        print('ny=', ny)
    """
    if not hasattr(model, "use_all_past") or not hasattr(model, "past"):
        raise TypeError(  # pragma: no cover
            "model must be of type BaseTimeSeries not {}".format(type(model)))

    past, delay1, delay2 = model.past, model.delay1, model.delay2
    all_past = model.use_all_past
    n_obs = y.shape[0]
    ncol = X.shape[1] if X is not None else 0
    nrow = n_obs - delay2 - past + 2
    n_targets = delay2 - delay1
    n_feat = ncol * past + past if all_past else ncol + past
    # Index of the most recent observation used by the first output row.
    dec = past - 1

    if same_rows:
        # NaN marks the rows without enough history, integer and boolean
        # types cannot hold it.
        out_dtype = numpy.float64 if y.dtype.kind in "biu" else y.dtype
        first = n_obs - nrow
        new_X = numpy.full((n_obs, n_feat), numpy.nan, dtype=out_dtype)
        new_y = numpy.full((n_obs, n_targets), numpy.nan, dtype=out_dtype)
        new_weights = weights

        if all_past:
            # NOTE(review): historical placement kept as is. For past == 2
            # it is not the NaN-padded version of the same_rows=False
            # result (a lag column can hold the target of the same row)
            # and other values of past fail on a shape mismatch.
            # Confirm the intended layout before changing it.
            if X is not None:
                for i in range(0, past):
                    begin = i * ncol
                    new_X[i:, begin:begin + ncol] = X[i:]
            for i in range(0, past):
                end = n_obs + i + delay1 - 1 - delay2
                new_X[first - i:first - i + end - i,
                      i + ncol * past] = y[i: end]
            for i in range(delay1, delay2):
                new_y[first:, i - delay1] = y[i + 1:i + nrow + 1]
            return new_X, new_y, new_weights

        # Views on the trailing rows: filling them fills new_X, new_y.
        rows_X, rows_y = new_X[first:], new_y[first:]
    else:
        new_X = numpy.empty((nrow, n_feat), dtype=y.dtype)
        new_y = numpy.empty((nrow, n_targets), dtype=y.dtype)
        new_weights = (None if weights is None
                       else weights[dec:dec + nrow])
        rows_X, rows_y = new_X, new_y

    # Features coming from X.
    if X is not None:
        if all_past:
            # One block of columns per lag of X.
            for i in range(0, past):
                begin = i * ncol
                rows_X[:, begin:begin + ncol] = X[i: i + nrow]
        else:
            rows_X[:, :ncol] = X[dec: X.shape[0] - delay2 + 1]

    # Features coming from the past values of y, one column per lag.
    y_col = ncol * past if all_past else ncol
    for i in range(past):
        end = n_obs + i + delay1 - 1 - delay2 - past + 2
        rows_X[:, y_col + i] = y[i: end]

    # Targets, one column per delay.
    for i in range(delay1, delay2):
        rows_y[:, i - delay1] = y[i + dec:i + nrow + dec]

    return new_X, new_y, new_weights

154 

155 

def check_ts_X_y(model, X, y):
    """
    Checks that datasets *(X, y)* were built with function
    @see fn build_ts_X_y.

    Nothing is checked if option *assume_finite* returned by
    *sklearn.get_config* is true (or missing).

    @param      model       a timeseries model, attribute *past* is read,
                            as well as *preprocessing_.context_length*
                            if the model has a non empty attribute
                            *preprocessing_*
    @param      X           features, must be float32 or float64
    @param      y           targets or None, must be float32 or float64,
                            with as many rows as *X*, a single column
                            and at least as many observations as
                            the model needs
    @raises     TypeError       if *X* or *y* has an unexpected type
    @raises     AssertionError  if *y* is missing while the model needs
                                past data, or if its shape is not
                                consistent with *X* or with the model
    """
    cfg = get_config()
    if cfg.get('assume_finite', True):
        return  # pragma: no cover
    if X.dtype not in (numpy.float32, numpy.float64):
        raise TypeError(
            "Features must be of type float32 and float64 not {}.".format(X.dtype))
    if y is not None and y.dtype not in (numpy.float32, numpy.float64):
        # This message is about y, it used to mention features by mistake.
        raise TypeError(  # pragma: no cover
            "Targets must be of type float32 or float64 not {}.".format(y.dtype))

    # Number of past observations required by the model.
    cst = model.past
    if (hasattr(model, 'preprocessing_') and model.preprocessing_ is not None):
        cst += model.preprocessing_.context_length
    if y is None:
        if cst > 0:
            raise AssertionError(  # pragma: no cover
                "y must be specified to give the model past data to predict, "
                "it requires at least {} observations.".format(cst))
        return  # pragma: no cover
    if y.shape[0] != X.shape[0]:
        raise AssertionError(  # pragma: no cover
            "X and y must have the same number of rows {} != {}.".format(
                X.shape[0], y.shape[0]))
    if len(y.shape) > 1 and y.shape[1] != 1:
        raise AssertionError(  # pragma: no cover
            "y must be 1-dimensional not has shape {}.".format(y.shape))
    if y.shape[0] < cst:
        raise AssertionError(  # pragma: no cover
            "y is not enough past data to predict, "
            "it requires at least {} observations.".format(cst))