Coverage for mlinsights/timeseries/base.py: 92%

53 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-09 08:45 +0200

1""" 

2@file 

3@brief Base class for timeseries. 

4""" 

5from sklearn.base import BaseEstimator, RegressorMixin, clone 

6from ..mlmodel.sklearn_transform_inv import BaseReciprocalTransformer 

7from .metrics import ts_mape 

8from .utils import check_ts_X_y, build_ts_X_y 

9 

10 

class BaseReciprocalTimeSeriesTransformer(BaseReciprocalTransformer):
    """
    Common interface of the timeseries preprocessing steps
    a predictor applies automatically.

    The three methods *fit*, *transform* and *get_fct_inv* defined
    here only raise :class:`NotImplementedError`: a subclass has
    to overwrite them.
    """

    def __init__(self, context_length=0):
        """
        @param      context_length      number of previous observations
                                        needed to build or rebuild
                                        the observations
        """
        BaseReciprocalTransformer.__init__(self)
        self.context_length = context_length

    def fit(self, X, y, sample_weight=None):
        """
        Stores the first values.
        Must be overwritten, this version always raises
        an exception.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :raises NotImplementedError: always
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

    def transform(self, X, y, sample_weight=None, context=None):
        """
        Transforms both *X* and *y*.
        Must be overwritten, this version always raises
        an exception.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :param context: used when the *y* series stored in the
            predictor is not related to the *y* series given
            to this method
        :return: *X* and *y*, followed by *sample_weight*
            if it is not None
        :raises NotImplementedError: always
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

    def get_fct_inv(self):
        """
        Returns the reverse transform.
        Must be overwritten, this version always raises
        an exception.

        :raises NotImplementedError: always
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

47 

48 

class BaseTimeSeries(BaseEstimator):
    """
    Base class to build a predictor on timeseries.
    The class computes one or several predictions at each time,
    between *delay1* and *delay2*. It computes:
    :math:`\\hat{Y}_{t+d} = f(Y_{t-1}, ..., Y_{t-p})`
    with *d* in *[delay1, delay2[* and
    :math:`1 \\leqslant p \\leqslant past`.
    """

    def __init__(self, past=1, delay1=1, delay2=2,
                 use_all_past=False, preprocessing=None):
        """
        @param      past            values to use to predict
        @param      delay1          the model computes the first prediction for
                                    *time=t + delay1*
        @param      delay2          the model computes the last prediction for
                                    *time=t + delay2* excluded
        @param      use_all_past    use all past features, not only the timeseries
        @param      preprocessing   preprocessing to apply before predicting,
                                    only on the timeseries itself, it can be
                                    a difference, it must be of type
                                    @see cl BaseReciprocalTimeSeriesTransformer

        Raises *ValueError* if *delay1 < 1*, *delay2 <= delay1*
        or *past < 0*, and *TypeError* if *preprocessing* is neither
        None nor a @see cl BaseReciprocalTimeSeriesTransformer.
        """
        self.past = past
        self.delay1 = delay1
        self.delay2 = delay2
        self.use_all_past = use_all_past
        self.preprocessing = preprocessing
        if self.delay1 < 1:
            raise ValueError("delay1 must be >= 1")  # pragma: no cover
        if self.delay2 <= self.delay1:
            # The interval [delay1, delay2[ must contain at least one delay.
            raise ValueError("delay2 must be > delay1")  # pragma: no cover
        if self.past < 0:
            # past == 0 is accepted by this check, the message says so.
            raise ValueError("past must be >= 0")  # pragma: no cover
        if (preprocessing is not None and
                not isinstance(preprocessing, BaseReciprocalTimeSeriesTransformer)):
            raise TypeError(  # pragma: no cover
                "preprocessing must be of type "
                "'BaseReciprocalTimeSeriesTransformer' "
                f"not {type(preprocessing)}.")

    @staticmethod
    def _split_transform_output(xyw, sample_weight):
        """
        Converts the output of a *transform* method into a triple.

        A transformer returns *(X, y)* and appends the weights
        only if they were given. The last element of *xyw* is
        therefore read as weights only when *sample_weight*
        is not None.

        :param xyw: sequence returned by *transform*
        :param sample_weight: weights given to *transform* or None
        :return: *X*, *y*, *sample_weight*
        """
        X, y = xyw[:2]
        weights = xyw[-1] if sample_weight is not None else None
        return X, y, weights

    def _fit_preprocessing(self, X, y, sample_weight=None):
        """
        Trains the preprocessing if there is one and applies it to
        *X*, *y*, *sample_weight*. The inputs first go through
        *check_ts_X_y*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*

        The trained preprocessing is a clone of *self.preprocessing*
        stored in *self.preprocessing_*.
        """
        check_ts_X_y(self, X, y)

        if self.preprocessing is None:
            # Drops the preprocessing trained by a previous call,
            # otherwise has_preprocessing would still return True
            # after the parameter was reset to None.
            if hasattr(self, 'preprocessing_'):
                del self.preprocessing_
            return X, y, sample_weight

        self.preprocessing_ = clone(self.preprocessing)
        self.preprocessing_.fit(X, y, sample_weight)
        xyw = self.preprocessing_.transform(X, y, sample_weight)
        return self._split_transform_output(xyw, sample_weight)

    def _base_fit_predict(self, X, y, sample_weight=None):
        """
        Builds the training data with *build_ts_X_y*, then trains
        the preprocessing and returns the modified
        *X*, *y*, *sample_weight*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*
        :raises RuntimeError: if *y* is None

        The *y* series is moved by *self.delay1* in the past.
        """
        if y is None:
            raise RuntimeError("y cannot be None")  # pragma: no cover
        X, y, sample_weight = build_ts_X_y(
            self, X, y, sample_weight, same_rows=True)
        return self._fit_preprocessing(X, y, sample_weight)

    def has_preprocessing(self):
        """
        Tells if there is one trained preprocessing, which means
        attribute *preprocessing_* exists and is not None.
        """
        return getattr(self, 'preprocessing_', None) is not None

    def _applies_preprocessing(self, X, y, sample_weight):
        """
        Applies the trained preprocessing to the series.
        The inputs are returned unchanged if there is none.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if not self.has_preprocessing():
            return X, y, sample_weight
        xyw = self.preprocessing_.transform(X, y, sample_weight)
        return self._split_transform_output(xyw, sample_weight)

    def _applies_preprocessing_inv(self, X, y, sample_weight):
        """
        Applies the inverse of the trained preprocessing to the series.
        The inputs are returned unchanged if there is none.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if not self.has_preprocessing():
            return X, y, sample_weight
        inv = self.preprocessing_.get_fct_inv()
        # The inverse transform may return only (X, y) when there is no
        # weight, its output is handled the same way as the direct one.
        xyw = inv.transform(X, y, sample_weight)
        return self._split_transform_output(xyw, sample_weight)

157 

158 

class TimeSeriesRegressorMixin(RegressorMixin):
    """
    Addition to :epkg:`sklearn:base:RegressorMixin`,
    method *score* relies on @see fn ts_mape.
    """

    def score(self, X, y, sample_weight=None):
        """
        Scores the prediction using
        @see fn ts_mape

        :param X: features
        :param y: expected values
        :param sample_weight: sample weight
        :return: see @see fn ts_mape
        """
        # The series *y* is given to *predict* as well as the features.
        expected, predicted = y, self.predict(X, y)
        return ts_mape(expected, predicted, sample_weight=sample_weight)