Coverage for mlinsights/mlmodel/interval_regressor.py: 100%

47 statements  

coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

"""
@file
@brief Implements a regressor which estimates a confidence interval on its predictions.
"""
import numpy
import numpy.random
from sklearn.base import RegressorMixin, clone, BaseEstimator
from sklearn.utils._joblib import Parallel, delayed
try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    pass


class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on predictions. It only works for
    single-target regression. Every training is done on a new
    sample of the training data; parameter *alpha*
    lets the user choose the size of this sample.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws samples at random but keeps the weight associated
    with each of them. Another way could be to draw
    a weighted sample and give the drawn samples uniform weights.
    """
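
    # Usage sketch (editor's illustration, not taken from the original file);
    # the base estimator and parameter values are arbitrary, and X_train,
    # y_train, X_test are placeholder arrays:
    #
    #   from sklearn.linear_model import LinearRegression
    #   model = IntervalRegressor(LinearRegression(), n_estimators=20, alpha=0.8)
    #   model.fit(X_train, y_train)
    #   mean_prediction = model.predict(X_test)
    #   sorted_predictions = model.predict_sorted(X_test)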

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param      estimator       predictor trained on every subsample
        @param      n_estimators    number of estimators to train
        @param      n_jobs          number of parallel jobs (for training and predicting)
        @param      alpha           proportion of samples resampled for each training
        @param      verbose         boolean, or ``'tqdm'`` to use :epkg:`tqdm`
                                    while fitting the estimators
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of trained estimators, which is also
        the number of subsamples drawn from the training data.
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains every estimator on a random subsample of the
        training data.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of trained estimators,
          one per subsample
        """
        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = (tqdm(range(len(estimators)))
                if self.verbose == 'tqdm' else range(len(estimators)))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            # size of the subsample given to this estimator
            new_size = int(X.shape[0] * alpha + 0.5)
            # indices drawn with replacement
            rnd = numpy.random.randint(0, X.shape[0] - 1, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     prefer='threads')(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self
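
    # Resampling arithmetic (editor's note): each estimator above is fit on
    # int(X.shape[0] * alpha + 0.5) rows drawn with replacement; for example,
    # with 1000 training rows and alpha=0.8, every estimator sees 800 rows.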

    def predict_all(self, X):
        """
        Computes the predictions of every estimator.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions, array of shape *(n_samples, n_estimators)*
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            pred = est.predict(X)
            container[:, i] = pred
        return container

    def predict(self, X):
        """
        Computes the average prediction across all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: average predictions
        """
        preds = self.predict_all(X)
        return preds.mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators.
        Sorts them for all observations.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions sorted for each observation
        """
        preds = self.predict_all(X)
        for i in range(preds.shape[0]):
            preds[i, :] = numpy.sort(preds[i, :])
        return preds
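
The following end-to-end sketch shows how the class above can be used to build
an empirical prediction interval from the sorted predictions. It is an editor's
illustration, not part of the original module: the import path
`mlinsights.mlmodel`, the choice of `LinearRegression` as base estimator, the
synthetic data and the 5%/95% quantile columns are all assumptions made for the
example.

import numpy
from sklearn.linear_model import LinearRegression
from mlinsights.mlmodel import IntervalRegressor

# Synthetic single-target regression data.
rng = numpy.random.RandomState(0)
X = rng.uniform(-1, 1, (500, 1))
y = 3 * X.ravel() + rng.normal(0, 0.5, 500)

# Fifty linear regressions, each fit on a resample of 80% of the rows.
model = IntervalRegressor(LinearRegression(), n_estimators=50, alpha=0.8)
model.fit(X, y)

X_test = numpy.array([[-0.5], [0.0], [0.5]])
mean_pred = model.predict(X_test)           # average of the 50 predictions
sorted_pred = model.predict_sorted(X_test)  # shape (3, 50), sorted per row

# Rough 90% interval: take low and high quantile columns of the sorted matrix.
low = sorted_pred[:, int(0.05 * sorted_pred.shape[1])]
high = sorted_pred[:, int(0.95 * sorted_pred.shape[1]) - 1]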