Coverage for mlinsights/mlmodel/interval_regressor.py: 100%

47 statements  

coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

"""
@file
@brief Implements a regressor which estimates a confidence interval on its predictions.
"""
import numpy
import numpy.random
from sklearn.base import RegressorMixin, clone, BaseEstimator
from sklearn.utils._joblib import Parallel, delayed
try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    pass


class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on predictions. It only works for
    single-target regression. Every training is done on a new
    sample of the training data; parameter *alpha*
    lets the user choose the size of this sample.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws samples at random but keeps the weight associated
    with each of them. Another way could be to draw
    a weighted sample and give the drawn samples uniform weights.
    """
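
    # Usage sketch (editor's illustration, not taken from the original file);
    # the base estimator and parameter values are arbitrary, and X_train,
    # y_train, X_test are placeholder arrays:
    #
    #   from sklearn.linear_model import LinearRegression
    #   model = IntervalRegressor(LinearRegression(), n_estimators=20, alpha=0.8)
    #   model.fit(X_train, y_train)
    #   mean_prediction = model.predict(X_test)
    #   sorted_predictions = model.predict_sorted(X_test)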

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param      estimator       predictor trained on every subsample
        @param      n_estimators    number of estimators to train
        @param      n_jobs          number of parallel jobs (for training and predicting)
        @param      alpha           proportion of samples resampled for each training
        @param      verbose         boolean, or ``'tqdm'`` to use :epkg:`tqdm`
                                    while fitting the estimators
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of trained estimators, which is also
        the number of subsamples drawn from the training data.
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains every estimator on a random subsample of the
        training data.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of trained estimators,
          one per subsample
        """
        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = (tqdm(range(len(estimators)))
                if self.verbose == 'tqdm' else range(len(estimators)))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            # size of the subsample given to this estimator
            new_size = int(X.shape[0] * alpha + 0.5)
            # indices drawn with replacement
            rnd = numpy.random.randint(0, X.shape[0] - 1, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     prefer='threads')(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self
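
    # Resampling arithmetic (editor's note): each estimator above is fit on
    # int(X.shape[0] * alpha + 0.5) rows drawn with replacement; for example,
    # with 1000 training rows and alpha=0.8, every estimator sees 800 rows.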

    def predict_all(self, X):
        """
        Computes the predictions of every estimator.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions, array of shape *(n_samples, n_estimators)*
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            pred = est.predict(X)
            container[:, i] = pred
        return container

    def predict(self, X):
        """
        Computes the average prediction across all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: average predictions
        """
        preds = self.predict_all(X)
        return preds.mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators.
        Sorts them for all observations.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions sorted for each observation
        """
        preds = self.predict_all(X)
        for i in range(preds.shape[0]):
            preds[i, :] = numpy.sort(preds[i, :])
        return preds
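
The following end-to-end sketch shows how the class above can be used to build
an empirical prediction interval from the sorted predictions. It is an editor's
illustration, not part of the original module: the import path
`mlinsights.mlmodel`, the choice of `LinearRegression` as base estimator, the
synthetic data and the 5%/95% quantile columns are all assumptions made for the
example.

import numpy
from sklearn.linear_model import LinearRegression
from mlinsights.mlmodel import IntervalRegressor

# Synthetic single-target regression data.
rng = numpy.random.RandomState(0)
X = rng.uniform(-1, 1, (500, 1))
y = 3 * X.ravel() + rng.normal(0, 0.5, 500)

# Fifty linear regressions, each fit on a resample of 80% of the rows.
model = IntervalRegressor(LinearRegression(), n_estimators=50, alpha=0.8)
model.fit(X, y)

X_test = numpy.array([[-0.5], [0.0], [0.5]])
mean_pred = model.predict(X_test)           # average of the 50 predictions
sorted_pred = model.predict_sorted(X_test)  # shape (3, 50), sorted per row

# Rough 90% interval: take low and high quantile columns of the sorted matrix.
low = sorted_pred[:, int(0.05 * sorted_pred.shape[1])]
high = sorted_pred[:, int(0.95 * sorted_pred.shape[1]) - 1]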