Coverage for mlinsights/mlmodel/interval_regressor.py: 100%
47 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Implements a regressor which estimates a confidence interval.
4"""
5import numpy
6import numpy.random
7from sklearn.base import RegressorMixin, clone, BaseEstimator
8from sklearn.utils._joblib import Parallel, delayed
9try:
10 from tqdm import tqdm
11except ImportError: # pragma: no cover
12 pass
class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on prediction. It only works for
    single regression. Every training is made with a new
    sample of the training data, parameter *alpha*
    lets the user choose the size of this sample.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws sample by random but keeps the weight associated
    to each of them. Another way could be to draw
    a weighted sample but give them uniform weights.
    """

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param estimator predictor trained on every bucket
        @param n_estimators number of estimators to train
        @param n_jobs number of parallel jobs (for training and predicting)
        @param alpha proportion of samples resampled for each training
        @param verbose boolean or use ``'tqdm'`` to use :epkg:`tqdm`
            to fit the estimators
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of estimators = the number of buckets
        the data was split in.
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains *n_estimators* clones of the estimator, each of them
        on a random resampling of the training data.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of trained estimators, one per resampling
        """
        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = tqdm(range(len(estimators))
                    ) if self.verbose == 'tqdm' else range(len(estimators))
        # joblib verbosity: non-zero whenever any verbosity was requested
        # (both 'tqdm' and True map to 1, so a single truthiness test suffices).
        verbose = 1 if self.verbose else 0

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            # Draws a sample (with replacement) of size alpha * n.
            new_size = int(X.shape[0] * alpha + 0.5)
            # randint's upper bound is exclusive: use X.shape[0] so that the
            # last row may be drawn too (X.shape[0] - 1 would never pick it).
            rnd = numpy.random.randint(0, X.shape[0], new_size)
            Xr = X[rnd]
            yr = y[rnd]
            # Compare against None: `if sample_weight` on a numpy array raises
            # "the truth value of an array ... is ambiguous".
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     prefer='threads')(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self

    def predict_all(self, X):
        """
        Computes the predictions for all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions, shape *(n_samples, n_estimators)*
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            pred = est.predict(X)
            container[:, i] = pred
        return container

    def predict(self, X):
        """
        Computes the average predictions over all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions
        """
        preds = self.predict_all(X)
        return preds.mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators.
        Sorts them for all observations.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions sorted for each observation
        """
        preds = self.predict_all(X)
        for i in range(preds.shape[0]):
            preds[i, :] = numpy.sort(preds[i, :])
        return preds