Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Implements a kind of piecewise linear regression by modifying
5the criterion used by the algorithm which builds a decision tree.
6"""
7import numpy
8from sklearn.tree import DecisionTreeRegressor
class PiecewiseTreeRegressor(DecisionTreeRegressor):
    """
    Implements a kind of piecewise linear regression by modifying
    the criterion used by the algorithm which builds a decision tree.
    See :epkg:`sklearn:tree:DecisionTreeRegressor` to get the meaning
    of the parameters except criterion:

    * ``mselin``: optimizes for a piecewise linear regression
    * ``simple``: optimizes for a stepwise regression (equivalent to *mse*)
    """

    def __init__(self, criterion='mselin', splitter='best', max_depth=None,
                 min_samples_split=2, min_samples_leaf=1,
                 min_weight_fraction_leaf=0.0, max_features=None,
                 random_state=None, max_leaf_nodes=None,
                 min_impurity_decrease=0.0):
        # Same parameters as DecisionTreeRegressor, only the default
        # *criterion* differs ('mselin' instead of scikit-learn's default).
        DecisionTreeRegressor.__init__(
            self, criterion=criterion,
            splitter=splitter, max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features, random_state=random_state,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease)

    def fit(self, X, y, sample_weight=None, check_input=True,
            X_idx_sorted=None):
        """
        Replaces the string stored in *criterion* by an instance of a class
        before fitting, then restores the string once the tree is built.

        :param X: training features, array of shape ``(n_samples, n_features)``
        :param y: training targets
        :param sample_weight: optional sample weights (array or None)
        :param check_input: see :epkg:`sklearn:tree:DecisionTreeRegressor`
        :param X_idx_sorted: see :epkg:`sklearn:tree:DecisionTreeRegressor`
        :return: self
        """
        replace = None
        if isinstance(self.criterion, str):
            # Local imports: the compiled criterion extensions are only
            # needed when the corresponding criterion is requested.
            if self.criterion == 'mselin':
                from .piecewise_tree_regression_criterion_linear import (  # pylint: disable=E0611,C0415
                    LinearRegressorCriterion)
                replace = self.criterion
                self.criterion = LinearRegressorCriterion(X)
            elif self.criterion == "simple":
                from .piecewise_tree_regression_criterion_fast import (  # pylint: disable=E0611,C0415
                    SimpleRegressorCriterionFast)
                replace = self.criterion
                self.criterion = SimpleRegressorCriterionFast(X)
            # Any other string is handled by scikit-learn itself.

        DecisionTreeRegressor.fit(
            self, X, y, sample_weight=sample_weight,
            check_input=check_input, X_idx_sorted=X_idx_sorted)

        if replace:
            # Restore the original string so get_params/clone keep working.
            self.criterion = replace

        if self.criterion == "mselin":
            # One linear regression is fitted per leaf.
            self._fit_reglin(X, y, sample_weight)
        return self

    def _mapping_train(self, X):
        """
        Maps every leaf reached by at least one training observation
        to a dense index ``0, 1, 2, ...``.

        :param X: training features
        :return: dictionary ``{leaf_node_index: dense_index}``
        """
        tree = self.tree_
        # In scikit-learn trees, children always have a larger index than
        # their parent; leaves store -1 for both children, so a node whose
        # children indices are <= its own index is a leaf.
        leaves = [i for i in range(len(tree.children_left))
                  if tree.children_left[i] <= i and tree.children_right[i] <= i]  # pylint: disable=E1136
        dec_path = self.decision_path(X)
        mapping = {}
        ntree = 0
        for j in leaves:
            ind = dec_path[:, j] == 1
            ind = numpy.asarray(ind.todense()).flatten()
            if not numpy.any(ind):
                # No training example for this bucket.
                continue
            mapping[j] = ntree
            ntree += 1
        return mapping

    def predict_leaves(self, X):
        """
        Returns the leave index for each observation of *X*.

        :param X: array
        :return: array
            leaves index in ``self.leaves_index_``
        """
        # The creation of the sparse matrix could be avoided.
        leaves = self.decision_path(X)
        leaves = leaves[:, self.leaves_index_]
        mat = numpy.argmax(leaves, 1)
        return numpy.asarray(mat).ravel()

    def _fit_reglin(self, X, y, sample_weight):
        """
        Fits linear regressions for all leaves.
        Sets attributes ``leaves_mapping_``, ``betas_``, ``leaves_index_``.
        The first attribute is a dictionary ``{leave: row}``
        which maps a leave of the tree to the coefficients
        ``betas_[row, :]`` of a regression trained on all training
        points mapped a specific leave. ``leaves_index_`` keeps
        in memory a set of leaves.
        """
        from .piecewise_tree_regression_criterion_linear import (  # pylint: disable=E0611,C0415
            LinearRegressorCriterion)

        tree = self.tree_
        self.leaves_index_ = [i for i in range(len(tree.children_left))
                              if tree.children_left[i] <= i and tree.children_right[i] <= i]  # pylint: disable=E1136
        if tree.n_leaves != len(self.leaves_index_):
            raise RuntimeError(  # pragma: no cover
                "Unexpected number of leaves {} != {}".format(
                    tree.n_leaves, len(self.leaves_index_)))
        pred_leaves = self.predict_leaves(X)
        # NOTE(review): keys here are per-sample values taken from
        # *pred_leaves* (positions in leaves_index_), while the docstring
        # describes leaf node indices -- confirm against callers.
        self.leaves_mapping_ = {k: i for i, k in enumerate(pred_leaves)}
        # One row of coefficients per leaf, intercept stored last.
        self.betas_ = numpy.empty((len(self.leaves_index_), X.shape[1] + 1))
        for i, _ in enumerate(self.leaves_index_):
            ind = pred_leaves == i
            xs = X[ind, :].copy()
            ys = y[ind].astype(numpy.float64)
            if len(ys.shape) == 1:
                ys = ys[:, numpy.newaxis]
            ys = ys.copy()
            # BUG FIX: `if sample_weight` raises "the truth value of an
            # array with more than one element is ambiguous" for numpy
            # arrays; the intent is a None check.
            ws = sample_weight[ind].copy() if sample_weight is not None else None
            dec = LinearRegressorCriterion.create(xs, ys, ws)
            dec.node_beta(self.betas_[i, :])

    def predict(self, X, check_input=True):
        """
        Overloads method *predict*. Falls back into
        the predict from a decision tree is criterion is
        *mse*, *mae*, *simple*. Computes the predictions
        from linear regression if the criterion is *mselin*.
        """
        if self.criterion == 'mselin':
            return self._predict_reglin(X, check_input=check_input)
        return DecisionTreeRegressor.predict(self, X, check_input=check_input)

    def _predict_reglin(self, X, check_input=True):
        """
        Computes the predictions with a linear regression
        fitted with the observations mapped to each leave
        of the tree.

        :param X: array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.
        :param check_input: boolean, (default=True)
            Allow to bypass several input checking.
            Don't use this parameter unless you know what you do.
        :return: y, array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predict values.
        """
        leaves = self.predict_leaves(X)
        # Append the intercept column, then compute, for every row i,
        # dot(Xone[i], betas_[leaves[i]]) in one vectorized expression
        # (equivalent to the per-row Python loop, but at C speed).
        Xone = numpy.hstack([X, numpy.ones((X.shape[0], 1))])
        return numpy.einsum('ij,ij->i', Xone, self.betas_[leaves])