Coverage for mlinsights/mlmodel/extended_features.py: 100%
85 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Implements new features such as polynomial features.
4"""
5import numpy
6from scipy import sparse
7from sklearn.base import BaseEstimator, TransformerMixin
8from sklearn.utils import check_array
9from ._extended_features_polynomial import _transform_iall, _transform_ionly, _combinations_poly
class ExtendedFeatures(BaseEstimator, TransformerMixin):
    """
    Generates extended features such as polynomial features.

    :param kind: string
        ``'poly'`` for polynomial features,
        ``'poly-slow'`` for polynomial features in *scikit-learn 0.20.2*
    :param poly_degree: integer
        The degree of the polynomial features. Default = 2.
    :param poly_interaction_only: boolean
        If true, only interaction features are produced: features that
        are products of at most degree distinct input features
        (so not ``x[1] ** 2, x[0] * x[2] ** 3``, etc.).
    :param poly_include_bias: boolean
        If True (default), then include a bias column, the feature in
        which all polynomial powers are zero (i.e. a column of ones -
        acts as an intercept term in a linear model).

    Fitted attributes:

    * `n_input_features_`: int
        The total number of input features.
    * `n_output_features_`: int
        The total number of polynomial output features. The number of output
        features is computed by iterating over all suitably sized combinations
        of input features.
    """

    # Values of ``kind`` the dispatching methods below accept.
    _KINDS = ('poly', 'poly-slow')

    def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
                 poly_include_bias=True):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.kind = kind
        self.poly_degree = poly_degree
        self.poly_include_bias = poly_include_bias
        self.poly_interaction_only = poly_interaction_only

    def get_feature_names(self, input_features=None):
        """
        Returns feature names for output features.

        :param input_features: list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.
        :return: output_feature_names : list of string, length n_output_features
        :raises ValueError: if ``kind`` is not a known kind
        """
        # Both supported kinds produce the same columns in the same order,
        # so they share one naming routine.
        if self.kind in self._KINDS:
            return self._get_feature_names_poly(input_features)
        raise ValueError(  # pragma: no cover
            f"Unknown extended features '{self.kind}'.")

    @staticmethod
    def _collapse_powers(name):
        """
        Rewrites a space separated product of feature names so that each
        name appears once, in sorted order, with repeated names written
        as powers (``"x1 x0 x0"`` becomes ``"x0^2 x1"``).
        """
        counts = {}
        for token in name.split():
            counts[token] = counts.get(token, 0) + 1
        # Keys are unique, so sorting the items sorts by name only.
        return " ".join(
            f"{token}^{power}" if power > 1 else token
            for token, power in sorted(counts.items()))

    def _get_feature_names_poly(self, input_features=None):
        """
        Returns feature names for output features for
        the polynomial features.

        :param input_features: list of string, length ``n_input_features_``,
            optional, defaults to ``"x0", "x1", ...``
        :return: list of string, one name per output feature
        :raises ValueError: if *input_features* does not contain
            ``n_input_features_`` names
        """
        if input_features is None:
            input_features = [
                f"x{i}" for i in range(self.n_input_features_)]
        elif len(input_features) != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                f"input_features should contain {self.n_input_features_} strings.")

        names = ["1"] if self.poly_include_bias else []
        interaction_only = self.poly_interaction_only

        if self.poly_degree >= 1:
            # Degree 1: the input features themselves. ``index[i]`` is the
            # position in ``names`` where the block of products created for
            # feature ``i`` at the current degree starts; the last entry is
            # the end of the current degree.
            first = len(names)
            names.extend(input_features)
            index = list(range(first, len(names) + 1))

            for _ in range(1, self.poly_degree):
                new_index = []
                end = index[-1]
                for i, feature in enumerate(input_features):
                    new_index.append(len(names))
                    # Feature i multiplies the previous-degree products built
                    # from features >= i; with interaction_only its own block
                    # is skipped so that no feature is ever repeated.
                    begin = index[i + 1] if interaction_only else index[i]
                    names.extend([prefix + " " + feature
                                  for prefix in names[begin:end]])
                new_index.append(len(names))
                index = new_index

        return [self._collapse_powers(name) for name in names]

    def fit(self, X, y=None):
        """
        Compute number of output features.

        :param X: array-like, shape (n_samples, n_features)
            The data.
        :param y: ignored
        :return: self : instance
        :raises ValueError: if ``kind`` is not a known kind
        """
        if self.kind not in self._KINDS:
            raise ValueError(  # pragma: no cover
                f"Unknown extended features '{self.kind}'.")
        # Validate before storing anything: an input rejected by the
        # validation must not leave fitted attributes behind.
        self._fit_poly(X, y)
        self.n_input_features_ = X.shape[1]
        self.n_output_features_ = len(self.get_feature_names())
        return self

    def _fit_poly(self, X, y=None):
        """
        Fitting method for the polynomial features.
        It only validates *X* with ``check_array``, sparse input
        being refused; the validated copy is not kept.
        """
        check_array(X, accept_sparse=False)
        return self

    def transform(self, X):
        """
        Transforms data to extended features.

        :param X: array-like, shape [n_samples, n_features]
            The data to transform, row by row.
        :return: XP: numpy.ndarray, shape [n_samples, NP]
            The matrix of features, where NP is the number of polynomial
            features generated from the combination of inputs.
        :raises ValueError: if the number of columns of *X* differs from
            the one seen in ``fit`` or if ``kind`` is not a known kind
        """
        n_features = X.shape[1]
        if n_features != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                "X shape does not match training shape")
        if self.kind == 'poly':
            return self._transform_poly(X)
        if self.kind == 'poly-slow':
            return self._transform_poly_slow(X)
        raise ValueError(  # pragma: no cover
            f"Unknown extended features '{self.kind}'.")

    def _transform_poly(self, X):
        """
        Transforms data to polynomial features.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: whatever the helper from ``_extended_features_polynomial``
            returns for the preallocated output matrix
        :raises NotImplementedError: if *X* is a sparse matrix
        """
        if sparse.isspmatrix(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        XP = numpy.empty(
            (X.shape[0], self.n_output_features_), dtype=X.dtype)

        # Callbacks handed to the helper: an element-wise product written
        # into a preallocated destination and an identity post-processing.
        def multiply(A, B, C):
            return numpy.multiply(A, B, out=C)

        def final(res):
            return res

        if self.poly_interaction_only:
            helper = _transform_ionly
        else:
            helper = _transform_iall
        return helper(self.poly_degree, self.poly_include_bias,
                      XP, X, multiply, final)

    def _transform_poly_slow(self, X):
        """
        Transforms data to polynomial features, one output column
        at a time, each column being the product of the input columns
        listed in one combination.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: numpy.ndarray, shape [n_samples, n_output_features_]
        :raises NotImplementedError: if *X* is a sparse matrix
        """
        if sparse.isspmatrix(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        combinations = _combinations_poly(
            X.shape[1], self.poly_degree, self.poly_interaction_only,
            include_bias=self.poly_include_bias)
        order = 'C'  # how to get order from X.
        XP = numpy.empty((X.shape[0], self.n_output_features_),
                         dtype=X.dtype, order=order)
        for col, features in enumerate(combinations):
            XP[:, col] = X[:, features].prod(1)
        return XP