Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Implements new features such as polynomial features.
4"""
5import numpy
6from scipy import sparse
7from sklearn.base import BaseEstimator, TransformerMixin
8from sklearn.utils import check_array
9from ._extended_features_polynomial import _transform_iall, _transform_ionly, _combinations_poly
class ExtendedFeatures(BaseEstimator, TransformerMixin):
    """
    Generates extended features such as polynomial features.

    :param kind: string
        ``'poly'`` for polynomial features,
        ``'poly-slow'`` for polynomial features in *scikit-learn 0.20.2*
    :param poly_degree: integer
        The degree of the polynomial features. Default = 2.
    :param poly_interaction_only: boolean
        If true, only interaction features are produced: features that
        are products of at most degree distinct input features
        (so not ``x[1] ** 2, x[0] * x[2] ** 3``, etc.).
    :param poly_include_bias: boolean
        If True (default), then include a bias column, the feature in
        which all polynomial powers are zero (i.e. a column of ones -
        acts as an intercept term in a linear model).

    Fitted attributes:

    * `n_input_features_`: int
        The total number of input features.
    * `n_output_features_`: int
        The total number of polynomial output features. The number of output
        features is computed by iterating over all suitably sized combinations
        of input features.
    """

    def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
                 poly_include_bias=True):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.kind = kind
        self.poly_degree = poly_degree
        self.poly_include_bias = poly_include_bias
        self.poly_interaction_only = poly_interaction_only

    def get_feature_names(self, input_features=None):
        """
        Returns feature names for output features.

        :param input_features: list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "x{n_features - 1}" is used.
        :return: output_feature_names : list of string, length n_output_features
        :raises ValueError: if *kind* is neither ``'poly'`` nor ``'poly-slow'``
        """
        # Both kinds produce the same set of features, hence the same names.
        if self.kind in ('poly', 'poly-slow'):
            return self._get_feature_names_poly(input_features)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _get_feature_names_poly(self, input_features=None):
        """
        Returns feature names for output features for
        the polynomial features.

        :param input_features: list of string, one per input feature,
            or None to use ``"x0"``, ``"x1"``, ...
        :return: list of string, one per output feature; a repeated
            factor is written ``name^power``, distinct factors are
            separated by a space
        :raises ValueError: if *input_features* does not contain
            ``n_input_features_`` names
        """
        if input_features is None:
            input_features = ["x%d" %
                              i for i in range(0, self.n_input_features_)]
        elif len(input_features) != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                "input_features should contain {} strings.".format(
                    self.n_input_features_))

        # Every term is kept as a tuple of factor names rather than a
        # space-joined string, so that a feature name which itself
        # contains whitespace is never split apart when it is formatted.
        terms = [("1",)] if self.poly_include_bias else []
        n = self.n_input_features_
        interaction_only = self.poly_interaction_only
        for d in range(0, self.poly_degree):
            if d == 0:
                # Degree 1: the input features themselves.
                # index[i] is the position of the first term of the
                # current degree which starts at feature i, index[-1]
                # is the end of the current degree.
                pos = len(terms)
                terms.extend((name,) for name in input_features)
                index = list(range(pos, len(terms)))
                index.append(len(terms))
            else:
                # Next degree: multiply feature i with the terms of the
                # previous degree starting at feature i (or at feature
                # i + 1 if only interactions are requested).
                new_index = []
                end = index[-1]
                for i in range(0, n):
                    first = index[i]
                    new_index.append(len(terms))
                    start = first + (index[i + 1] - index[i]
                                     if interaction_only else 0)
                    terms.extend([term + (input_features[i],)
                                  for term in terms[start:end]])
                new_index.append(len(terms))
                index = new_index

        def process_name(factors):
            # Counts every factor, then writes them in sorted order,
            # a factor present more than once becomes ``name^count``.
            counts = {}
            for factor in factors:
                counts[factor] = counts.get(factor, 0) + 1
            return " ".join(
                "%s^%d" % (factor, power) if power > 1 else factor
                for factor, power in sorted(counts.items()))

        return [process_name(term) for term in terms]

    def fit(self, X, y=None):
        """
        Compute number of output features.

        :param X: array-like, shape (n_samples, n_features)
            The data.
        :param y: ignored
        :return: self : instance
        :raises ValueError: if *kind* is neither ``'poly'`` nor ``'poly-slow'``
        """
        self.n_input_features_ = X.shape[1]
        self.n_output_features_ = len(self.get_feature_names())

        # Both kinds share the same fitting step.
        if self.kind in ('poly', 'poly-slow'):
            return self._fit_poly(X, y)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _fit_poly(self, X, y=None):
        """
        Fitting method for the polynomial features.
        It only validates *X* (sparse input is rejected), nothing
        is learned from the data.

        :return: self
        """
        check_array(X, accept_sparse=False)
        return self

    def transform(self, X):
        """
        Transforms data to extended features.

        :param X: array-like, shape [n_samples, n_features]
            The data to transform, row by row.
        :return: XP: numpy.ndarray, shape [n_samples, NP]
            The matrix of features, where NP is the number of polynomial
            features generated from the combination of inputs.
        :raises ValueError: if the number of columns of *X* differs from
            the one seen in *fit* or if *kind* is unknown
        """
        n_features = X.shape[1]
        if n_features != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                "X shape does not match training shape")
        if self.kind == 'poly':
            return self._transform_poly(X)
        if self.kind == 'poly-slow':
            return self._transform_poly_slow(X)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _transform_poly(self, X):
        """
        Transforms data to polynomial features.
        The work is delegated to ``_transform_ionly`` (interactions only)
        or ``_transform_iall`` (all terms).

        :param X: dense matrix with ``n_input_features_`` columns
        :return: the value returned by the helper
        :raises NotImplementedError: if *X* is a sparse matrix
        """
        if sparse.isspmatrix(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        # Output buffer handed to the helper.
        # NOTE(review): the helper presumably fills XP in place through
        # *multiply* and returns ``final(XP)`` - confirm in
        # ``_extended_features_polynomial``.
        XP = numpy.empty(
            (X.shape[0], self.n_output_features_), dtype=X.dtype)

        def multiply(A, B, C):
            # Element-wise product of A and B written into C.
            return numpy.multiply(A, B, out=C)

        def final(X):
            # No post-processing for dense matrices.
            return X

        if self.poly_interaction_only:
            return _transform_ionly(self.poly_degree, self.poly_include_bias,
                                    XP, X, multiply, final)
        return _transform_iall(self.poly_degree, self.poly_include_bias,
                               XP, X, multiply, final)

    def _transform_poly_slow(self, X):
        """
        Transforms data to polynomial features.
        Every output column is computed independently as the product
        of the input columns listed by ``_combinations_poly``.

        :param X: dense matrix with ``n_input_features_`` columns
        :return: numpy.ndarray, shape [n_samples, n_output_features_]
        :raises NotImplementedError: if *X* is a sparse matrix
        """
        if sparse.isspmatrix(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        combinations = _combinations_poly(
            X.shape[1], self.poly_degree, self.poly_interaction_only,
            include_bias=self.poly_include_bias)
        order = 'C'  # how to get order from X.
        XP = numpy.empty((X.shape[0], self.n_output_features_),
                         dtype=X.dtype, order=order)
        # The loop variable has its own name, it does not rebind
        # the iterable being enumerated.
        for i, columns in enumerate(combinations):
            XP[:, i] = X[:, columns].prod(1)
        return XP