Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Implements new features such as polynomial features. 

4""" 

5import numpy 

6from scipy import sparse 

7from sklearn.base import BaseEstimator, TransformerMixin 

8from sklearn.utils import check_array 

9from ._extended_features_polynomial import _transform_iall, _transform_ionly, _combinations_poly 

10 

11 

class ExtendedFeatures(BaseEstimator, TransformerMixin):
    """
    Generates extended features such as polynomial features.

    :param kind: string
        ``'poly'`` for polynomial features,
        ``'poly-slow'`` for polynomial features in *scikit-learn 0.20.2*
    :param poly_degree: integer
        The degree of the polynomial features. Default = 2.
    :param poly_interaction_only: boolean
        If true, only interaction features are produced: features that
        are products of at most degree distinct input features
        (so not ``x[1] ** 2, x[0] * x[2] ** 3``, etc.).
    :param poly_include_bias: boolean
        If True (default), then include a bias column, the feature in
        which all polynomial powers are zero (i.e. a column of ones -
        acts as an intercept term in a linear model).

    Fitted attributes:

    * `n_input_features_`: int
        The total number of input features.
    * `n_output_features_`: int
        The total number of polynomial output features. The number of output
        features is computed by iterating over all suitably sized combinations
        of input features.
    """

    def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
                 poly_include_bias=True):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self.kind = kind
        self.poly_degree = poly_degree
        self.poly_include_bias = poly_include_bias
        self.poly_interaction_only = poly_interaction_only

    def get_feature_names(self, input_features=None):
        """
        Returns feature names for output features.

        :param input_features: list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.
        :return: output_feature_names : list of string, length n_output_features
        :raises ValueError: if ``kind`` is neither ``'poly'`` nor
            ``'poly-slow'``
        """
        # Both kinds share the same naming routine.
        if self.kind in ('poly', 'poly-slow'):
            return self._get_feature_names_poly(input_features)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _get_feature_names_poly(self, input_features=None):
        """
        Returns feature names for output features for
        the polynomial features.

        Every output feature is tracked as the tuple of input column
        indices it multiplies and is only turned into text at the end.
        Working on indices rather than on the names themselves keeps
        names containing whitespace intact, orders the factors by column
        position (``x2 x10``, not ``x10 x2``) and never merges two
        distinct columns that happen to share a label.

        :param input_features: list of string, length ``n_input_features_``,
            optional, defaults to ``"x0", "x1", ...``
        :return: list of string, one name per output feature
        :raises ValueError: if *input_features* does not contain
            ``n_input_features_`` names
        """
        n = self.n_input_features_
        if input_features is None:
            input_features = ["x%d" % i for i in range(n)]
        elif len(input_features) != n:
            raise ValueError(  # pragma: no cover
                "input_features should contain {} strings.".format(n))

        interaction_only = self.poly_interaction_only
        # The empty tuple stands for the bias column.
        terms = [()] if self.poly_include_bias else []
        # index[i] is the position in ``terms`` where the block created for
        # input feature i starts (for the previous degree); index[-1] is
        # the end of that degree.
        index = None
        for d in range(self.poly_degree):
            if d == 0:
                pos = len(terms)
                terms.extend((i,) for i in range(n))
                index = list(range(pos, len(terms) + 1))
            else:
                new_index = []
                end = index[-1]
                for i in range(n):
                    new_index.append(len(terms))
                    # With interactions only, skip the block of feature i
                    # itself so that no factor is repeated.
                    start = index[i + 1] if interaction_only else index[i]
                    # The slice is copied before ``terms`` grows.
                    terms.extend([t + (i,) for t in terms[start:end]])
                new_index.append(len(terms))
                index = new_index

        def format_term(term):
            # Collapses repeated indices into powers: (0, 0, 2) -> "x0^2 x2".
            if not term:
                return "1"
            powers = []
            for i in sorted(term):
                if powers and powers[-1][0] == i:
                    powers[-1][1] += 1
                else:
                    powers.append([i, 1])
            return " ".join(
                "%s^%d" % (input_features[i], p) if p > 1
                else input_features[i]
                for i, p in powers)

        return [format_term(t) for t in terms]

    def fit(self, X, y=None):
        """
        Compute number of output features.

        :param X: array-like, shape (n_samples, n_features)
            The data. It must expose a two-dimensional ``shape``.
        :param y: ignored, forwarded to the fitting routine
        :return: self : instance
        :raises ValueError: if ``kind`` is unknown
        """
        self.n_input_features_ = X.shape[1]
        # The number of outputs is the number of generated names.
        self.n_output_features_ = len(self.get_feature_names())

        if self.kind in ('poly', 'poly-slow'):
            return self._fit_poly(X, y)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _fit_poly(self, X, y=None):
        """
        Fitting method for the polynomial features.
        Only validates *X* (sparse input is rejected), nothing is learned.

        :param X: the data, validated with ``check_array``
        :param y: unused
        :return: self
        """
        check_array(X, accept_sparse=False)
        return self

    def transform(self, X):
        """
        Transforms data to extended features.

        :param X: array-like, shape [n_samples, n_features]
            The data to transform, row by row.
        :return: XP: numpy.ndarray, shape [n_samples, NP]
            The matrix of features, where NP is the number of polynomial
            features generated from the combination of inputs.
        :raises ValueError: if the number of columns of *X* differs from
            the one seen in :meth:`fit` or if ``kind`` is unknown
        """
        n_features = X.shape[1]
        if n_features != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                "X shape does not match training shape")
        if self.kind == 'poly':
            return self._transform_poly(X)
        if self.kind == 'poly-slow':
            return self._transform_poly_slow(X)
        raise ValueError(  # pragma: no cover
            "Unknown extended features '{}'.".format(self.kind))

    def _transform_poly(self, X):
        """
        Transforms data to polynomial features.

        Allocates the output matrix and delegates the computation to
        ``_transform_ionly`` or ``_transform_iall`` depending on
        ``poly_interaction_only``.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: whatever the delegated helper returns
            (presumably the filled matrix *XP*, to be confirmed against
            ``_extended_features_polynomial``)
        :raises NotImplementedError: if *X* is sparse
        """
        # issparse covers sparse matrices as well as sparse arrays.
        if sparse.issparse(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        XP = numpy.empty(
            (X.shape[0], self.n_output_features_), dtype=X.dtype)

        def multiply(A, B, C):
            # Element-wise product written directly into C.
            return numpy.multiply(A, B, out=C)

        def final(X):
            # Identity: no post-processing for dense matrices.
            return X

        if self.poly_interaction_only:
            return _transform_ionly(self.poly_degree, self.poly_include_bias,
                                    XP, X, multiply, final)
        return _transform_iall(self.poly_degree, self.poly_include_bias,
                               XP, X, multiply, final)

    def _transform_poly_slow(self, X):
        """
        Transforms data to polynomial features.

        Fills every output column with the product of the input columns
        selected by one item yielded by ``_combinations_poly``.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: XP: numpy.ndarray, shape [n_samples, n_output_features_]
        :raises NotImplementedError: if *X* is sparse
        """
        # issparse covers sparse matrices as well as sparse arrays.
        if sparse.issparse(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        combinations = _combinations_poly(
            X.shape[1], self.poly_degree, self.poly_interaction_only,
            include_bias=self.poly_include_bias)
        order = 'C'  # how to get order from X.
        XP = numpy.empty((X.shape[0], self.n_output_features_),
                         dtype=X.dtype, order=order)
        # Presumably each item is a tuple of column indices - TODO confirm
        # against _combinations_poly.
        for i, columns in enumerate(combinations):
            XP[:, i] = X[:, columns].prod(1)
        return XP