Coverage for mlinsights/mlmodel/extended_features.py: 100%

85 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-09 08:45 +0200

1""" 

2@file 

3@brief Implements new features such as polynomial features. 

4""" 

5import numpy 

6from scipy import sparse 

7from sklearn.base import BaseEstimator, TransformerMixin 

8from sklearn.utils import check_array 

9from ._extended_features_polynomial import _transform_iall, _transform_ionly, _combinations_poly 

10 

11 

class ExtendedFeatures(BaseEstimator, TransformerMixin):
    """
    Generates extended features such as polynomial features.

    :param kind: string
        ``'poly'`` for polynomial features,
        ``'poly-slow'`` for polynomial features in *scikit-learn 0.20.2*
    :param poly_degree: integer
        The degree of the polynomial features. Default = 2.
    :param poly_interaction_only: boolean
        If true, only interaction features are produced: features that
        are products of at most degree distinct input features
        (so not ``x[1] ** 2, x[0] * x[2] ** 3``, etc.).
    :param poly_include_bias: boolean
        If True (default), then include a bias column, the feature in
        which all polynomial powers are zero (i.e. a column of ones -
        acts as an intercept term in a linear model).

    Fitted attributes:

    * `n_input_features_`: int
        The total number of input features.
    * `n_output_features_`: int
        The total number of polynomial output features. The number of output
        features is computed by iterating over all suitably sized combinations
        of input features.
    """

    def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
                 poly_include_bias=True):
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        # Hyper-parameters are stored unchanged under the constructor
        # argument names.
        self.kind = kind
        self.poly_degree = poly_degree
        self.poly_include_bias = poly_include_bias
        self.poly_interaction_only = poly_interaction_only

    def get_feature_names(self, input_features=None):
        """
        Returns feature names for output features.

        :param input_features: list of string, length n_features, optional
            String names for input features if available. By default,
            "x0", "x1", ... "xn_features" is used.
        :return: output_feature_names : list of string, length n_output_features
        :raises ValueError: if ``kind`` is not ``'poly'`` or ``'poly-slow'``
        """
        # Both polynomial variants produce the same set of names.
        if self.kind in ('poly', 'poly-slow'):
            return self._get_feature_names_poly(input_features)
        raise ValueError(  # pragma: no cover
            f"Unknown extended features '{self.kind}'.")

    def _get_feature_names_poly(self, input_features=None):
        """
        Returns feature names for output features for
        the polynomial features.

        Names are built degree by degree: every name of degree *d + 1* is a
        name of degree *d* followed by one more input feature, the tokens
        being separated by spaces. Repeated tokens are finally collapsed
        into ``name^power``.

        :param input_features: names of the input features, or None to use
            ``x0``, ``x1``, ...
        :return: list of strings
        :raises ValueError: if *input_features* does not contain
            ``n_input_features_`` names
        """
        if input_features is None:
            input_features = [f"x{i}" for i in range(self.n_input_features_)]
        elif len(input_features) != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                f"input_features should contain {self.n_input_features_} strings.")

        names = ["1"] if self.poly_include_bias else []
        n = self.n_input_features_
        interaction_only = self.poly_interaction_only
        for d in range(self.poly_degree):
            if d == 0:
                # Degree 1: the input features themselves. index[i] is the
                # position of feature i in names, index[-1] the end of the
                # degree block.
                pos = len(names)
                names.extend(input_features)
                index = list(range(pos, len(names)))
                index.append(len(names))
            else:
                # Degree d + 1: feature i multiplies the names of the
                # previous degree located in names[start:end].
                new_index = []
                end = index[-1]
                for i in range(n):
                    new_index.append(len(names))
                    # With interactions only, the names feature i opened at
                    # the previous degree are skipped, so the slice starts
                    # at index[i + 1] instead of index[i].
                    start = index[i + 1] if interaction_only else index[i]
                    names.extend([prefix + " " + input_features[i]
                                  for prefix in names[start:end]])
                new_index.append(len(names))
                index = new_index

        def process_name(col):
            # Groups identical tokens (made adjacent by sorting) and writes
            # repeated ones as token^count.
            res = []
            for c in sorted(col.split()):
                if len(res) == 0 or res[-1][0] != c:
                    res.append((c, 1))
                else:
                    res[-1] = (c, res[-1][1] + 1)
            return " ".join(["%s^%d" % r if r[1] > 1 else r[0] for r in res])

        return [process_name(s) for s in names]

    def fit(self, X, y=None):
        """
        Compute number of output features.

        :param X: array-like, shape (n_samples, n_features)
            The data.
        :param y: unused, only forwarded to the fitting method
        :return: self : instance
        :raises ValueError: if ``kind`` is not ``'poly'`` or ``'poly-slow'``
        """
        self.n_input_features_ = X.shape[1]
        # The number of outputs is the number of generated names;
        # get_feature_names also rejects an unknown kind.
        self.n_output_features_ = len(self.get_feature_names())

        if self.kind in ('poly', 'poly-slow'):
            return self._fit_poly(X, y)
        raise ValueError(  # pragma: no cover
            f"Unknown extended features '{self.kind}'.")

    def _fit_poly(self, X, y=None):
        """
        Fitting method for the polynomial features.

        Only validates *X* with ``check_array`` (sparse input is not
        accepted), nothing is learned from the data.

        :return: self
        """
        check_array(X, accept_sparse=False)
        return self

    def transform(self, X):
        """
        Transforms data to extended features.

        :param X: array-like, shape [n_samples, n_features]
            The data to transform, row by row.
        :return: XP: numpy.ndarray, shape [n_samples, NP]
            The matrix of features, where NP is the number of polynomial
            features generated from the combination of inputs.
        :raises ValueError: if the number of columns of *X* differs from
            the one seen by ``fit`` or if ``kind`` is unknown
        """
        n_features = X.shape[1]
        if n_features != self.n_input_features_:
            raise ValueError(  # pragma: no cover
                "X shape does not match training shape")
        if self.kind == 'poly':
            return self._transform_poly(X)
        if self.kind == 'poly-slow':
            return self._transform_poly_slow(X)
        raise ValueError(  # pragma: no cover
            f"Unknown extended features '{self.kind}'.")

    def _transform_poly(self, X):
        """
        Transforms data to polynomial features.

        Allocates the output matrix and delegates the computation to
        ``_transform_ionly`` (interactions only) or ``_transform_iall``.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: value returned by the helper, presumably the filled
            matrix of polynomial features (to be confirmed against
            ``_extended_features_polynomial``)
        :raises NotImplementedError: if *X* is sparse
        """
        # issparse detects both sparse matrices and sparse arrays whereas
        # isspmatrix ignores sparse arrays on recent versions of scipy.
        if sparse.issparse(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        XP = numpy.empty(
            (X.shape[0], self.n_output_features_), dtype=X.dtype)

        def multiply(A, B, C):
            # Element-wise product written directly into C.
            return numpy.multiply(A, B, out=C)

        def final(X):
            # Nothing to do for dense arrays.
            return X

        if self.poly_interaction_only:
            return _transform_ionly(self.poly_degree, self.poly_include_bias,
                                    XP, X, multiply, final)
        return _transform_iall(self.poly_degree, self.poly_include_bias,
                               XP, X, multiply, final)

    def _transform_poly_slow(self, X):
        """
        Transforms data to polynomial features.

        Every output column is the product of the input columns selected
        by one combination returned by ``_combinations_poly``.

        :param X: dense matrix, shape [n_samples, n_features]
        :return: numpy.ndarray, shape [n_samples, n_output_features_]
        :raises NotImplementedError: if *X* is sparse
        """
        # issparse detects both sparse matrices and sparse arrays whereas
        # isspmatrix ignores sparse arrays on recent versions of scipy.
        if sparse.issparse(X):
            raise NotImplementedError(  # pragma: no cover
                "Not implemented for sparse matrices.")

        combinations = _combinations_poly(
            X.shape[1], self.poly_degree, self.poly_interaction_only,
            include_bias=self.poly_include_bias)
        order = 'C'  # how to get order from X.
        XP = numpy.empty((X.shape[0], self.n_output_features_),
                         dtype=X.dtype, order=order)
        for i, columns in enumerate(combinations):
            XP[:, i] = X[:, columns].prod(1)
        return XP

195 for i, comb in enumerate(comb): 

196 XP[:, i] = X[:, comb].prod(1) 

197 return XP