Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Featurizers for machine learned models. 

4""" 

5import numpy 

6import pandas 

7from sklearn.linear_model import LogisticRegression 

8from sklearn.ensemble import RandomForestClassifier 

9 

10 

class FeaturizerTypeError(TypeError):
    """
    Error signalling that a type cannot be processed.
    """

16 

17 

def model_featurizer(model, **params):
    """
    Turns a machine learned model into a function which maps
    a vector to the features the model produces.
    These features are either the model output or intermediate results.
    Supported models come from :epkg:`scikit-learn`,
    :epkg:`keras` or :epkg:`torch`.

    @param      model       model
    @param      params      additional parameters, forwarded to the
                            specialized featurizer builder
    @return                 function
    """
    # Exact scikit-learn types are checked first, duck-typed frameworks after.
    if isinstance(model, LogisticRegression):
        return model_featurizer_lr(model, **params)
    if isinstance(model, RandomForestClassifier):
        return model_featurizer_rfc(model, **params)
    if hasattr(model, "layers"):
        # It should be a keras model.
        return model_featurizer_keras(model, **params)  # pragma: no cover
    if hasattr(model, "forward"):
        # It should be a torch model.
        return model_featurizer_torch(model, **params)

    # Nothing matched: every candidate was tried, list them all in the error.
    candidates = [LogisticRegression, RandomForestClassifier, "Keras", "torch"]
    allowed = "\n".join(sorted(str(_) for _ in candidates))
    raise FeaturizerTypeError(  # pragma no cover
        "Unable to process type '{0}', allowed:\n{1}".format(
            type(model), allowed))

48 

49 

def is_vector(X):
    """
    Tells if *X* holds a single vector.

    A list is a vector when it is not empty and its first element
    is neither a list nor a tuple. An array or a dataframe is a vector
    unless it has more than one dimension and a first dimension
    different from 1.

    @param      X       list, :epkg:`numpy` array or :epkg:`pandas` dataframe
    @return             boolean
    """
    if isinstance(X, list):
        return len(X) > 0 and not isinstance(X[0], (list, tuple))
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        dims = X.shape
        several_rows = len(dims) > 1 and dims[0] != 1
        return not several_rows
    raise TypeError(  # pragma no cover
        "Unable to guess if X is a vector, type(X)={0}".format(type(X)))

71 

72 

def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type:
    a flattened vector if *X* is a single vector, the raw output
    otherwise.

    @param      X       vector or list
    @param      fct     function, called on a batch of observations
    @param      many    many observations or just one
    @return             output of *fct*, flattened with ``ravel``
                        when *X* is a single vector
    @raise      ValueError  if *many* contradicts what ``is_vector(X)`` tells
    """
    isv = is_vector(X)
    if many == isv:
        # The guard fires in two opposite situations, the message
        # must describe the one which actually happened.
        if many:
            msg = "Inconsistency X is a single vector, many is True"
        else:
            msg = "Inconsistency X is not a single vector, many is False"
        raise ValueError(msg)  # pragma: no cover
    # is_vector also accepts single-row 2D arrays and dataframes: they are
    # already a batch of one, wrapping them again would add a third dimension.
    if isv and getattr(X, "ndim", 1) < 2:
        X = [X]
    y = fct(X)
    if isv:
        y = y.ravel()
    return y

94 

95 

def model_featurizer_lr(model):
    """
    Builds a featurizer from a :epkg:`scikit-learn:linear_model:LogisticRegression`.
    The returned function computes ``model.decision_function(X)``
    through @see fn wrap_predict_sklearn.

    @param      model       model to use to featurize a vector
    @return                 function with signature ``(X, many, model=model)``
    """

    def featurize(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, model.decision_function, many)

    return featurize

110 

111 

def model_featurizer_rfc(model, output=True):
    """
    Builds a featurizer from a :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    The returned function computes either the model output
    (method *predict_proba*, the default) or the output of every tree
    (method *apply*).

    @param      model       model to use to featurize a vector
    @param      output      use output (``model.predict_proba(X)``)
                            or trees output (``model.apply(X)``)
    @return                 function with signature ``(X, many, model=model)``
    """
    # Only the name of the method differs between the two modes.
    method_name = "predict_proba" if output else "apply"

    def featurize(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, getattr(model, method_name), many)

    return featurize

135 

136 

def wrap_predict_keras(X, fct, many, shapes):  # pragma: no cover
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type:
    one flattened row per observation if *many* is true,
    a single flattened vector otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network
    @return             :epkg:`numpy` array, 2D if *many* is true
                        (one flattened output per ``X[i]``), 1D otherwise
    """
    if many:
        y = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(y)
    if len(X.shape) == len(shapes):
        # X already has as many dimensions as the network expects.
        return fct(X).ravel()
    # A leading axis is added whatever the number of dimensions of X is:
    # the ellipsis avoids an IndexError on inputs with fewer than 3 dimensions
    # and is equivalent to three explicit slices on the others.
    x = X[numpy.newaxis, ...]
    return fct(x).ravel()

156 

157 

def model_featurizer_keras(model, layer=None):  # pragma: no cover
    """
    Builds a featurizer from a :epkg:`keras` model.
    The returned function computes ``model.predict`` through
    @see fn wrap_predict_keras, optionally on a model rebuilt
    to stop at one particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index in ``model.layers`` of the layer whose output
                            becomes the output of the featurizer,
                            None to keep the model unchanged
    @return                 function with signature
                            ``(X, many, model=model, shapes=...)``

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.
    """
    if layer is not None:
        # Rebuilds a model of the same class from the input to the chosen layer.
        kept_output = model.layers[layer].output
        model = model.__class__(model.input, kept_output)

    def featurize(X, many, model=model, shapes=model._feed_input_shapes[0]):
        "wraps keras"
        return wrap_predict_keras(X, model.predict, many, shapes)

    return featurize

179 

180 

def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type:
    one flattened row per observation if *many* is true,
    a single flattened vector otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        None to skip the dimension check
    @return             :epkg:`numpy` array, 2D if *many* is true
                        (one flattened output per ``X[i]``), 1D otherwise
    """

    def flatten_output(out):
        "converts the output of *fct* into a flat numpy array"
        # A tensor attached to a graph cannot be converted by numpy,
        # it must be detached first. Outputs without ``detach``
        # are only flattened.
        if hasattr(out, "detach"):
            out = out.detach().numpy()
        return out.ravel()

    if many:
        # Every observation goes through the same conversion as a single one,
        # otherwise numpy.stack fails on tensors which require gradients.
        return numpy.stack([flatten_output(fct(X[i]))
                            for i in range(X.shape[0])])
    if shapes is None or len(X.shape) == len(shapes):
        return flatten_output(fct(X))
    # A leading axis is added whatever the number of dimensions of X is:
    # the ellipsis avoids an IndexError on inputs with fewer than 3 dimensions.
    return flatten_output(fct(X[numpy.newaxis, ...]))

204 

205 

def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model.
    The returned function computes ``model.forward`` through
    @see fn wrap_predict_torch, optionally on a model rebuilt
    to stop at one particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index in ``model.layers`` of the layer whose output
                            becomes the output of the featurizer,
                            None to keep the model unchanged
    @return                 function with signature
                            ``(X, many, model=model, shapes=None)``
    """
    if layer is not None:
        # NOTE(review): ``layers``, ``output`` and ``input`` are the attributes
        # the keras featurizer relies on, confirm that the torch models
        # given to this function expose them as well.
        kept_output = model.layers[layer].output
        model = model.__class__(model.input, kept_output)

    def featurize(X, many, model=model, shapes=None):
        "wraps torch"
        return wrap_predict_torch(X, model.forward, many, shapes)

    return featurize

223 

224 return lambda X, many, model=model, shapes=None: feat(X, model, many, shapes)