Coverage for mlinsights/mlmodel/ml_featurizer.py : 80%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2@file

3@brief Featurizers for machine learned models.

4"""

5import numpy

6import pandas

7from sklearn.linear_model import LogisticRegression

8from sklearn.ensemble import RandomForestClassifier

class FeaturizerTypeError(TypeError):
    """
    Raised when a model has a type no featurizer can be built for.
    """

def model_featurizer(model, **params):
    """
    Builds a function which turns a vector into the features
    produced by a machine learned model, either its output or
    intermediate results.
    Supported models come from :epkg:`scikit-learn`,
    :epkg:`keras` or :epkg:`torch`.

    @param      model       model
    @param      params      additional parameters forwarded to the
                            specialized featurizer builder
    @return                 function
    """
    # scikit-learn estimators are recognized by their class.
    if isinstance(model, LogisticRegression):
        return model_featurizer_lr(model, **params)
    if isinstance(model, RandomForestClassifier):
        return model_featurizer_rfc(model, **params)

    # Deep learning models are recognized by duck typing.
    if hasattr(model, "layers"):
        # It should be a keras model.
        return model_featurizer_keras(model, **params)  # pragma: no cover
    if hasattr(model, "forward"):
        # It should be a torch model.
        return model_featurizer_torch(model, **params)

    # Nothing matched: report every kind of model which was tried.
    attempted = (LogisticRegression, RandomForestClassifier, "Keras", "torch")
    allowed = "\n".join(sorted(map(str, attempted)))
    raise FeaturizerTypeError(  # pragma no cover
        "Unable to process type '{0}', allowed:\n{1}".format(
            type(model), allowed))

def is_vector(X):
    """
    Tells if *X* holds a single observation (a vector).

    A list is a vector when it is not empty and its first element
    is neither a list nor a tuple. An array or a dataframe is a
    vector when it has at most one dimension or when its first
    dimension is 1.

    @param      X   list, :epkg:`numpy` array or :epkg:`pandas` dataframe
    @return         boolean
    """
    if isinstance(X, list):
        return len(X) > 0 and not isinstance(X[0], (list, tuple))
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        dims = X.shape
        return len(dims) <= 1 or dims[0] == 1
    raise TypeError(  # pragma no cover
        "Unable to guess if X is a vector, type(X)={0}".format(type(X)))

def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @return             flattened output of *fct* if *X* is a single
                        vector, the raw output of *fct* otherwise
    @raises ValueError  if *many* contradicts what *X* looks like
    """
    isv = is_vector(X)
    if many == isv:
        # Both mismatches end up here, the message tells which one happened.
        raise ValueError(  # pragma: no cover
            "Inconsistency X is a single vector, many is True" if isv
            else "Inconsistency X is not a single vector, many is False")
    if isv:
        # *fct* works on batches: wrap the single observation.
        # NOTE(review): an input already shaped (1, n) is also seen as a
        # vector and gets wrapped once more -- confirm *fct* accepts it.
        X = [X]
    y = fct(X)
    if isv:
        # One observation in, one flat vector out.
        y = y.ravel()
    return y

def model_featurizer_lr(model):
    """
    Creates a featurizer based on a
    :epkg:`scikit-learn:linear_model:LogisticRegression`.
    The featurizer returns ``model.decision_function(X)``.

    @param      model       model to use to featurize a vector
    @return                 function with signature ``(X, many, model=model)``
    """

    def featurize(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, model.decision_function, many)

    return featurize

110

111

def model_featurizer_rfc(model, output=True):
    """
    Creates a featurizer based on a
    :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    The featurizer returns either the probabilities predicted by
    the forest (default) or the output of every tree (method *apply*).

    @param      model       model to use to featurize a vector
    @param      output      use output (``model.predict_proba(X)``)
                            or trees output (``model.apply(X)``)
    @return                 function with signature ``(X, many, model=model)``
    """
    # The choice is made once, the method itself is fetched at call time.
    method_name = "predict_proba" if output else "apply"

    def featurize(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, getattr(model, method_name), many)

    return featurize

135

136

def wrap_predict_keras(X, fct, many, shapes):  # pragma: no cover
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network
    @return             :epkg:`numpy` array, one flattened row per
                        observation if *many* is True, a flat vector
                        otherwise
    """
    if many:
        # Each observation goes through *fct* on its own and is flattened.
        y = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(y)
    if len(X.shape) != len(shapes):
        # The rank differs from the expected one: add a leading axis.
        # The ellipsis keeps this valid whatever the rank of X is.
        X = X[numpy.newaxis, ...]
    return fct(X).ravel()

156

157

def model_featurizer_keras(model, layer=None):  # pragma: no cover
    """
    Creates a featurizer based on a :epkg:`keras` model.
    The featurizer returns the output of the model or, if *layer*
    is specified, the output of one particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index in ``model.layers`` of the layer whose
                            output is returned, None to keep the whole model
    @return                 function with signature
                            ``(X, many, model=model, shapes=...)``

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.
    """
    if layer is not None:
        # Builds a model of the same class which stops at the chosen layer.
        truncated_output = model.layers[layer].output
        model = model.__class__(model.input, truncated_output)

    # The expected input shape is read once, when the featurizer is built.
    def featurize(X, many, model=model, shapes=model._feed_input_shapes[0]):
        "wraps keras"
        return wrap_predict_keras(X, model.predict, many, shapes)

    return featurize

179

180

def _as_flat_numpy(out):
    # Converts the output of a torch callable into a flat numpy vector.
    # A tensor is detached first, so it converts even when it requires grad.
    if hasattr(out, "detach"):
        out = out.detach().numpy()
    return out.ravel()


def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        None to skip the dimension check
    @return             :epkg:`numpy` array, one flattened row per
                        observation if *many* is True, a flat vector
                        otherwise
    """
    if many:
        # Same conversion as for a single observation: detach, then flatten.
        y = [_as_flat_numpy(fct(X[i])) for i in range(X.shape[0])]
        return numpy.stack(y)
    if shapes is not None and len(X.shape) != len(shapes):
        # The rank differs from the expected one: add a leading axis.
        # The ellipsis keeps this valid whatever the rank of X is.
        X = X[numpy.newaxis, ...]
    return _as_flat_numpy(fct(X))

204

205

def model_featurizer_torch(model, layer=None):
    """
    Creates a featurizer based on a :epkg:`torch` model.
    The featurizer returns the output of ``model.forward``.

    @param      model       model to use to featurize a vector
    @param      layer       index in ``model.layers`` of the layer whose
                            output is returned, None to keep the whole model
    @return                 function with signature
                            ``(X, many, model=model, shapes=None)``
    """
    if layer is not None:
        # NOTE(review): this relies on the model exposing ``layers``,
        # ``input`` and a per-layer ``output`` -- confirm that the torch
        # models given to this function provide them.
        truncated_output = model.layers[layer].output
        model = model.__class__(model.input, truncated_output)

    def featurize(X, many, model=model, shapes=None):
        "wraps torch"
        return wrap_predict_torch(X, model.forward, many, shapes)

    return featurize

Coverage for mlinsights/mlmodel/ml_featurizer.py : 80%

71 statements

Coverage for mlinsights/mlmodel/ml_featurizer.py : 80%

71 statements 57 run 14 missing 22 excluded

71 statements