Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Featurizers for machine learned models.
4"""
5import numpy
6import pandas
7from sklearn.linear_model import LogisticRegression
8from sklearn.ensemble import RandomForestClassifier
11class FeaturizerTypeError(TypeError):
12 """
13 Unable to process a type.
14 """
15 pass
18def model_featurizer(model, **params):
19 """
20 Converts a machine learned model into a function which converts
21 a vector into features produced by the model.
22 It can be the output itself or intermediate results.
23 The model can come from :epkg:`scikit-learn`,
24 :epkg:`keras` or :epkg:`torch`.
26 @param model model
27 @param params additional parameters
28 @return function
29 """
30 tried = []
31 if isinstance(model, LogisticRegression):
32 return model_featurizer_lr(model, **params)
33 tried.append(LogisticRegression)
34 if isinstance(model, RandomForestClassifier):
35 return model_featurizer_rfc(model, **params)
36 tried.append(RandomForestClassifier)
37 if hasattr(model, "layers"):
38 # It should be a keras model.
39 return model_featurizer_keras(model, **params) # pragma: no cover
40 tried.append("Keras")
41 if hasattr(model, "forward"):
42 # It should be a torch model.
43 return model_featurizer_torch(model, **params)
44 tried.append("torch")
45 raise FeaturizerTypeError( # pragma no cover
46 "Unable to process type '{0}', allowed:\n{1}".format(
47 type(model), "\n".join(sorted(str(_) for _ in tried))))
50def is_vector(X):
51 """
52 Tells if *X* is a vector.
54 @param X vector
55 @return boolean
56 """
57 if isinstance(X, list):
58 if len(X) == 0 or isinstance(X[0], (list, tuple)):
59 return False
60 return True
61 if isinstance(X, numpy.ndarray):
62 if len(X.shape) > 1 and X.shape[0] != 1:
63 return False
64 return True
65 if isinstance(X, pandas.DataFrame):
66 if len(X.shape) > 1 and X.shape[0] != 1:
67 return False
68 return True
69 raise TypeError( # pragma no cover
70 "Unable to guess if X is a vector, type(X)={0}".format(type(X)))
73def wrap_predict_sklearn(X, fct, many):
74 """
75 Checks types and dimension.
76 Calls *fct* and returns the approriate type.
77 A vector if *X* is a vector, the raw output
78 otherwise.
80 @param X vector or list
81 @param fct function
82 @param many many observations or just one
83 """
84 isv = is_vector(X)
85 if many == isv:
86 raise ValueError( # pragma: no cover
87 "Inconsistency X is a single vector, many is True")
88 if isv:
89 X = [X]
90 y = fct(X)
91 if isv:
92 y = y.ravel()
93 return y
96def model_featurizer_lr(model):
97 """
98 Builds a featurizer from a :epkg:`scikit-learn:linear_model:LogisticRegression`.
99 It returns a function which returns ``model.decision_function(X)``.
101 @param model model to use to featurize a vector
102 @return function
103 """
105 def feat(X, model, many):
106 "wraps sklearn"
107 return wrap_predict_sklearn(X, model.decision_function, many)
109 return lambda X, many, model=model: feat(X, model, many)
112def model_featurizer_rfc(model, output=True):
113 """
114 Builds a featurizer from a :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
115 It returns a function which returns the output of every tree
116 (method *apply*).
118 @param model model to use to featurize a vector
119 @param output use output (``model.predict_proba(X)``)
120 or trees output (``model.apply(X)``)
121 @return function
122 """
123 if output:
124 def feat1(X, model, many):
125 "wraps sklearn"
126 return wrap_predict_sklearn(X, model.predict_proba, many)
128 return lambda X, many, model=model: feat1(X, model, many)
130 def feat2(X, model, many):
131 "wraps sklearn"
132 return wrap_predict_sklearn(X, model.apply, many)
134 return lambda X, many, model=model: feat2(X, model, many)
137def wrap_predict_keras(X, fct, many, shapes): # pragma: no cover
138 """
139 Checks types and dimension.
140 Calls *fct* and returns the approriate type.
141 A vector if *X* is a vector, the raw output
142 otherwise.
144 @param X vector or list
145 @param fct function
146 @param many many observations or just one
147 @param shapes expected input shapes for the neural network
148 """
149 if many:
150 y = [fct(X[i]).ravel() for i in range(X.shape[0])]
151 return numpy.stack(y)
152 if len(X.shape) == len(shapes):
153 return fct(X).ravel()
154 x = X[numpy.newaxis, :, :, :]
155 return fct(x).ravel()
158def model_featurizer_keras(model, layer=None): # pragma: no cover
159 """
160 Builds a featurizer from a :epkg:`keras` model
161 It returns a function which returns the output of one
162 particular layer.
164 @param model model to use to featurize a vector
165 @param layer number of layers to keep
166 @return function
168 See `About Keras models <https://keras.io/models/about-keras-models/>`_.
169 """
170 if layer is not None:
171 output = model.layers[layer].output
172 model = model.__class__(model.input, output)
174 def feat(X, model, many, shapes):
175 "wraps keras"
176 return wrap_predict_keras(X, model.predict, many, shapes)
178 return lambda X, many, model=model, shapes=model._feed_input_shapes[0]: feat(X, model, many, shapes)
181def wrap_predict_torch(X, fct, many, shapes):
182 """
183 Checks types and dimension.
184 Calls *fct* and returns the approriate type.
185 A vector if *X* is a vector, the raw output
186 otherwise.
188 @param X vector or list
189 @param fct function
190 @param many many observations or just one
191 @param shapes expected input shapes for the neural network
192 """
193 if many:
194 y = [fct(X[i]).ravel() for i in range(X.shape[0])]
195 return numpy.stack(y)
196 if shapes is None or len(X.shape) == len(shapes):
197 t = fct(X)
198 nt = t.detach().numpy().ravel()
199 else:
200 x = X[numpy.newaxis, :, :, :]
201 t = fct(x)
202 nt = t.detach().numpy().ravel()
203 return nt
206def model_featurizer_torch(model, layer=None):
207 """
208 Builds a featurizer from a :epkg:`torch` model
209 It returns a function which returns the output of one
210 particular layer.
212 @param model model to use to featurize a vector
213 @param layer number of layers to keep
214 @return function
215 """
216 if layer is not None:
217 output = model.layers[layer].output
218 model = model.__class__(model.input, output)
220 def feat(X, model, many, shapes):
221 "wraps torch"
222 return wrap_predict_torch(X, model.forward, many, shapes)
224 return lambda X, many, model=model, shapes=None: feat(X, model, many, shapes)