# -*- coding: utf-8 -*-
"""
@file
@brief Conversion from tree to neural network.
"""
import numpy
import numpy.random as rnd
from scipy.special import expit, softmax, kl_div as kl_fct  # pylint: disable=E0611
from ._neural_tree_api import _TrainingAPI

class NeuralTreeNode(_TrainingAPI):
    """
    One node in a neural network.
    """

    @staticmethod
    def _relu(x):
        "Relu function."
        return numpy.maximum(x, 0)

    @staticmethod
    def _leakyrelu(x):
        "Leaky Relu function."
        return numpy.maximum(x, 0) + numpy.minimum(x, 0) * 0.01

    @staticmethod
    def _drelu(x):
        "Derivative of the Relu function."
        res = numpy.ones(x.shape, dtype=x.dtype)
        res[x < 0] = 0.
        return res

    @staticmethod
    def _dleakyrelu(x):
        "Derivative of the Leaky Relu function."
        res = numpy.ones(x.shape, dtype=x.dtype)
        res[x < 0] = 0.01
        return res

    @staticmethod
    def _dsigmoid(x):
        "Derivative of the sigmoid function."
        y = expit(x)
        return y * (1 - y)

    @staticmethod
    def _softmax(x):
        "Softmax function."
        if len(x.shape) == 2:
            return softmax(x, axis=1)
        return softmax(x)

    @staticmethod
    def _dsoftmax(x):
        "Derivative of the softmax function."
        soft = softmax(x)
        grad = - soft @ soft.T
        diag = numpy.diag(soft)
        return diag + grad

    @staticmethod
    def get_activation_function(activation):
        """
        Returns the activation function.
        It returns a function *y=f(x)*.
        """
        if activation == 'softmax':
            return NeuralTreeNode._softmax
        if activation == 'softmax4':
            return lambda x: NeuralTreeNode._softmax(x * 4)
        if activation in {'logistic', 'expit', 'sigmoid'}:
            return expit
        if activation == 'sigmoid4':
            return lambda x: expit(x * 4)
        if activation == 'relu':
            return NeuralTreeNode._relu
        if activation == 'leakyrelu':
            return NeuralTreeNode._leakyrelu
        if activation == 'identity':
            return lambda x: x
        raise ValueError(  # pragma: no cover
            "Unknown activation function '{}'.".format(activation))

    @staticmethod
    def get_activation_gradient_function(activation):
        """
        Returns the derivative of the activation function.
        It returns a function *y=f'(x)*.
        About the sigmoid:

        .. math::

            \\begin{array}{rcl}
            f(x) &=& \\frac{1}{1 + e^{-x}} \\\\
            f'(x) &=& \\frac{e^{-x}}{(1 + e^{-x})^2} = f(x)(1-f(x))
            \\end{array}
        """
        if activation == 'softmax':
            return NeuralTreeNode._dsoftmax
        if activation == 'softmax4':
            return lambda x: NeuralTreeNode._dsoftmax(x) * 4
        if activation in {'logistic', 'expit', 'sigmoid'}:
            return NeuralTreeNode._dsigmoid
        if activation == 'sigmoid4':
            return lambda x: NeuralTreeNode._dsigmoid(x) * 4
        if activation == 'relu':
            return NeuralTreeNode._drelu
        if activation == 'leakyrelu':
            return NeuralTreeNode._dleakyrelu
        if activation == 'identity':
            return lambda x: numpy.ones(x.shape, dtype=x.dtype)
        raise ValueError(  # pragma: no cover
            "Unknown activation gradient function '{}'.".format(activation))

    @staticmethod
    def get_activation_loss_function(activation):
        """
        Returns a default loss function based on the activation
        function. It returns a function *g=loss(x,y)*.
        """
        if activation in {'logistic', 'expit', 'sigmoid', 'sigmoid4'}:
            # regression + regularization
            return lambda x, y: (x - y) ** 2
        if activation in {'softmax', 'softmax4'}:
            cst = numpy.finfo(numpy.float32).eps

            # classification
            def kl_fct2(x, y):
                return kl_fct(x + cst, y + cst)
            return kl_fct2
        if activation in {'identity', 'relu', 'leakyrelu'}:
            # regression
            return lambda x, y: (x - y) ** 2
        raise ValueError(
            "Unknown activation function '{}'.".format(activation))

    @staticmethod
    def get_activation_dloss_function(activation):
        """
        Returns the derivative of the default loss function based
        on the activation function. It returns a function
        *df(x,y)/dx* where *x* is the prediction and *y* the expected value.
        """
        if activation in {'logistic', 'expit', 'sigmoid', 'sigmoid4'}:
            # regression + regularization
            def dregrdx(x, y):
                return (x - y) * 2

            return dregrdx

        if activation in {'softmax', 'softmax4'}:
            # classification
            cst = numpy.finfo(numpy.float32).eps

            def dclsdx(x, y):
                return numpy.log(x + cst) - numpy.log(y + cst)

            return dclsdx

        if activation in {'identity', 'relu', 'leakyrelu'}:
            # regression
            def dregdx(x, y):
                return (x - y) * 2

            return dregdx
        raise ValueError(  # pragma: no cover
            "Unknown activation function '{}'.".format(activation))

    def __init__(self, weights, bias=None, activation='sigmoid', nodeid=-1,
                 tag=None):
        """
        @param      weights     weights
        @param      bias        bias, if None, draws a random number
        @param      activation  activation function
        @param      nodeid      node id
        @param      tag         unused, only stores information about
                                how this node was created
        """
        self.tag = tag
        if isinstance(weights, int):
            if activation.startswith('softmax'):
                weights = rnd.randn(2, weights)
            else:
                weights = rnd.randn(weights)
        if isinstance(weights, list):
            weights = numpy.array(weights)

        if len(weights.shape) == 1:
            self.n_outputs = 1
            if bias is None:
                bias = rnd.randn()
            self.coef = numpy.empty(len(weights) + 1)
            self.coef[1:] = weights
            self.coef[0] = bias

        elif len(weights.shape) == 2:
            self.n_outputs = weights.shape[0]
            if self.n_outputs == 1:
                raise RuntimeError(  # pragma: no cover
                    "Unexpected unsqueezed weights shape: {}".format(weights.shape))
            if bias is None:
                bias = rnd.randn(self.n_outputs)
            shape = list(weights.shape)
            shape[1] += 1
            self.coef = numpy.empty(shape)
            self.coef[:, 1:] = weights
            self.coef[:, 0] = bias
        else:
            raise RuntimeError(  # pragma: no cover
                "Unexpected weights shape: {}".format(weights.shape))

        self.activation = activation
        self.nodeid = nodeid
        self._set_fcts()
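
    # Construction sketch (illustrative values only): a node with two inputs,
    # an explicit bias and a sigmoid activation.
    #
    #     node = NeuralTreeNode(weights=[0.5, -0.2], bias=0.1,
    #                           activation='sigmoid')
    #     node.ndim           # -> 2
    #     node.input_weights  # -> array([ 0.5, -0.2])
    #     node.bias           # -> 0.1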

    def _set_fcts(self):
        self.activation_ = NeuralTreeNode.get_activation_function(
            self.activation)
        self.gradient_ = NeuralTreeNode.get_activation_gradient_function(
            self.activation)
        self.losss_ = NeuralTreeNode.get_activation_loss_function(
            self.activation)
        self.dlossds_ = NeuralTreeNode.get_activation_dloss_function(
            self.activation)

    @property
    def input_weights(self):
        "Returns the weights."
        if self.n_outputs == 1:
            return self.coef[1:]
        return self.coef[:, 1:]

    @property
    def bias(self):
        "Returns the bias."
        if self.n_outputs == 1:
            return self.coef[0]
        return self.coef[:, 0]

    def __getstate__(self):
        "usual"
        return {
            'coef': self.coef, 'activation': self.activation,
            'nodeid': self.nodeid, 'n_outputs': self.n_outputs,
            'tag': self.tag}

    def __setstate__(self, state):
        "usual"
        self.coef = state['coef']
        self.activation = state['activation']
        self.nodeid = state['nodeid']
        self.n_outputs = state['n_outputs']
        self.tag = state['tag']
        self._set_fcts()

    def __eq__(self, obj):
        if self.coef.shape != obj.coef.shape:
            return False
        if any(map(lambda xy: xy[0] != xy[1],
                   zip(self.coef.ravel(), obj.coef.ravel()))):
            return False
        if self.activation != obj.activation:
            return False
        return True

    def __repr__(self):
        "usual"
        if len(self.coef.shape) == 1:
            return "%s(weights=%r, bias=%r, activation=%r)" % (
                self.__class__.__name__, self.coef[1:],
                self.coef[0], self.activation)
        return "%s(weights=%r, bias=%r, activation=%r)" % (
            self.__class__.__name__, self.coef[:, 1:],
            self.coef[:, 0], self.activation)

    def _predict(self, X):
        "Computes inputs of the activation function."
        if self.n_outputs == 1:
            return X @ self.coef[1:] + self.coef[0]
        res = X.reshape((1, -1)) @ self.coef[:, 1:].T + self.coef[:, 0]
        return res.ravel()

    def predict(self, X):
        "Computes neuron outputs."
        if self.n_outputs == 1:
            return self.activation_(X @ self.coef[1:] + self.coef[0])
        if len(X.shape) == 2:
            return self.activation_(
                (X @ self.coef[:, 1:].T + self.coef[:, 0]))
        return self.activation_(
            (X.reshape((1, -1)) @ self.coef[:, 1:].T + self.coef[:, 0]).ravel())
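
    # Prediction sketch for the single-output case (illustrative values):
    # the neuron computes sigmoid(w . x + b).
    #
    #     node = NeuralTreeNode(weights=[1.0, 1.0], bias=0.0,
    #                           activation='sigmoid')
    #     node.predict(numpy.array([0.0, 0.0]))   # -> 0.5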

    @property
    def ndim(self):
        "Returns the input dimension."
        if len(self.coef.shape) == 1:
            return self.coef.shape[0] - 1
        return self.coef.shape[1] - 1

    @property
    def training_weights(self):
        "Returns the weights stored in the neuron."
        return self.coef.ravel()

    def update_training_weights(self, X, add=True):  # pylint: disable=W0237
        """
        Updates weights.

        :param X: vector to add to the weights, such as a gradient step
        :param add: if True, adds the vector to the weights,
            otherwise replaces them
        """
        if add:
            self.coef += X.reshape(self.coef.shape)
        else:
            numpy.copyto(self.coef, X.reshape(self.coef.shape))

    def fill_cache(self, X):
        """
        Creates a cache with intermediate results.
        ``lX`` holds the inputs of the activation function,
        ``aX`` its outputs, which are also the prediction.
        """
        cache = dict(lX=self._predict(X))
        cache['aX'] = self.activation_(cache['lX'])
        return cache
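
    # Cache sketch (illustrative values): ``lX`` is the linear part,
    # ``aX`` the activated output.
    #
    #     node = NeuralTreeNode(weights=[1.0, 1.0], bias=0.0,
    #                           activation='identity')
    #     node.fill_cache(numpy.array([1.0, 2.0]))
    #     # -> {'lX': 3.0, 'aX': 3.0}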

    def _common_loss_dloss(self, X, y, cache=None):
        """
        Common beginning to methods *loss*, *dlossds*,
        *dlossdw*.
        """
        if cache is not None and 'aX' in cache:
            act = cache['aX']
        else:
            act = self.predict(X)
        return act

    def loss(self, X, y, cache=None):
        """
        Computes the loss. Returns a float.
        """
        act = self._common_loss_dloss(X, y, cache=cache)
        return self.losss_(act, y)  # pylint: disable=E1120

    def dlossds(self, X, y, cache=None):
        """
        Computes the loss derivative due to prediction error.
        """
        act = self._common_loss_dloss(X, y, cache=cache)
        return self.dlossds_(act, y)

    def gradient_backward(self, graddx, X, inputs=False, cache=None):
        """
        Computes the gradients at point *X*.

        :param graddx: existing gradient against the inputs
        :param X: computes the gradient in X
        :param inputs: if False, derivative against the coefficients,
            otherwise against the inputs.
        :param cache: cache intermediate results
        :return: gradient
        """
        if cache is None:
            cache = self.fill_cache(X)

        pred = cache['aX']
        ga = self.gradient_(pred)
        if len(ga.shape) == 2:
            f = graddx @ ga
        else:
            f = graddx * ga

        if inputs:
            if len(self.coef.shape) == 1:
                rgrad = numpy.empty(X.shape)
                rgrad[:] = self.coef[1:]
                rgrad *= f
            else:
                rgrad = numpy.sum(
                    self.coef[:, 1:] * f.reshape((-1, 1)), axis=0)
            return rgrad

        rgrad = numpy.empty(self.coef.shape)
        if len(self.coef.shape) == 1:
            rgrad[0] = 1
            rgrad[1:] = X
            rgrad *= f
        else:
            rgrad[:, 0] = 1
            rgrad[:, 1:] = X
            rgrad *= f.reshape((-1, 1))
        return rgrad
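
    # End-to-end sketch (illustrative values, not taken from the library's
    # tests): compute a loss, then the gradient against the coefficients.
    #
    #     node = NeuralTreeNode(weights=[1.0, 1.0], bias=0.0,
    #                           activation='identity')
    #     X = numpy.array([1.0, 2.0])
    #     y = numpy.array([1.0])
    #     cache = node.fill_cache(X)
    #     node.loss(X, y, cache=cache)                      # squared error
    #     node.gradient_backward(node.dlossds(X, y, cache=cache), X,
    #                            cache=cache)               # d loss / d coef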