Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Conversion from tree to neural network.
5"""
6from io import BytesIO
7import pickle
8import numpy
9from ._neural_tree_api import _TrainingAPI
10from ._neural_tree_node import NeuralTreeNode
13def label_class_to_softmax_output(y_label):
14 """
15 Converts a binary class label into a matrix
16 with two columns of probabilities.
18 .. runpython::
19 :showcode:
21 import numpy
22 from mlstatpy.ml.neural_tree import label_class_to_softmax_output
24 y_label = numpy.array([0, 1, 0, 0])
25 soft_y = label_class_to_softmax_output(y_label)
26 print(soft_y)
27 """
28 if len(y_label.shape) != 1:
29 raise ValueError(
30 "y_label must be a vector but has shape {}.".format(y_label.shape))
31 y = numpy.empty((y_label.shape[0], 2), dtype=numpy.float64)
32 y[:, 0] = (y_label < 0.5).astype(numpy.float64)
33 y[:, 1] = 1 - y[:, 0]
34 return y
37class NeuralTreeNet(_TrainingAPI):
38 """
39 Node ensemble.
41 .. runpython::
42 :showcode:
44 import numpy
45 from mlstatpy.ml.neural_tree import NeuralTreeNode, NeuralTreeNet
47 w1 = numpy.array([-0.5, 0.8, -0.6])
49 neu = NeuralTreeNode(w1[1:], bias=w1[0], activation='sigmoid')
50 net = NeuralTreeNet(2, empty=True)
51 net.append(neu, numpy.arange(2))
53 ide = NeuralTreeNode(numpy.array([1.]),
54 bias=numpy.array([0.]),
55 activation='identity')
57 net.append(ide, numpy.arange(2, 3))
59 X = numpy.abs(numpy.random.randn(10, 2))
60 pred = net.predict(X)
61 print(pred)
62 """
64 def __init__(self, dim, empty=True):
65 """
66 @param dim space dimension
67 @param empty empty network, other adds an identity node
68 """
69 self.dim = dim
70 if empty:
71 self.nodes = []
72 self.nodes_attr = []
73 else:
74 self.nodes = [
75 NeuralTreeNode(
76 numpy.ones((dim,), dtype=numpy.float64),
77 bias=numpy.float64(0.),
78 activation='identity', nodeid=0)]
79 self.nodes_attr = [dict(inputs=numpy.arange(0, dim), output=dim,
80 coef_size=self.nodes[0].coef.size,
81 first_coef=0)]
82 self._update_members()
84 def copy(self):
85 st = BytesIO()
86 pickle.dump(self, st)
87 cop = BytesIO(st.getvalue())
88 return pickle.load(cop)
90 def _update_members(self, node=None, attr=None):
91 "Updates internal members."
92 if node is None or attr is None:
93 if len(self.nodes_attr) == 0:
94 self.size_ = self.dim
95 else:
96 self.size_ = max(d['output'] for d in self.nodes_attr) + 1
97 self.output_to_node_ = {}
98 self.input_to_node_ = {}
99 for node2, attr2 in zip(self.nodes, self.nodes_attr):
100 if isinstance(attr2['output'], list):
101 for o in attr2['output']:
102 self.output_to_node_[o] = node2, attr2
103 else:
104 self.output_to_node_[attr2['output']] = node2, attr2
105 for i in attr2['inputs']:
106 self.input_to_node_[i] = node2, attr2
107 else:
108 if len(node.input_weights.shape) == 1:
109 self.size_ += 1
110 else:
111 self.size_ += node.input_weights.shape[0]
112 if isinstance(attr['output'], list):
113 for o in attr['output']:
114 self.output_to_node_[o] = node, attr
115 else:
116 self.output_to_node_[attr['output']] = node, attr
117 for i in attr['inputs']:
118 self.input_to_node_[i] = node, attr
120 def __repr__(self):
121 "usual"
122 return "%s(%d)" % (self.__class__.__name__, self.dim)
124 def clear(self):
125 "Clear all nodes"
126 del self.nodes[:]
127 del self.nodes_attr[:]
128 self._update_members()
130 def append(self, node, inputs):
131 """
132 Appends a node into the graph.
134 @param node node to add
135 @param inputs index of input nodes
136 """
137 if len(node.input_weights.shape) == 1:
138 if node.input_weights.shape[0] != len(inputs):
139 raise RuntimeError(
140 "Dimension mismatch between weights [{}] and inputs [{}].".format(
141 node.input_weights.shape[0], len(inputs)))
142 node.nodeid = len(self.nodes)
143 self.nodes.append(node)
144 first_coef = (
145 0 if len(self.nodes_attr) == 0 else
146 self.nodes_attr[-1]['first_coef'] + self.nodes_attr[-1]['coef_size'])
147 attr = dict(inputs=numpy.array(inputs), output=self.size_,
148 coef_size=node.coef.size, first_coef=first_coef)
149 self.nodes_attr.append(attr)
150 elif len(node.input_weights.shape) == 2:
151 if node.input_weights.shape[1] != len(inputs):
152 raise RuntimeError( # pragma: no cover
153 "Dimension mismatch between weights [{}] and inputs [{}].".format(
154 node.input_weights.shape[1], len(inputs)))
155 node.nodeid = len(self.nodes)
156 self.nodes.append(node)
157 first_coef = (
158 0 if len(self.nodes_attr) == 0 else
159 self.nodes_attr[-1]['first_coef'] + self.nodes_attr[-1]['coef_size'])
160 attr = dict(inputs=numpy.array(inputs),
161 output=list(range(self.size_, self.size_ +
162 node.input_weights.shape[0])),
163 coef_size=node.coef.size, first_coef=first_coef)
164 self.nodes_attr.append(attr)
165 else:
166 raise RuntimeError( # pragma: no cover
167 "Coefficients should have 1 or 2 dimension not {}.".format(node.input_weights.shape))
168 self._update_members(node, attr)
170 def __getitem__(self, i):
171 "Retrieves node and attributes for node i."
172 return self.nodes[i], self.nodes_attr[i]
174 def __len__(self):
175 "Returns the number of nodes"
176 return len(self.nodes)
178 def _predict_one(self, X):
179 res = numpy.zeros((self.size_,), dtype=numpy.float64)
180 res[:self.dim] = X
181 for node, attr in zip(self.nodes, self.nodes_attr):
182 res[attr['output']] = node.predict(res[attr['inputs']])
183 return res
185 def predict(self, X):
186 if len(X.shape) == 2:
187 res = numpy.zeros((X.shape[0], self.size_))
188 for i, x in enumerate(X):
189 res[i, :] = self._predict_one(x)
190 return res
191 return self._predict_one(X)
193 @staticmethod
194 def create_from_tree(tree, k=1., arch='one'):
195 """
196 Creates a @see cl NeuralTreeNet instance from a
197 :epkg:`DecisionTreeClassifier`
199 @param tree :epkg:`DecisionTreeClassifier`
200 @param k slant of the sigmoïd
201 @param arch architecture, see below
202 @return @see cl NeuralTreeNet
204 The function only works for binary problems.
205 Available architecture:
206 * `'one'`: the method adds nodes with one output, there
207 is no soecific definition of layers,
208 * `'compact'`: the adds two nodes, the first computes
209 the threshold, the second one computes the leaves
210 output, a final node merges all outputs into one
212 See notebook :ref:`neuraltreerst` for examples.
213 """
214 if arch == 'one':
215 return NeuralTreeNet._create_from_tree_one(tree, k)
216 if arch == 'compact':
217 return NeuralTreeNet._create_from_tree_compact(tree, k)
218 raise ValueError("Unknown arch value '{}'.".format(arch))
220 @staticmethod
221 def _create_from_tree_one(tree, k=1.):
222 "Implements strategy one. See @see meth create_from_tree."
224 if tree.n_classes_ > 2:
225 raise RuntimeError(
226 "The function only support binary classification problem.")
228 n_nodes = tree.tree_.node_count
229 children_left = tree.tree_.children_left
230 children_right = tree.tree_.children_right
231 feature = tree.tree_.feature
232 threshold = tree.tree_.threshold
233 value = tree.tree_.value.reshape((-1, 2))
234 output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
235 max_features_ = tree.max_features_
237 root = NeuralTreeNet(tree.max_features_, empty=True)
238 feat_index = numpy.arange(0, max_features_)
239 predecessor = {}
240 outputs = {i: [] for i in range(0, tree.n_classes_)}
241 for i in range(n_nodes):
243 if children_left[i] != children_right[i]:
244 # node with a threshold
245 # right side
246 coef = numpy.zeros((max_features_,), dtype=numpy.float64)
247 coef[feature[i]] = -k
248 node_th = NeuralTreeNode(coef, bias=k * threshold[i],
249 activation='sigmoid4', tag="N%d-th" % i)
250 root.append(node_th, feat_index)
252 if i in predecessor:
253 pred = predecessor[i]
254 node1 = pred
255 node2 = node_th
256 attr1 = root[node1.nodeid][1]
257 attr2 = root[node2.nodeid][1]
259 coef = numpy.ones((2,), dtype=numpy.float64) * k
260 node_true = NeuralTreeNode(coef, bias=-k * 1.5,
261 activation='sigmoid4',
262 tag="N%d-T" % i)
263 root.append(node_true, [attr1['output'], attr2['output']])
265 coef = numpy.zeros((2,), dtype=numpy.float64)
266 coef[0] = k
267 coef[1] = -k
268 node_false = NeuralTreeNode(coef, bias=-k * 0.25,
269 activation='sigmoid4',
270 tag="N%d-F" % i)
271 root.append(node_false, [attr1['output'], attr2['output']])
273 predecessor[children_left[i]] = node_true
274 predecessor[children_right[i]] = node_false
275 else:
276 coef = numpy.ones((1,), dtype=numpy.float64) * -1
277 node_false = NeuralTreeNode(
278 coef, bias=1, activation='identity', tag="N%d-F" % i)
279 attr = root[node_th.nodeid][1]
280 root.append(node_false, [attr['output']])
282 predecessor[children_left[i]] = node_th
283 predecessor[children_right[i]] = node_false
285 elif i in predecessor:
286 # leave
287 outputs[output_class[i]].append(predecessor[i])
289 # final node
290 output = []
291 index = [0]
292 nb = []
293 for i in range(0, tree.n_classes_):
294 output.extend(outputs[i])
295 nb.append(len(outputs[i]))
296 index.append(len(outputs[i]) + index[-1])
297 coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64)
298 for i in range(0, tree.n_classes_):
299 coef[i, index[i]:index[i + 1]] = k
300 feat = [root[n.nodeid][1]['output'] for n in output]
301 root.append(
302 NeuralTreeNode(coef, bias=(-k / 2) * len(feat),
303 activation='softmax4', tag="Nfinal"),
304 feat)
306 # final
307 return root
309 @staticmethod
310 def _create_from_tree_compact(tree, k=1.):
311 "Implements strategy one. See @see meth create_from_tree."
313 if tree.n_classes_ > 2:
314 raise RuntimeError(
315 "The function only support binary classification problem.")
317 n_nodes = tree.tree_.node_count
318 children_left = tree.tree_.children_left
319 children_right = tree.tree_.children_right
320 feature = tree.tree_.feature
321 threshold = tree.tree_.threshold
322 value = tree.tree_.value.reshape((-1, 2))
323 output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
324 max_features_ = tree.max_features_
325 feat_index = numpy.arange(0, max_features_)
327 root = NeuralTreeNet(tree.max_features_, empty=True)
328 coef1 = []
329 bias1 = []
330 parents = {}
331 rows = {}
333 # first pass: threshold
335 for i in range(n_nodes):
336 if children_left[i] == children_right[i]:
337 # leaves
338 continue
339 rows[i] = len(coef1)
340 parents[children_left[i]] = i
341 parents[children_right[i]] = i
342 coef = numpy.zeros((max_features_,), dtype=numpy.float64)
343 coef[feature[i]] = -k
344 coef1.append(coef)
345 bias1.append(k * threshold[i])
347 coef1 = numpy.vstack(coef1)
348 if len(bias1) == 1:
349 bias1 = bias1[0]
350 node1 = NeuralTreeNode(
351 coef1 if coef1.shape[0] > 1 else coef1[0], bias=bias1,
352 activation='sigmoid4', tag="threshold")
353 root.append(node1, feat_index)
354 th_index = numpy.arange(max_features_, max_features_ + coef1.shape[0])
356 # second pass: decision path
357 coef2 = []
358 bias2 = []
359 output = []
361 for i in range(n_nodes):
362 if children_left[i] != children_right[i]:
363 # not a leave
364 continue
366 path = []
367 last = i
368 lr = "class", output_class[i]
369 output.append(output_class[i])
370 while last is not None:
371 path.append((last, lr))
372 if last not in parents:
373 break
374 par = parents[last]
375 if children_right[par] == last:
376 lr = 'right'
377 elif children_left[par] == last:
378 lr = 'left'
379 else:
380 raise RuntimeError( # pragma: no cover
381 "Inconsistent tree structure.")
382 last = par
384 coef = numpy.zeros((coef1.shape[0], ), dtype=numpy.float64)
385 bias = 0.
386 for ip, lr in path:
387 if isinstance(lr, tuple):
388 lr, value = lr
389 if lr != 'class':
390 raise RuntimeError(
391 "algorithm issue") # pragma: no cover
392 else:
393 r = rows[ip]
394 if lr == 'right':
395 coef[r] = k
396 bias -= k / 2
397 else:
398 coef[r] = -k
399 bias += k / 2
400 coef2.append(coef)
401 bias2.append(bias)
403 coef2 = numpy.vstack(coef2)
404 if len(bias2) == 1:
405 bias2 = bias2[0]
406 node2 = NeuralTreeNode(
407 coef2 if coef2.shape[0] > 1 else coef2[0], bias=bias2,
408 activation='sigmoid4', tag="pathes")
409 root.append(node2, th_index)
411 # final node
412 coef = numpy.zeros(
413 (tree.n_classes_, coef2.shape[0]), dtype=numpy.float64)
414 bias = [0. for i in range(tree.n_classes_)]
415 for i, cls in enumerate(output):
416 coef[cls, i] = -k
417 coef[1 - cls, i] = k
418 bias[cls] += k / 2
419 bias[1 - cls] += -k / 2
420 findex = numpy.arange(max_features_ + coef1.shape[0],
421 max_features_ + coef1.shape[0] + coef2.shape[0])
422 root.append(
423 NeuralTreeNode(coef, bias=bias,
424 activation='softmax4', tag="final"),
425 findex)
427 # end
428 return root
430 def to_dot(self, X=None):
431 """
432 Exports the neural network into :epkg:`dot`.
434 @param X input as an example
435 """
436 y = None
437 if X is not None:
438 y = self.predict(X)
439 rows = ['digraph Tree {',
440 "node [shape=box, fontsize=10];",
441 "edge [fontsize=8];"]
442 for i in range(self.dim):
443 if y is None:
444 rows.append('{0} [label="X[{0}]"];'.format(i))
445 else:
446 rows.append(
447 '{0} [label="X[{0}]=\\n{1:1.2f}"];'.format(i, X[i]))
449 labels = {}
451 for i in range(0, len(self)): # pylint: disable=C0200
452 o = self[i][1]['output']
453 if isinstance(o, int):
454 lo = str(o)
455 labels[o] = lo
456 lof = "%s"
457 else:
458 lo = "s" + 'a'.join(map(str, o))
459 for oo in o:
460 labels[oo] = '{}:f{}'.format(lo, oo)
461 los = "|".join("<f{0}> {0}".format(oo) for oo in o)
462 lof = "%s\n" + los
464 a = "a={}\n".format(self[i][0].activation)
465 stag = "" if self[i][0].tag is None else (self[i][0].tag + "\\n")
466 bias = str(numpy.array(self[i][0].bias)).replace(" ", "\ ")
467 if y is None:
468 lab = lof % '{}{}id={} b={} s={}'.format(
469 stag, a, i, bias, self[i][0].n_outputs)
470 else:
471 yo = numpy.array(y[o])
472 lab = lof % '{}{}id={} b={} s={}\ny={}'.format(
473 stag, a, i, bias, self[i][0].n_outputs, yo)
474 rows.append('{} [label="{}"];'.format(
475 lo, lab.replace("\n", "\n")))
476 for ii, inp in enumerate(self[i][1]['inputs']):
477 if isinstance(o, int):
478 w = self[i][0].input_weights[ii]
479 if w == 0:
480 c = ', color=grey, fontcolor=grey'
481 elif w < 0:
482 c = ', color=red, fontcolor=red'
483 else:
484 c = ', color=blue, fontcolor=blue'
485 rows.append(
486 '{} -> {} [label="{}"{}];'.format(inp, o, w, c))
487 continue
489 w = self[i][0].input_weights[:, ii]
490 for oi, oo in enumerate(o):
491 if w[oi] == 0:
492 c = ', color=grey, fontcolor=grey'
493 elif w[oi] < 0:
494 c = ', color=red, fontcolor=red'
495 else:
496 c = ', color=blue, fontcolor=blue'
497 rows.append('{} -> {} [label="{}|{}"{}];'.format(
498 labels.get(inp, inp), labels[oo], oi, w[oi], c))
500 rows.append('}')
501 return '\n'.join(rows)
503 @property
504 def shape(self):
505 "Returns the shape of the coefficients."
506 return (sum(n.coef.size for n in self.nodes), )
508 @property
509 def training_weights(self):
510 "Returns the weights."
511 sh = self.shape
512 res = numpy.empty(sh[0], dtype=numpy.float64)
513 pos = 0
514 for n in self.nodes:
515 s = n.coef.size
516 res[pos: pos + s] = (
517 n.coef if len(n.coef.shape) == 1 else n.coef.ravel())
518 pos += s
519 return res
521 def update_training_weights(self, X, add=True): # pylint: disable=W0237
522 """
523 Updates weights.
525 :param grad: vector to add to the weights such as gradient
526 :param add: addition or replace
527 """
528 pos = 0
529 if add:
530 for n in self.nodes:
531 s = n.coef.size
532 n.coef += X[pos: pos + s].reshape(n.coef.shape)
533 pos += s
534 else:
535 for n in self.nodes:
536 s = n.coef.size
537 numpy.copyto(n.coef, X[pos: pos + s].reshape(n.coef.shape))
538 pos += s
540 def fill_cache(self, X):
541 """
542 Creates a cache with intermediate results.
543 """
544 big_cache = {}
545 res = numpy.zeros((self.size_,), dtype=numpy.float64)
546 res[:self.dim] = X
547 for node, attr in zip(self.nodes, self.nodes_attr):
548 cache = node.fill_cache(res[attr['inputs']])
549 big_cache[node.nodeid] = cache
550 res[attr['output']] = cache['aX']
551 big_cache[-1] = res
552 return big_cache
554 def _get_output_node_attr(self, nb_last):
555 """
556 Retrieves the output nodes.
557 *nb_last* is the number of expected outputs.
558 """
559 neurones = set(self.output_to_node_[i][0].nodeid
560 for i in range(self.size_ - nb_last, self.size_))
561 if len(neurones) != 1:
562 raise RuntimeError( # pragma: no cover
563 "Only one output node is implemented not {}".format(
564 len(neurones)))
565 return self.output_to_node_[self.size_ - 1]
567 def _common_loss_dloss(self, X, y, cache=None):
568 """
569 Common beginning to methods *loss*, *dlossds*,
570 *dlossdw*.
571 """
572 last = 1 if len(y.shape) <= 1 else y.shape[1]
573 if cache is not None and -1 in cache:
574 res = cache[-1]
575 else:
576 res = self.predict(X)
577 if len(res.shape) == 2:
578 pred = res[:, -last:]
579 else:
580 pred = res[-last:]
581 last_node, last_attr = self._get_output_node_attr(last)
582 return res, pred, last_node, last_attr
584 def loss(self, X, y, cache=None):
585 """
586 Computes the loss due to prediction error. Returns a float.
587 """
588 res, _, last_node, last_attr = self._common_loss_dloss(
589 X, y, cache=cache)
590 if len(res.shape) <= 1:
591 return last_node.loss(res[last_attr['inputs']], y) # pylint: disable=E1120
592 return last_node.loss(res[:, last_attr['inputs']], y) # pylint: disable=E1120
594 def dlossds(self, X, y, cache=None):
595 """
596 Computes the loss derivative against the inputs.
597 """
598 res, _, last_node, last_attr = self._common_loss_dloss(
599 X, y, cache=cache)
600 if len(res.shape) <= 1:
601 return last_node.dlossds(res[last_attr['inputs']], y) # pylint: disable=E1120
602 return last_node.dlossds(res[:, last_attr['inputs']], y) # pylint: disable=E1120
604 def gradient_backward(self, graddx, X, inputs=False, cache=None):
605 """
606 Computes the gradient in X.
608 :param graddx: existing gradient against the inputs
609 :param X: computes the gradient in X
610 :param inputs: if False, derivative against the coefficients,
611 otherwise against the inputs.
612 :param cache: cache intermediate results to avoid more computation
613 :return: gradient
614 """
615 if cache is None:
616 cache = self.fill_cache(X)
617 shape = self.training_weights.shape
618 pred = self.predict(X)
620 whole_gradx = numpy.zeros(pred.shape, dtype=numpy.float64)
621 whole_gradw = numpy.zeros(shape, dtype=numpy.float64)
622 if len(graddx.shape) == 0:
623 whole_gradx[-1] = graddx
624 else:
625 whole_gradx[-graddx.shape[0]:] = graddx
627 for node, attr in zip(self.nodes[::-1], self.nodes_attr[::-1]):
628 ch = cache[node.nodeid]
630 node_graddx = whole_gradx[attr['output']]
631 xi = pred[attr['inputs']]
633 temp_gradw = node.gradient_backward(
634 node_graddx, xi, inputs=False, cache=ch)
635 temp_gradx = node.gradient_backward(
636 node_graddx, xi, inputs=True, cache=ch)
638 whole_gradw[attr['first_coef']:attr['first_coef'] +
639 attr['coef_size']] += temp_gradw.reshape((attr['coef_size'],))
640 whole_gradx[attr['inputs']
641 ] += temp_gradx.reshape((len(attr['inputs']),))
643 if inputs:
644 return whole_gradx
645 return whole_gradw