
# -*- coding: utf-8 -*-
"""
@file
@brief Conversion from tree to neural network.
"""
from io import BytesIO
import pickle
import numpy
from ._neural_tree_api import _TrainingAPI
from ._neural_tree_node import NeuralTreeNode


def label_class_to_softmax_output(y_label):
    """
    Converts a binary class label into a matrix
    with two columns of probabilities.

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import label_class_to_softmax_output

        y_label = numpy.array([0, 1, 0, 0])
        soft_y = label_class_to_softmax_output(y_label)
        print(soft_y)
    """
    if len(y_label.shape) != 1:
        raise ValueError(
            "y_label must be a vector but has shape {}.".format(y_label.shape))
    y = numpy.empty((y_label.shape[0], 2), dtype=numpy.float64)
    y[:, 0] = (y_label < 0.5).astype(numpy.float64)
    y[:, 1] = 1 - y[:, 0]
    return y


class NeuralTreeNet(_TrainingAPI):
    """
    Node ensemble.

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import NeuralTreeNode, NeuralTreeNet

        w1 = numpy.array([-0.5, 0.8, -0.6])

        neu = NeuralTreeNode(w1[1:], bias=w1[0], activation='sigmoid')
        net = NeuralTreeNet(2, empty=True)
        net.append(neu, numpy.arange(2))

        ide = NeuralTreeNode(numpy.array([1.]),
                             bias=numpy.array([0.]),
                             activation='identity')

        net.append(ide, numpy.arange(2, 3))

        X = numpy.abs(numpy.random.randn(10, 2))
        pred = net.predict(X)
        print(pred)
    """

    def __init__(self, dim, empty=True):
        """
        @param      dim     space dimension
        @param      empty   if True, the network is left empty,
                            otherwise it starts with an identity node
        """
        self.dim = dim
        if empty:
            self.nodes = []
            self.nodes_attr = []
        else:
            self.nodes = [
                NeuralTreeNode(
                    numpy.ones((dim,), dtype=numpy.float64),
                    bias=numpy.float64(0.),
                    activation='identity', nodeid=0)]
            self.nodes_attr = [dict(inputs=numpy.arange(0, dim), output=dim,
                                    coef_size=self.nodes[0].coef.size,
                                    first_coef=0)]
        self._update_members()

    def copy(self):
        "Returns a deep copy of the network through a pickle round-trip."
        st = BytesIO()
        pickle.dump(self, st)
        cop = BytesIO(st.getvalue())
        return pickle.load(cop)

    def _update_members(self, node=None, attr=None):
        "Updates internal members."
        if node is None or attr is None:
            if len(self.nodes_attr) == 0:
                self.size_ = self.dim
            else:
                self.size_ = max(d['output'] for d in self.nodes_attr) + 1
            self.output_to_node_ = {}
            self.input_to_node_ = {}
            for node2, attr2 in zip(self.nodes, self.nodes_attr):
                if isinstance(attr2['output'], list):
                    for o in attr2['output']:
                        self.output_to_node_[o] = node2, attr2
                else:
                    self.output_to_node_[attr2['output']] = node2, attr2
                for i in attr2['inputs']:
                    self.input_to_node_[i] = node2, attr2
        else:
            if len(node.input_weights.shape) == 1:
                self.size_ += 1
            else:
                self.size_ += node.input_weights.shape[0]
            if isinstance(attr['output'], list):
                for o in attr['output']:
                    self.output_to_node_[o] = node, attr
            else:
                self.output_to_node_[attr['output']] = node, attr
            for i in attr['inputs']:
                self.input_to_node_[i] = node, attr

    def __repr__(self):
        "usual"
        return "%s(%d)" % (self.__class__.__name__, self.dim)

    def clear(self):
        "Clears all nodes."
        del self.nodes[:]
        del self.nodes_attr[:]
        self._update_members()

    def append(self, node, inputs):
        """
        Appends a node into the graph.

        @param      node        node to add
        @param      inputs      index of input nodes
        """
        if len(node.input_weights.shape) == 1:
            if node.input_weights.shape[0] != len(inputs):
                raise RuntimeError(
                    "Dimension mismatch between weights [{}] and inputs [{}].".format(
                        node.input_weights.shape[0], len(inputs)))
            node.nodeid = len(self.nodes)
            self.nodes.append(node)
            first_coef = (
                0 if len(self.nodes_attr) == 0 else
                self.nodes_attr[-1]['first_coef'] + self.nodes_attr[-1]['coef_size'])
            attr = dict(inputs=numpy.array(inputs), output=self.size_,
                        coef_size=node.coef.size, first_coef=first_coef)
            self.nodes_attr.append(attr)
        elif len(node.input_weights.shape) == 2:
            if node.input_weights.shape[1] != len(inputs):
                raise RuntimeError(  # pragma: no cover
                    "Dimension mismatch between weights [{}] and inputs [{}].".format(
                        node.input_weights.shape[1], len(inputs)))
            node.nodeid = len(self.nodes)
            self.nodes.append(node)
            first_coef = (
                0 if len(self.nodes_attr) == 0 else
                self.nodes_attr[-1]['first_coef'] + self.nodes_attr[-1]['coef_size'])
            attr = dict(inputs=numpy.array(inputs),
                        output=list(range(self.size_, self.size_ +
                                          node.input_weights.shape[0])),
                        coef_size=node.coef.size, first_coef=first_coef)
            self.nodes_attr.append(attr)
        else:
            raise RuntimeError(  # pragma: no cover
                "Coefficients should have 1 or 2 dimensions, not {}.".format(
                    node.input_weights.shape))
        self._update_members(node, attr)

    def __getitem__(self, i):
        "Retrieves node and attributes for node i."
        return self.nodes[i], self.nodes_attr[i]

    def __len__(self):
        "Returns the number of nodes."
        return len(self.nodes)

    def _predict_one(self, X):
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            res[attr['output']] = node.predict(res[attr['inputs']])
        return res

    def predict(self, X):
        "Computes the output of every node, the first *dim* columns replicate the inputs."
        if len(X.shape) == 2:
            res = numpy.zeros((X.shape[0], self.size_))
            for i, x in enumerate(X):
                res[i, :] = self._predict_one(x)
            return res
        return self._predict_one(X)

    @staticmethod
    def create_from_tree(tree, k=1., arch='one'):
        """
        Creates a @see cl NeuralTreeNet instance from a
        :epkg:`DecisionTreeClassifier`.

        @param      tree    :epkg:`DecisionTreeClassifier`
        @param      k       slant of the sigmoid
        @param      arch    architecture, see below
        @return             @see cl NeuralTreeNet

        The function only works for binary problems.
        Available architectures:

        * `'one'`: the method adds nodes with one output, there
          is no specific definition of layers,
        * `'compact'`: the method adds two nodes, the first computes
          the thresholds, the second one computes the leaves'
          outputs, a final node merges all outputs into one.

        See notebook :ref:`neuraltreerst` for examples.
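
        The following sketch (the random dataset and the fitted tree
        are only illustrative) shows a conversion with `'compact'`:

        .. runpython::
            :showcode:

            import numpy
            from sklearn.tree import DecisionTreeClassifier
            from mlstatpy.ml.neural_tree import NeuralTreeNet

            X = numpy.random.randn(100, 2)
            y = (X[:, 0] + X[:, 1] > 0).astype(numpy.int64)
            tree = DecisionTreeClassifier(max_depth=2).fit(X, y)
            net = NeuralTreeNet.create_from_tree(tree, k=10., arch='compact')
            # the last two columns hold the softmax output for both classes
            print(net.predict(X[:5])[:, -2:])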

213 """ 

214 if arch == 'one': 

215 return NeuralTreeNet._create_from_tree_one(tree, k) 

216 if arch == 'compact': 

217 return NeuralTreeNet._create_from_tree_compact(tree, k) 

218 raise ValueError("Unknown arch value '{}'.".format(arch)) 

219 

220 @staticmethod 

221 def _create_from_tree_one(tree, k=1.): 

222 "Implements strategy one. See @see meth create_from_tree." 

223 

224 if tree.n_classes_ > 2: 

225 raise RuntimeError( 

226 "The function only support binary classification problem.") 

227 

228 n_nodes = tree.tree_.node_count 

229 children_left = tree.tree_.children_left 

230 children_right = tree.tree_.children_right 

231 feature = tree.tree_.feature 

232 threshold = tree.tree_.threshold 

233 value = tree.tree_.value.reshape((-1, 2)) 

234 output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64) 

235 max_features_ = tree.max_features_ 

236 

237 root = NeuralTreeNet(tree.max_features_, empty=True) 

238 feat_index = numpy.arange(0, max_features_) 

239 predecessor = {} 

240 outputs = {i: [] for i in range(0, tree.n_classes_)} 

241 for i in range(n_nodes): 

242 

243 if children_left[i] != children_right[i]: 

244 # node with a threshold 

245 # right side 

246 coef = numpy.zeros((max_features_,), dtype=numpy.float64) 

247 coef[feature[i]] = -k 

248 node_th = NeuralTreeNode(coef, bias=k * threshold[i], 

249 activation='sigmoid4', tag="N%d-th" % i) 

250 root.append(node_th, feat_index) 

251 

252 if i in predecessor: 

253 pred = predecessor[i] 

254 node1 = pred 

255 node2 = node_th 

256 attr1 = root[node1.nodeid][1] 

257 attr2 = root[node2.nodeid][1] 

258 

259 coef = numpy.ones((2,), dtype=numpy.float64) * k 

260 node_true = NeuralTreeNode(coef, bias=-k * 1.5, 

261 activation='sigmoid4', 

262 tag="N%d-T" % i) 

263 root.append(node_true, [attr1['output'], attr2['output']]) 

264 

265 coef = numpy.zeros((2,), dtype=numpy.float64) 

266 coef[0] = k 

267 coef[1] = -k 

268 node_false = NeuralTreeNode(coef, bias=-k * 0.25, 

269 activation='sigmoid4', 

270 tag="N%d-F" % i) 

271 root.append(node_false, [attr1['output'], attr2['output']]) 

272 

273 predecessor[children_left[i]] = node_true 

274 predecessor[children_right[i]] = node_false 

275 else: 

276 coef = numpy.ones((1,), dtype=numpy.float64) * -1 

277 node_false = NeuralTreeNode( 

278 coef, bias=1, activation='identity', tag="N%d-F" % i) 

279 attr = root[node_th.nodeid][1] 

280 root.append(node_false, [attr['output']]) 

281 

282 predecessor[children_left[i]] = node_th 

283 predecessor[children_right[i]] = node_false 

284 

285 elif i in predecessor: 

286 # leave 

287 outputs[output_class[i]].append(predecessor[i]) 

288 

289 # final node 

290 output = [] 

291 index = [0] 

292 nb = [] 

293 for i in range(0, tree.n_classes_): 

294 output.extend(outputs[i]) 

295 nb.append(len(outputs[i])) 

296 index.append(len(outputs[i]) + index[-1]) 

297 coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64) 

298 for i in range(0, tree.n_classes_): 

299 coef[i, index[i]:index[i + 1]] = k 

300 feat = [root[n.nodeid][1]['output'] for n in output] 

301 root.append( 

302 NeuralTreeNode(coef, bias=(-k / 2) * len(feat), 

303 activation='softmax4', tag="Nfinal"), 

304 feat) 

305 

306 # final 

307 return root 

308 

309 @staticmethod 

310 def _create_from_tree_compact(tree, k=1.): 

311 "Implements strategy one. See @see meth create_from_tree." 

312 

313 if tree.n_classes_ > 2: 

314 raise RuntimeError( 

315 "The function only support binary classification problem.") 

316 

317 n_nodes = tree.tree_.node_count 

318 children_left = tree.tree_.children_left 

319 children_right = tree.tree_.children_right 

320 feature = tree.tree_.feature 

321 threshold = tree.tree_.threshold 

322 value = tree.tree_.value.reshape((-1, 2)) 

323 output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64) 

324 max_features_ = tree.max_features_ 

325 feat_index = numpy.arange(0, max_features_) 

326 

327 root = NeuralTreeNet(tree.max_features_, empty=True) 

328 coef1 = [] 

329 bias1 = [] 

330 parents = {} 

331 rows = {} 

332 

333 # first pass: threshold 

334 

335 for i in range(n_nodes): 

336 if children_left[i] == children_right[i]: 

337 # leaves 

338 continue 

339 rows[i] = len(coef1) 

340 parents[children_left[i]] = i 

341 parents[children_right[i]] = i 

342 coef = numpy.zeros((max_features_,), dtype=numpy.float64) 

343 coef[feature[i]] = -k 

344 coef1.append(coef) 

345 bias1.append(k * threshold[i]) 

346 

347 coef1 = numpy.vstack(coef1) 

348 if len(bias1) == 1: 

349 bias1 = bias1[0] 

350 node1 = NeuralTreeNode( 

351 coef1 if coef1.shape[0] > 1 else coef1[0], bias=bias1, 

352 activation='sigmoid4', tag="threshold") 

353 root.append(node1, feat_index) 

354 th_index = numpy.arange(max_features_, max_features_ + coef1.shape[0]) 

355 

356 # second pass: decision path 

357 coef2 = [] 

358 bias2 = [] 

359 output = [] 

360 

361 for i in range(n_nodes): 

362 if children_left[i] != children_right[i]: 

363 # not a leave 

364 continue 

365 

366 path = [] 

367 last = i 

368 lr = "class", output_class[i] 

369 output.append(output_class[i]) 

370 while last is not None: 

371 path.append((last, lr)) 

372 if last not in parents: 

373 break 

374 par = parents[last] 

375 if children_right[par] == last: 

376 lr = 'right' 

377 elif children_left[par] == last: 

378 lr = 'left' 

379 else: 

380 raise RuntimeError( # pragma: no cover 

381 "Inconsistent tree structure.") 

382 last = par 

383 

384 coef = numpy.zeros((coef1.shape[0], ), dtype=numpy.float64) 

385 bias = 0. 

386 for ip, lr in path: 

387 if isinstance(lr, tuple): 

388 lr, value = lr 

389 if lr != 'class': 

390 raise RuntimeError( 

391 "algorithm issue") # pragma: no cover 

392 else: 

393 r = rows[ip] 

394 if lr == 'right': 

395 coef[r] = k 

396 bias -= k / 2 

397 else: 

398 coef[r] = -k 

399 bias += k / 2 

400 coef2.append(coef) 

401 bias2.append(bias) 

402 

403 coef2 = numpy.vstack(coef2) 

404 if len(bias2) == 1: 

405 bias2 = bias2[0] 

406 node2 = NeuralTreeNode( 

407 coef2 if coef2.shape[0] > 1 else coef2[0], bias=bias2, 

408 activation='sigmoid4', tag="pathes") 

409 root.append(node2, th_index) 

410 

411 # final node 

412 coef = numpy.zeros( 

413 (tree.n_classes_, coef2.shape[0]), dtype=numpy.float64) 

414 bias = [0. for i in range(tree.n_classes_)] 

415 for i, cls in enumerate(output): 

416 coef[cls, i] = -k 

417 coef[1 - cls, i] = k 

418 bias[cls] += k / 2 

419 bias[1 - cls] += -k / 2 

420 findex = numpy.arange(max_features_ + coef1.shape[0], 

421 max_features_ + coef1.shape[0] + coef2.shape[0]) 

422 root.append( 

423 NeuralTreeNode(coef, bias=bias, 

424 activation='softmax4', tag="final"), 

425 findex) 

426 

427 # end 

428 return root 

429 

430 def to_dot(self, X=None): 

431 """ 

432 Exports the neural network into :epkg:`dot`. 

433 

434 @param X input as an example 
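
        A small sketch of the raw :epkg:`dot` output for a network
        reduced to a single identity node:

        .. runpython::
            :showcode:

            from mlstatpy.ml.neural_tree import NeuralTreeNet

            net = NeuralTreeNet(2, empty=False)  # starts with an identity node
            print(net.to_dot())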

435 """ 

436 y = None 

437 if X is not None: 

438 y = self.predict(X) 

439 rows = ['digraph Tree {', 

440 "node [shape=box, fontsize=10];", 

441 "edge [fontsize=8];"] 

442 for i in range(self.dim): 

443 if y is None: 

444 rows.append('{0} [label="X[{0}]"];'.format(i)) 

445 else: 

446 rows.append( 

447 '{0} [label="X[{0}]=\\n{1:1.2f}"];'.format(i, X[i])) 

448 

449 labels = {} 

450 

451 for i in range(0, len(self)): # pylint: disable=C0200 

452 o = self[i][1]['output'] 

453 if isinstance(o, int): 

454 lo = str(o) 

455 labels[o] = lo 

456 lof = "%s" 

457 else: 

458 lo = "s" + 'a'.join(map(str, o)) 

459 for oo in o: 

460 labels[oo] = '{}:f{}'.format(lo, oo) 

461 los = "|".join("<f{0}> {0}".format(oo) for oo in o) 

462 lof = "%s&#92;n" + los 

463 

464 a = "a={}\n".format(self[i][0].activation) 

465 stag = "" if self[i][0].tag is None else (self[i][0].tag + "\\n") 

466 bias = str(numpy.array(self[i][0].bias)).replace(" ", "&#92; ") 

467 if y is None: 

468 lab = lof % '{}{}id={} b={} s={}'.format( 

469 stag, a, i, bias, self[i][0].n_outputs) 

470 else: 

471 yo = numpy.array(y[o]) 

472 lab = lof % '{}{}id={} b={} s={}\ny={}'.format( 

473 stag, a, i, bias, self[i][0].n_outputs, yo) 

474 rows.append('{} [label="{}"];'.format( 

475 lo, lab.replace("\n", "&#92;n"))) 

476 for ii, inp in enumerate(self[i][1]['inputs']): 

477 if isinstance(o, int): 

478 w = self[i][0].input_weights[ii] 

479 if w == 0: 

480 c = ', color=grey, fontcolor=grey' 

481 elif w < 0: 

482 c = ', color=red, fontcolor=red' 

483 else: 

484 c = ', color=blue, fontcolor=blue' 

485 rows.append( 

486 '{} -> {} [label="{}"{}];'.format(inp, o, w, c)) 

487 continue 

488 

489 w = self[i][0].input_weights[:, ii] 

490 for oi, oo in enumerate(o): 

491 if w[oi] == 0: 

492 c = ', color=grey, fontcolor=grey' 

493 elif w[oi] < 0: 

494 c = ', color=red, fontcolor=red' 

495 else: 

496 c = ', color=blue, fontcolor=blue' 

497 rows.append('{} -> {} [label="{}|{}"{}];'.format( 

498 labels.get(inp, inp), labels[oo], oi, w[oi], c)) 

499 

500 rows.append('}') 

501 return '\n'.join(rows) 

502 

503 @property 

504 def shape(self): 

505 "Returns the shape of the coefficients." 

506 return (sum(n.coef.size for n in self.nodes), ) 

507 

508 @property 

509 def training_weights(self): 

510 "Returns the weights." 

511 sh = self.shape 

512 res = numpy.empty(sh[0], dtype=numpy.float64) 

513 pos = 0 

514 for n in self.nodes: 

515 s = n.coef.size 

516 res[pos: pos + s] = ( 

517 n.coef if len(n.coef.shape) == 1 else n.coef.ravel()) 

518 pos += s 

519 return res 

520 

521 def update_training_weights(self, X, add=True): # pylint: disable=W0237 

522 """ 

523 Updates weights. 

524 

525 :param grad: vector to add to the weights such as gradient 

526 :param add: addition or replace 
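
        A short sketch of the expected behaviour:

        .. runpython::
            :showcode:

            import numpy
            from mlstatpy.ml.neural_tree import NeuralTreeNet

            net = NeuralTreeNet(2, empty=False)
            w = net.training_weights
            print(w)
            # adding -w zeroes out every coefficient
            net.update_training_weights(-w, add=True)
            print(net.training_weights)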

527 """ 

528 pos = 0 

529 if add: 

530 for n in self.nodes: 

531 s = n.coef.size 

532 n.coef += X[pos: pos + s].reshape(n.coef.shape) 

533 pos += s 

534 else: 

535 for n in self.nodes: 

536 s = n.coef.size 

537 numpy.copyto(n.coef, X[pos: pos + s].reshape(n.coef.shape)) 

538 pos += s 

539 

540 def fill_cache(self, X): 

541 """ 

542 Creates a cache with intermediate results. 
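
        A minimal sketch of what the cache contains:

        .. runpython::
            :showcode:

            import numpy
            from mlstatpy.ml.neural_tree import NeuralTreeNet

            net = NeuralTreeNet(2, empty=False)
            cache = net.fill_cache(numpy.array([0.5, -0.25]))
            # key -1 holds the full output vector, the other keys are node ids
            print(sorted(cache))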

543 """ 

544 big_cache = {} 

545 res = numpy.zeros((self.size_,), dtype=numpy.float64) 

546 res[:self.dim] = X 

547 for node, attr in zip(self.nodes, self.nodes_attr): 

548 cache = node.fill_cache(res[attr['inputs']]) 

549 big_cache[node.nodeid] = cache 

550 res[attr['output']] = cache['aX'] 

551 big_cache[-1] = res 

552 return big_cache 

553 

554 def _get_output_node_attr(self, nb_last): 

555 """ 

556 Retrieves the output nodes. 

557 *nb_last* is the number of expected outputs. 

558 """ 

559 neurones = set(self.output_to_node_[i][0].nodeid 

560 for i in range(self.size_ - nb_last, self.size_)) 

561 if len(neurones) != 1: 

562 raise RuntimeError( # pragma: no cover 

563 "Only one output node is implemented not {}".format( 

564 len(neurones))) 

565 return self.output_to_node_[self.size_ - 1] 

566 

567 def _common_loss_dloss(self, X, y, cache=None): 

568 """ 

569 Common beginning to methods *loss*, *dlossds*, 

570 *dlossdw*. 

571 """ 

572 last = 1 if len(y.shape) <= 1 else y.shape[1] 

573 if cache is not None and -1 in cache: 

574 res = cache[-1] 

575 else: 

576 res = self.predict(X) 

577 if len(res.shape) == 2: 

578 pred = res[:, -last:] 

579 else: 

580 pred = res[-last:] 

581 last_node, last_attr = self._get_output_node_attr(last) 

582 return res, pred, last_node, last_attr 

583 

584 def loss(self, X, y, cache=None): 

585 """ 

586 Computes the loss due to prediction error. Returns a float. 

587 """ 

588 res, _, last_node, last_attr = self._common_loss_dloss( 

589 X, y, cache=cache) 

590 if len(res.shape) <= 1: 

591 return last_node.loss(res[last_attr['inputs']], y) # pylint: disable=E1120 

592 return last_node.loss(res[:, last_attr['inputs']], y) # pylint: disable=E1120 

593 

594 def dlossds(self, X, y, cache=None): 

595 """ 

596 Computes the loss derivative against the inputs. 

597 """ 

598 res, _, last_node, last_attr = self._common_loss_dloss( 

599 X, y, cache=cache) 

600 if len(res.shape) <= 1: 

601 return last_node.dlossds(res[last_attr['inputs']], y) # pylint: disable=E1120 

602 return last_node.dlossds(res[:, last_attr['inputs']], y) # pylint: disable=E1120 

603 

604 def gradient_backward(self, graddx, X, inputs=False, cache=None): 

605 """ 

606 Computes the gradient in X. 

607 

608 :param graddx: existing gradient against the inputs 

609 :param X: computes the gradient in X 

610 :param inputs: if False, derivative against the coefficients, 

611 otherwise against the inputs. 

612 :param cache: cache intermediate results to avoid more computation 

613 :return: gradient 
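
        A minimal sketch on a one-node network (the gradient of the
        single output against the three coefficients, bias included):

        .. runpython::
            :showcode:

            import numpy
            from mlstatpy.ml.neural_tree import NeuralTreeNet

            net = NeuralTreeNet(2, empty=False)
            X = numpy.array([0.5, -0.25])
            print(net.gradient_backward(numpy.array([1.]), X))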

614 """ 

615 if cache is None: 

616 cache = self.fill_cache(X) 

617 shape = self.training_weights.shape 

618 pred = self.predict(X) 

619 

620 whole_gradx = numpy.zeros(pred.shape, dtype=numpy.float64) 

621 whole_gradw = numpy.zeros(shape, dtype=numpy.float64) 

622 if len(graddx.shape) == 0: 

623 whole_gradx[-1] = graddx 

624 else: 

625 whole_gradx[-graddx.shape[0]:] = graddx 

626 

627 for node, attr in zip(self.nodes[::-1], self.nodes_attr[::-1]): 

628 ch = cache[node.nodeid] 

629 

630 node_graddx = whole_gradx[attr['output']] 

631 xi = pred[attr['inputs']] 

632 

633 temp_gradw = node.gradient_backward( 

634 node_graddx, xi, inputs=False, cache=ch) 

635 temp_gradx = node.gradient_backward( 

636 node_graddx, xi, inputs=True, cache=ch) 

637 

638 whole_gradw[attr['first_coef']:attr['first_coef'] + 

639 attr['coef_size']] += temp_gradw.reshape((attr['coef_size'],)) 

640 whole_gradx[attr['inputs'] 

641 ] += temp_gradx.reshape((len(attr['inputs']),)) 

642 

643 if inputs: 

644 return whole_gradx 

645 return whole_gradw