Coverage for src/mlstatpy/ml/neural_tree.py

1# -*- coding: utf-8 -*-

2"""

3@file

4@brief Conversion from tree to neural network.

5"""

6from io import BytesIO

7import pickle

8import numpy

9from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin

10from sklearn.tree import BaseDecisionTree

11from ._neural_tree_api import _TrainingAPI

12from ._neural_tree_node import NeuralTreeNode

def label_class_to_softmax_output(y_label):
    """
    Turns a vector of binary labels into a two-column matrix of
    probabilities: column 0 is 1 when the label is below 0.5,
    column 1 is its complement.

    :param y_label: one-dimensional array of labels
    :return: float64 matrix with two columns, one row per label
    :raises ValueError: if *y_label* is not one-dimensional

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import label_class_to_softmax_output

        y_label = numpy.array([0, 1, 0, 0])
        soft_y = label_class_to_softmax_output(y_label)
        print(soft_y)
    """
    shape = y_label.shape
    if len(shape) != 1:
        raise ValueError(
            f"y_label must be a vector but has shape {y_label.shape}.")
    # Indicator of class 0, already converted to floats.
    is_class0 = (y_label < 0.5).astype(numpy.float64)
    soft = numpy.empty((shape[0], 2), dtype=numpy.float64)
    soft[:, 0] = is_class0
    soft[:, 1] = 1 - soft[:, 0]
    return soft

class NeuralTreeNet(_TrainingAPI):
    """
    Node ensemble.

    The network keeps a list of nodes (``nodes``) and, for every node,
    a dictionary of attributes (``nodes_attr``) with keys ``inputs``
    (indices read by the node), ``output`` (index or list of indices
    written by the node), ``coef_size`` and ``first_coef`` (position of
    the node coefficients in the flattened vector of weights).

    :param dim: space dimension
    :param empty: empty network, otherwise adds an identity node

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import NeuralTreeNode, NeuralTreeNet

        w1 = numpy.array([-0.5, 0.8, -0.6])

        neu = NeuralTreeNode(w1[1:], bias=w1[0], activation='sigmoid')
        net = NeuralTreeNet(2, empty=True)
        net.append(neu, numpy.arange(2))

        ide = NeuralTreeNode(numpy.array([1.]),
                             bias=numpy.array([0.]),
                             activation='identity')

        net.append(ide, numpy.arange(2, 3))

        X = numpy.abs(numpy.random.randn(10, 2))
        pred = net.predict(X)
        print(pred)
    """

    def __init__(self, dim, empty=True):
        self.dim = dim
        if empty:
            self.nodes = []
            self.nodes_attr = []
        else:
            self.nodes = [
                NeuralTreeNode(
                    numpy.ones((dim,), dtype=numpy.float64),
                    bias=numpy.float64(0.),
                    activation='identity', nodeid=0)]
            self.nodes_attr = [dict(inputs=numpy.arange(0, dim), output=dim,
                                    coef_size=self.nodes[0].coef.size,
                                    first_coef=0)]
        self._update_members()

    def copy(self):
        """
        Returns a copy of the network obtained by pickling it
        into memory and unpickling the result.
        """
        st = BytesIO()
        pickle.dump(self, st)
        cop = BytesIO(st.getvalue())
        return pickle.load(cop)

    def _register_node(self, node, attr):
        """
        Maps every output index and every input index of *node*
        to the pair *(node, attr)*.
        """
        outputs = attr['output']
        if isinstance(outputs, list):
            for o in outputs:
                self.output_to_node_[o] = node, attr
        else:
            self.output_to_node_[outputs] = node, attr
        for i in attr['inputs']:
            self.input_to_node_[i] = node, attr

    def _update_members(self, node=None, attr=None):
        """
        Updates internal members.

        Without arguments, ``size_``, ``output_to_node_`` and
        ``input_to_node_`` are rebuilt from scratch. With *node* and
        *attr*, they are updated incrementally for that new node.

        :param node: node just added (or None to rebuild everything)
        :param attr: attributes of the node (or None to rebuild everything)
        """
        if node is None or attr is None:
            if len(self.nodes_attr) == 0:
                self.size_ = self.dim
            else:
                # 'output' is either an index or a list of indices,
                # both must be supported when looking for the last index.
                self.size_ = max(
                    max(d['output']) if isinstance(d['output'], list)
                    else d['output']
                    for d in self.nodes_attr) + 1
            self.output_to_node_ = {}
            self.input_to_node_ = {}
            for node2, attr2 in zip(self.nodes, self.nodes_attr):
                self._register_node(node2, attr2)
        else:
            if len(node.input_weights.shape) == 1:
                self.size_ += 1
            else:
                self.size_ += node.input_weights.shape[0]
            self._register_node(node, attr)

    def __repr__(self):
        "usual"
        return "%s(%d)" % (self.__class__.__name__, self.dim)

    def clear(self):
        "Clear all nodes"
        del self.nodes[:]
        del self.nodes_attr[:]
        self._update_members()

    def append(self, node, inputs):
        """
        Appends a node into the graph.

        :param node: node to add
        :param inputs: index of input nodes
        :raises RuntimeError: if the weights of the node do not match
            the number of inputs or do not have 1 or 2 dimensions
        """
        weights_shape = node.input_weights.shape
        if len(weights_shape) == 1:
            if weights_shape[0] != len(inputs):
                raise RuntimeError(
                    f"Dimension mismatch between weights "
                    f"[{node.input_weights.shape[0]}] "
                    f"and inputs [{len(inputs)}].")
            # single output: one new index
            output = self.size_
        elif len(weights_shape) == 2:
            if weights_shape[1] != len(inputs):
                raise RuntimeError(  # pragma: no cover
                    f"Dimension mismatch between weights "
                    f"[{node.input_weights.shape[1]}] "
                    f"and inputs [{len(inputs)}], tag={node.tag!r}, "
                    f"node={node!r}.")
            # multiple outputs: one new index per row of weights
            output = list(range(self.size_, self.size_ + weights_shape[0]))
        else:
            raise RuntimeError(  # pragma: no cover
                f"Coefficients should have 1 or 2 dimension not "
                f"{node.input_weights.shape}.")

        node.nodeid = len(self.nodes)
        self.nodes.append(node)
        # coefficients of all nodes are stored one after the other
        if len(self.nodes_attr) == 0:
            first_coef = 0
        else:
            previous = self.nodes_attr[-1]
            first_coef = previous['first_coef'] + previous['coef_size']
        attr = dict(inputs=numpy.array(inputs), output=output,
                    coef_size=node.coef.size, first_coef=first_coef)
        self.nodes_attr.append(attr)
        self._update_members(node, attr)

    def __getitem__(self, i):
        "Retrieves node and attributes for node i."
        return self.nodes[i], self.nodes_attr[i]

    def __len__(self):
        "Returns the number of nodes"
        return len(self.nodes)

    def _predict_one(self, X):
        """
        Computes the values of all indices for one vector *X*:
        the first *dim* values are the inputs, the others are
        the outputs of the nodes in the order they were added.
        """
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            res[attr['output']] = node.predict(res[attr['inputs']])
        return res

    def predict(self, X):
        """
        Computes the values of all indices (inputs and node outputs).

        :param X: a vector or a matrix with one observation per row
        :return: a vector of size ``size_`` or a matrix
            with ``size_`` columns
        """
        if len(X.shape) == 2:
            res = numpy.zeros((X.shape[0], self.size_))
            for i, x in enumerate(X):
                res[i, :] = self._predict_one(x)
            return res
        return self._predict_one(X)

    @staticmethod
    def create_from_tree(tree, k=1., arch='one'):
        """
        Creates a @see cl NeuralTreeNet instance from a
        :epkg:`DecisionTreeClassifier`

        :param tree: :epkg:`DecisionTreeClassifier`
        :param k: slant of the sigmoïd
        :param arch: architecture, see below
        :return: @see cl NeuralTreeNet
        :raises ValueError: if *arch* is unknown

        The function only works for binary problems.
        Available architecture:

        * `'one'`: the method adds nodes with one output, there
          is no specific definition of layers,
        * `'compact'`: the method adds two nodes, the first computes
          the threshold, the second one computes the leaves
          output, a final node merges all outputs into one

        See notebook :ref:`neuraltreerst` for examples.
        """
        if arch == 'one':
            return NeuralTreeNet._create_from_tree_one(tree, k)
        if arch == 'compact':
            return NeuralTreeNet._create_from_tree_compact(tree, k)
        raise ValueError(f"Unknown arch value '{arch}'.")

    @staticmethod
    def _create_from_tree_one(tree, k=1.):
        "Implements strategy 'one'. See @see meth create_from_tree."

        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if not isinstance(tree, ClassifierMixin):
            raise TypeError(  # pragma: no cover
                f"Only a classifier can be converted by this function "
                f"not {type(tree)!r}, arch='compact' should be used.")
        if tree.n_classes_ > 2:
            raise RuntimeError(  # pragma: no cover
                "The function only supports binary classification problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        value = tree.tree_.value.reshape((-1, 2))
        output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        max_features_ = tree.max_features_

        root = NeuralTreeNet(tree.max_features_, empty=True)
        feat_index = numpy.arange(0, max_features_)
        # maps a tree node to the neuron telling if this node is reached
        predecessor = {}
        outputs = {i: [] for i in range(0, tree.n_classes_)}
        for i in range(n_nodes):

            if children_left[i] != children_right[i]:
                # node with a threshold
                # right side
                coef = numpy.zeros((max_features_,), dtype=numpy.float64)
                coef[feature[i]] = -k
                node_th = NeuralTreeNode(coef, bias=k * threshold[i],
                                         activation='sigmoid4',
                                         tag="N%d-th" % i)
                root.append(node_th, feat_index)

                if i in predecessor:
                    pred = predecessor[i]
                    node1 = pred
                    node2 = node_th
                    attr1 = root[node1.nodeid][1]
                    attr2 = root[node2.nodeid][1]

                    coef = numpy.ones((2,), dtype=numpy.float64) * k
                    node_true = NeuralTreeNode(coef, bias=-k * 1.5,
                                               activation='sigmoid4',
                                               tag="N%d-T" % i)
                    root.append(node_true, [attr1['output'], attr2['output']])

                    coef = numpy.zeros((2,), dtype=numpy.float64)
                    coef[0] = k
                    coef[1] = -k
                    node_false = NeuralTreeNode(coef, bias=-k * 0.25,
                                                activation='sigmoid4',
                                                tag="N%d-F" % i)
                    root.append(node_false, [attr1['output'], attr2['output']])

                    predecessor[children_left[i]] = node_true
                    predecessor[children_right[i]] = node_false
                else:
                    coef = numpy.ones((1,), dtype=numpy.float64) * -1
                    node_false = NeuralTreeNode(
                        coef, bias=1, activation='identity', tag="N%d-F" % i)
                    attr = root[node_th.nodeid][1]
                    root.append(node_false, [attr['output']])

                    predecessor[children_left[i]] = node_th
                    predecessor[children_right[i]] = node_false

            elif i in predecessor:
                # leave
                outputs[output_class[i]].append(predecessor[i])

        # final node
        output = []
        index = [0]
        nb = []
        for i in range(0, tree.n_classes_):
            output.extend(outputs[i])
            nb.append(len(outputs[i]))
            index.append(len(outputs[i]) + index[-1])
        coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64)
        for i in range(0, tree.n_classes_):
            coef[i, index[i]:index[i + 1]] = k
        feat = [root[n.nodeid][1]['output'] for n in output]
        root.append(
            NeuralTreeNode(coef, bias=(-k / 2) * len(feat),
                           activation='softmax4', tag="Nfinal"),
            feat)

        # final
        return root

    @staticmethod
    def _create_from_tree_compact(tree, k=1.):
        "Implements strategy 'compact'. See @see meth create_from_tree."
        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if isinstance(tree, ClassifierMixin):
            is_classifier = True
            if tree.n_classes_ > 2:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports binary classification problem.")
        else:
            is_classifier = False
            if tree.n_outputs_ != 1:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports single regression problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        if is_classifier:
            value = tree.tree_.value.reshape((-1, 2))
            output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        else:
            output_value = tree.tree_.value.ravel()
        max_features_ = tree.max_features_
        feat_index = numpy.arange(0, max_features_)

        root = NeuralTreeNet(tree.max_features_, empty=True)
        coef1 = []
        bias1 = []
        parents = {}
        # maps a tree node with a threshold to its row in the first layer
        rows = {}

        # first pass: threshold

        for i in range(n_nodes):
            if children_left[i] == children_right[i]:
                # leaves
                continue
            rows[i] = len(coef1)
            parents[children_left[i]] = i
            parents[children_right[i]] = i
            coef = numpy.zeros((max_features_,), dtype=numpy.float64)
            coef[feature[i]] = -k
            coef1.append(coef)
            bias1.append(k * threshold[i])

        coef1 = numpy.vstack(coef1)
        if len(bias1) == 1:
            bias1 = bias1[0]
        node1 = NeuralTreeNode(
            coef1 if coef1.shape[0] > 1 else coef1[0], bias=bias1,
            activation='sigmoid4', tag="threshold")
        root.append(node1, feat_index)
        th_index = numpy.arange(max_features_, max_features_ + coef1.shape[0])

        # second pass: decision path
        coef2 = []
        bias2 = []
        output = []

        for i in range(n_nodes):
            if children_left[i] != children_right[i]:
                # not a leave
                continue

            # walks from the leaf up to the root, every item tells
            # which side was taken at that node
            path = []
            last = i
            if is_classifier:
                lr = "class", output_class[i]
                output.append(output_class[i])
            else:
                lr = "reg", output_value[i]
                output.append(output_value[i])
            while last is not None:
                path.append((last, lr))
                if last not in parents:
                    break
                par = parents[last]
                if children_right[par] == last:
                    lr = 'right'
                elif children_left[par] == last:
                    lr = 'left'
                else:
                    raise RuntimeError(  # pragma: no cover
                        "Inconsistent tree structure.")
                last = par

            coef = numpy.zeros((coef1.shape[0], ), dtype=numpy.float64)
            # This bias is different from the one implemented in
            # _create_from_tree_one where bias=0.
            bias = - k * (len(path) - 2) / 2
            for ip, lr in path:
                if isinstance(lr, tuple):
                    # the leaf itself, it has no threshold
                    if lr[0] not in ('class', 'reg'):
                        raise RuntimeError(  # pragma: no cover
                            "algorithm issue")
                else:
                    r = rows[ip]
                    # coefficients are the opposite in _create_from_tree_one
                    if lr == 'right':
                        coef[r] = -k
                        bias += k / 2
                    else:
                        coef[r] = k
                        bias -= k / 2
            coef2.append(coef)
            bias2.append(bias)

        coef2 = numpy.vstack(coef2)
        if len(bias2) == 1:
            bias2 = bias2[0]
        node2 = NeuralTreeNode(
            coef2 if coef2.shape[0] > 1 else coef2[0], bias=bias2,
            activation='sigmoid4', tag="pathes")
        root.append(node2, th_index)

        # final node
        n_outputs = tree.n_classes_ if is_classifier else tree.n_outputs_

        index1 = max_features_ + coef1.shape[0]
        index2 = index1 + coef2.shape[0]
        findex = numpy.arange(index1, index2)

        if is_classifier:
            # coefficients are the opposite in _create_from_tree_one
            coef = numpy.zeros(
                (n_outputs, coef2.shape[0]), dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            for i, cls in enumerate(output):
                coef[cls, i] = k
                coef[1 - cls, i] = -k
                bias[cls] -= k / 2
                bias[1 - cls] += k / 2
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='softmax4', tag="final"),
                findex)
        else:
            # one coefficient per leaf: the value predicted by the leaf
            coef = numpy.array(output, dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='identity', tag="final"),
                findex)

        # end
        return root

    def to_dot(self, X=None):
        """
        Exports the neural network into :epkg:`dot`.

        :param X: input as an example, if specified, the values
            computed by the network are added to the labels
        :return: string
        """
        y = None
        if X is not None:
            y = self.predict(X)
        rows = ['digraph Tree {',
                "node [shape=box, fontsize=10];",
                "edge [fontsize=8];"]
        for i in range(self.dim):
            if y is None:
                rows.append('{0} [label="X[{0}]"];'.format(i))
            else:
                rows.append(
                    '{0} [label="X[{0}]=\\n{1:1.2f}"];'.format(i, X[i]))

        labels = {}

        for i in range(0, len(self)):  # pylint: disable=C0200
            node, attr = self[i]
            o = attr['output']
            if isinstance(o, int):
                lo = str(o)
                labels[o] = lo
                lof = "%s"
            else:
                lo = "s" + 'a'.join(map(str, o))
                for oo in o:
                    labels[oo] = f'{lo}:f{oo}'
                los = "|".join("<f{0}> {0}".format(oo) for oo in o)
                lof = "%s\n" + los

            a = f"a={node.activation}\n"
            stag = "" if node.tag is None else (node.tag + "\\n")
            bias = str(numpy.array(node.bias)).replace(" ", "\\ ")
            if y is None:
                lab = lof % f'{stag}{a}id={i} b={bias} s={node.n_outputs}'
            else:
                yo = numpy.array(y[o])
                lab = lof % '{}{}id={} b={} s={}\ny={}'.format(
                    stag, a, i, bias, node.n_outputs, yo)
            # line breaks must be written with the dot escape sequence,
            # a raw end of line is not expected inside a label
            rows.append('{} [label="{}"];'.format(
                lo, lab.replace("\n", "\\n")))
            for ii, inp in enumerate(attr['inputs']):
                if isinstance(o, int):
                    w = node.input_weights[ii]
                    if w == 0:
                        c = ', color=grey, fontcolor=grey'
                    elif w < 0:
                        c = ', color=red, fontcolor=red'
                    else:
                        c = ', color=blue, fontcolor=blue'
                    rows.append(
                        f'{inp} -> {o} [label="{w}"{c}];')
                    continue

                w = node.input_weights[:, ii]
                for oi, oo in enumerate(o):
                    if w[oi] == 0:
                        c = ', color=grey, fontcolor=grey'
                    elif w[oi] < 0:
                        c = ', color=red, fontcolor=red'
                    else:
                        c = ', color=blue, fontcolor=blue'
                    rows.append('{} -> {} [label="{}|{}"{}];'.format(
                        labels.get(inp, inp), labels[oo], oi, w[oi], c))

        rows.append('}')
        return '\n'.join(rows)

    @property
    def shape(self):
        "Returns the shape of the coefficients."
        return (sum(n.coef.size for n in self.nodes), )

    @property
    def training_weights(self):
        "Returns the weights (coefficients of all nodes in a single vector)."
        sh = self.shape
        res = numpy.empty(sh[0], dtype=numpy.float64)
        pos = 0
        for n in self.nodes:
            s = n.coef.size
            res[pos: pos + s] = (
                n.coef if len(n.coef.shape) == 1 else n.coef.ravel())
            pos += s
        return res

    def update_training_weights(self, X, add=True):  # pylint: disable=W0237
        """
        Updates weights.

        :param X: vector to add to the weights such as gradient,
            it follows the same order as *training_weights*
        :param add: addition or replace
        """
        pos = 0
        if add:
            for n in self.nodes:
                s = n.coef.size
                n.coef += X[pos: pos + s].reshape(n.coef.shape)
                pos += s
        else:
            for n in self.nodes:
                s = n.coef.size
                numpy.copyto(n.coef, X[pos: pos + s].reshape(n.coef.shape))
                pos += s

    def fill_cache(self, X):
        """
        Creates a cache with intermediate results.

        :param X: input
        :return: dictionary, key is the node id and the value is
            the cache returned by the node, key -1 holds the vector
            with the inputs and the outputs of all nodes
        """
        big_cache = {}
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            cache = node.fill_cache(res[attr['inputs']])
            big_cache[node.nodeid] = cache
            res[attr['output']] = cache['aX']
        big_cache[-1] = res
        return big_cache

    def _get_output_node_attr(self, nb_last):
        """
        Retrieves the output nodes.
        *nb_last* is the number of expected outputs.

        :return: tuple *(node, attributes)* of the node producing
            the last index
        :raises RuntimeError: if the last *nb_last* indices are not
            produced by a single node
        """
        neurones = set(self.output_to_node_[i][0].nodeid
                       for i in range(self.size_ - nb_last, self.size_))
        if len(neurones) != 1:
            raise RuntimeError(  # pragma: no cover
                f"Only one output node is implemented not {len(neurones)}")
        return self.output_to_node_[self.size_ - 1]

    def _common_loss_dloss(self, X, y, cache=None):
        """
        Common beginning to methods *loss*, *dlossds*,
        *dlossdw*.

        :return: tuple *(all values, predictions, last node,
            attributes of the last node)*
        """
        last = 1 if len(y.shape) <= 1 else y.shape[1]
        if cache is not None and -1 in cache:
            res = cache[-1]
        else:
            res = self.predict(X)
        if len(res.shape) == 2:
            pred = res[:, -last:]
        else:
            pred = res[-last:]
        last_node, last_attr = self._get_output_node_attr(last)
        return res, pred, last_node, last_attr

    def loss(self, X, y, cache=None):
        """
        Computes the loss due to prediction error. Returns a float.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.loss(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.loss(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def dlossds(self, X, y, cache=None):
        """
        Computes the loss derivative against the inputs.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.dlossds(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.dlossds(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def gradient_backward(self, graddx, X, inputs=False, cache=None):
        """
        Computes the gradient in X.

        :param graddx: existing gradient against the inputs
        :param X: computes the gradient in X
        :param inputs: if False, derivative against the coefficients,
            otherwise against the inputs.
        :param cache: cache intermediate results to avoid more computation
        :return: gradient
        """
        if cache is None:
            cache = self.fill_cache(X)
        shape = self.training_weights.shape
        pred = self.predict(X)

        whole_gradx = numpy.zeros(pred.shape, dtype=numpy.float64)
        whole_gradw = numpy.zeros(shape, dtype=numpy.float64)
        if len(graddx.shape) == 0:
            whole_gradx[-1] = graddx
        else:
            whole_gradx[-graddx.shape[0]:] = graddx

        # nodes are processed in reverse order so that the gradient
        # of an output is known before the node producing it is visited
        for node, attr in zip(self.nodes[::-1], self.nodes_attr[::-1]):
            ch = cache[node.nodeid]

            node_graddx = whole_gradx[attr['output']]
            xi = pred[attr['inputs']]

            temp_gradw = node.gradient_backward(
                node_graddx, xi, inputs=False, cache=ch)
            temp_gradx = node.gradient_backward(
                node_graddx, xi, inputs=True, cache=ch)

            first = attr['first_coef']
            whole_gradw[first:first + attr['coef_size']] += (
                temp_gradw.reshape((attr['coef_size'],)))
            whole_gradx[attr['inputs']] += (
                temp_gradx.reshape((len(attr['inputs']),)))

        if inputs:
            return whole_gradx
        return whole_gradw

692

693

class BaseNeuralTreeNet(BaseEstimator):
    """
    Classifier or regressor following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet,
        it is stored in attribute ``estimator_``
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        BaseEstimator.__init__(self)
        self.estimator = None
        self.estimator_ = estimator
        # The optimizer must be kept, method fit gives it to the network.
        self.optimizer = optimizer
        self.max_iter = max_iter
        self.early_th = early_th
        self.verbose = verbose
        self.lr = lr
        self.lr_schedule = lr_schedule
        self.l1 = l1
        self.l2 = l2
        self.momentum = momentum

    def decision_function(self, X):
        """
        Returns the outputs of the underlying network
        (what ``estimator_.predict`` returns).

        :param X: inputs
        :return: network outputs
        """
        return self.estimator_.predict(X)

    def fit(self, X, y, sample_weights=None):
        """
        Trains the estimator.

        :param X: input features
        :param y: expected classes (binary) or expected values,
            for a classifier, a vector of labels is converted
            into a matrix with two columns
        :param sample_weights: sample weights
        :return: self
        :raises NotImplementedError: if *sample_weights* is specified
        """
        if sample_weights is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weights is not supported yet.")
        if isinstance(self, ClassifierMixin):
            ny = label_class_to_softmax_output(y) if len(y.shape) == 1 else y
        else:
            ny = y
        self.estimator_.fit(X, ny, optimizer=self.optimizer, max_iter=self.max_iter,
                            early_th=self.early_th, verbose=self.verbose,
                            lr=self.lr, lr_schedule=self.lr_schedule,
                            l1=self.l1, l2=self.l2, momentum=self.momentum)
        return self

    @staticmethod
    def onnx_shape_calculator():
        """
        Shape calculator when converting this model into ONNX.
        See :epkg:`skearn-onnx`.

        :return: a function taking the operator to convert
        """
        from skl2onnx.common.data_types import Int64TensorType

        def shape_calculator(operator):
            op = operator.raw_operator
            input_type = operator.inputs[0].type.__class__
            input_dim = operator.inputs[0].get_first_dimension()
            output_type = input_type(
                [input_dim, op.estimator_.nodes[-1].ndim_out])
            if isinstance(op, ClassifierMixin):
                # a classifier has two outputs: labels and probabilities
                operator.outputs[0].type = Int64TensorType([input_dim, 1])
                operator.outputs[1].type = output_type
            else:
                operator.outputs[0].type = output_type

        return shape_calculator

    @staticmethod
    def onnx_converter():
        """
        Converts this model into ONNX.

        :return: a function taking the scope, the operator to convert
            and the container receiving the ONNX nodes
        """
        from skl2onnx.common.data_types import guess_numpy_type
        from skl2onnx.algebra.onnx_ops import (  # pylint: disable=E0611
            OnnxIdentity, OnnxArgMax, OnnxAdd, OnnxMatMul,
            OnnxSigmoid, OnnxMul, OnnxSoftmax)

        def converter(scope, operator, container):
            op = operator.raw_operator
            net = op.estimator_
            out = operator.outputs
            opv = container.target_opset

            X = operator.inputs[0]
            dtype = guess_numpy_type(X.type)

            # results already converted, indexed by name
            res = {'inputs': X}
            last = None
            for node, attr in zip(net.nodes, net.nodes_attr):

                # verification
                coef = (node.coef.reshape((1, -1)) if len(node.coef.shape) == 1
                        else node.coef)
                if len(coef.shape) != 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix not {coef.shape!r}.")
                if coef.shape[1] < 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix with at least 2 columns "
                        f"not {coef.shape!r}.")

                # input, output, names
                name = ('inputs' if attr['inputs'][0] == 0 else
                        "r_%s" % ("_".join(map(str, attr['inputs']))))
                if name not in res:
                    raise KeyError(  # pragma: no cover
                        f"Unable to find {name!r} in {set(res)}.")
                output_name = (
                    "r_%d" % attr['output'] if isinstance(attr['output'], int)
                    else "r_%s" % ("_".join(map(str, attr['output']))))
                x = res[name]

                # conversion of one node, the first column of coef
                # is added after the product with the other columns
                tr = OnnxAdd(OnnxMatMul(x, coef[:, 1:].T.astype(dtype),
                                        op_version=opv),
                             coef[:, 0].astype(dtype), op_version=opv)

                # activation
                if node.activation == "sigmoid4":
                    final = OnnxSigmoid(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "sigmoid":
                    final = OnnxSigmoid(tr, op_version=opv)
                elif node.activation == "softmax4":
                    final = OnnxSoftmax(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "softmax":
                    final = OnnxSoftmax(tr, op_version=opv)
                elif node.activation == "identity":
                    final = OnnxIdentity(tr, op_version=opv)
                else:
                    raise NotImplementedError(
                        f"Unable to convert activation {node.activation!r} "
                        f"function into ONNX.")

                res[output_name] = final
                last = final

            if isinstance(op, ClassifierMixin):
                prob = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[1]])
                prob.add_to(scope, container)
                labels = OnnxArgMax(prob, axis=1, keepdims=1, op_version=opv,
                                    output_names=[out[0]])
                labels.add_to(scope, container)
            else:
                pred = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[0]])
                pred.add_to(scope, container)

        return converter

875

876

class NeuralTreeNetClassifier(ClassifierMixin, BaseNeuralTreeNet):
    """
    Binary classifier built on a @see cl NeuralTreeNet and
    following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        ClassifierMixin.__init__(self)
        # Every setting is forwarded unchanged to the base class.
        settings = dict(
            optimizer=optimizer, max_iter=max_iter, early_th=early_th,
            verbose=verbose, lr=lr, lr_schedule=lr_schedule,
            l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, estimator=estimator, **settings)

    def predict(self, X):
        """
        Returns the predicted classes, the index of the highest
        probability for every observation.

        :param X: inputs
        :return: classes
        """
        return numpy.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X):
        """
        Returns the classification probabilities, the last two
        columns of the decision function.

        :param X: inputs
        :return: probabilities
        """
        scores = self.decision_function(X)
        return scores[:, -2:]

930

931

class NeuralTreeNetRegressor(RegressorMixin, BaseNeuralTreeNet):
    """
    Regressor built on a @see cl NeuralTreeNet and
    following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        RegressorMixin.__init__(self)
        # Every setting is forwarded unchanged to the base class.
        settings = dict(
            optimizer=optimizer, max_iter=max_iter, early_th=early_th,
            verbose=verbose, lr=lr, lr_schedule=lr_schedule,
            l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, estimator=estimator, **settings)

    def predict(self, X):
        """
        Returns the predicted values, the last column
        of the decision function kept as a matrix with one column.

        :param X: inputs
        :return: predictions
        """
        outputs = self.decision_function(X)
        return outputs[:, -1:]

Coverage for src/mlstatpy/ml/neural_tree.py: 98%

487 statements