Coverage for onnxcustom/utils/orttraining_helper.py: 99%

242 statements  

coverage.py v7.0.5, created at 2023-01-17 01:42 +0100

# pylint: disable=C0415,E1101
"""
@file
@brief ONNX manipulations to help build ONNX gradient graphs.
"""
from collections import OrderedDict
import numpy
from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE
from onnx.numpy_helper import to_array, from_array
from onnx.helper import (
    make_node, make_graph, make_model, make_tensor_value_info,
    set_model_props)
from onnx import TensorProto  # pylint: disable=E0611


def _unique_name(existing_names, name):
    """
    Returns a name different from any name in *existing_names*.

    :param existing_names: set of names
    :param name: current name
    :return: unique name
    """
    if name not in existing_names:
        existing_names.add(name)
        return name
    name0 = name
    i = 2
    while name in existing_names:
        name = "%s_%d" % (name0, i)
        i += 1
    existing_names.add(name)
    return name
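

# Illustrative sketch, not part of the original module: _unique_name mutates
# *existing_names* and appends an increasing numeric suffix on collision.
_demo_names = {"loss_diff"}
assert _unique_name(_demo_names, "loss_diff") == "loss_diff_2"
assert _unique_name(_demo_names, "loss_diff") == "loss_diff_3"
assert _unique_name(_demo_names, "fresh") == "fresh"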


def _loss_l1(existing_names, elem, shape,
             output_name, label_name,
             weight_name, loss_name):
    """
    Implements loss l1.
    """
    diff_name = _unique_name(existing_names, "loss_diff")
    diff2_name = _unique_name(existing_names, "loss_diff")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Abs', [diff_name], [diff2_name])]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [diff2_name, weight_name], [res_name]))
    else:
        res_name = diff2_name
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        [], inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])
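

# Illustrative sketch, not part of the original module: the pieces _loss_l1
# returns for a float32 output of shape (None, 1) and no sample weight.
# The loss itself is the chain Sub -> Abs -> ReduceSum.
_l1_inits, _l1_inputs, _l1_nodes, _l1_outputs = _loss_l1(
    set(), TensorProto.FLOAT, [None, 1],
    output_name="variable", label_name="label",
    weight_name=None, loss_name="loss")
assert [n.op_type for n in _l1_nodes] == ['Sub', 'Abs', 'ReduceSum']
assert [i.name for i in _l1_inputs] == ['label']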


def _loss_l2(existing_names, elem, shape,
             output_name, label_name,
             weight_name, loss_name):
    """
    Implements loss l2.
    """
    diff_name = _unique_name(existing_names, "loss_diff")
    diff2_name = _unique_name(existing_names, "loss_diff")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Mul', [diff_name, diff_name], [diff2_name])]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [diff2_name, weight_name], [res_name]))
    else:
        res_name = diff2_name
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        [], inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])
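

# Illustrative sketch, not part of the original module: the graph assembled by
# _loss_l2 is meant to compute sum(w * (f(x) - y) ** 2); the same weighted
# squared error written with numpy for reference.
_pred = numpy.array([0.5, 2.0, 3.0], dtype=numpy.float32)
_expected = numpy.array([1.0, 2.0, 2.5], dtype=numpy.float32)
_weight = numpy.array([1.0, 2.0, 1.0], dtype=numpy.float32)
_l2 = numpy.sum(_weight * (_pred - _expected) ** 2)
assert abs(_l2 - 0.5) < 1e-6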


def _loss_elastic(existing_names, elem, shape,
                  output_name, label_name,
                  weight_name, loss_name,
                  l1_weight=0.5, l2_weight=0.5):
    """
    Implements mixture of losses l1 and l2.
    """
    l1_name = _unique_name(existing_names, "l1_name")
    l2_name = _unique_name(existing_names, "l2_name")
    dtype = TENSOR_TYPE_TO_NP_TYPE[elem]
    onx_l1_weight = from_array(
        numpy.array([l1_weight], dtype=dtype), name=l1_name)
    onx_l2_weight = from_array(
        numpy.array([l2_weight], dtype=dtype), name=l2_name)
    inits = [onx_l1_weight, onx_l2_weight]

    diff_name = _unique_name(existing_names, "loss_diff")
    diff1_name = _unique_name(existing_names, "loss_l1")
    diff2_name = _unique_name(existing_names, "loss_l2")
    wl1_name = _unique_name(existing_names, "loss_l1")
    wl2_name = _unique_name(existing_names, "loss_l2")
    final_loss = _unique_name(existing_names, "final_loss")
    nodes = [make_node('Sub', [output_name, label_name], [diff_name]),
             make_node('Mul', [diff_name, diff_name], [diff2_name]),
             make_node('Abs', [diff_name], [diff1_name]),
             make_node('Mul', [diff1_name, l1_name], [wl1_name]),
             make_node('Mul', [diff2_name, l2_name], [wl2_name]),
             make_node('Add', [wl1_name, wl2_name], [final_loss]),
             ]
    if weight_name is not None:
        res_name = _unique_name(existing_names, "loss_diff_weight")
        nodes.append(
            make_node('Mul', [final_loss, weight_name], [res_name]))
    else:
        res_name = final_loss
    nodes.append(make_node('ReduceSum', [res_name], [loss_name]))

    inputs = [make_tensor_value_info(label_name, elem, shape)]
    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
    return (
        inits, inputs, nodes,
        [make_tensor_value_info(loss_name, elem, [1, 1])])
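

# Illustrative sketch, not part of the original module: a numpy version of the
# elastic loss assembled above, sum(l1_weight * |d| + l2_weight * d ** 2)
# with d = f(x) - y and the default weights of 0.5.
_d = numpy.array([0.5, -1.0, 2.0], dtype=numpy.float32)
_elastic = numpy.sum(0.5 * numpy.abs(_d) + 0.5 * _d ** 2)
assert abs(_elastic - (0.5 * 3.5 + 0.5 * 5.25)) < 1e-5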


def _loss_log(existing_names, elem, shape,
              output_name, label_name,
              weight_name, loss_name,
              eps=1e-6):
    """
    Implements the log loss for a binary classification:
    :math:`loss(yt, yp) = -(1-yt)\\log(1-yp) - yt\\log(yp)`,
    where *yp* is the predicted probability and *yt* the expected probability.
    *yt* is expected to be binary, *yp* is a matrix with two
    columns, the sum on every line is 1.
    Parameter *eps* is used to avoid computing *log(0)*.
    """
    if output_name == 'output_label':
        raise RuntimeError(  # pragma: no cover
            f"output_name={output_name!r}, log loss does not work on labels.")
    dtype = TENSOR_TYPE_TO_NP_TYPE[elem]
    one_name = _unique_name(existing_names, "one_name")
    eps_name = _unique_name(existing_names, "eps_name")
    eps1_name = _unique_name(existing_names, "eps1_name")
    axes_name = _unique_name(existing_names, "axes_name")

    eps_init = from_array(numpy.array([eps], dtype=dtype), name=eps_name)
    one_init = from_array(numpy.array([1], dtype=dtype), name=one_name)
    eps1_init = from_array(
        numpy.array([1 - eps], dtype=dtype), name=eps1_name)
    axes_init = from_array(
        numpy.array([1], dtype=numpy.int64), name=axes_name)

    clip_name = _unique_name(existing_names, "clip_name")
    clip_red_name = _unique_name(existing_names, "clip_red_name")
    new_output_name = _unique_name(existing_names, "new_output_name")
    cast_name = _unique_name(existing_names, "cast_name")
    log_name = _unique_name(existing_names, "log_name")
    subl_name = _unique_name(existing_names, "subl_name")
    conc_name = _unique_name(existing_names, "conc_name")
    mul_name = _unique_name(existing_names, "mul_name")
    like_name = _unique_name(existing_names, "like_name")

    nodes = [
        make_node(
            'Clip', [output_name, eps_name, eps1_name], [clip_name]),
        make_node(
            'ReduceSum', [clip_name, axes_name], [clip_red_name], keepdims=1),
        make_node('Div', [clip_name, clip_red_name], [new_output_name]),
        make_node('Log', [new_output_name], [log_name]),
        make_node('Cast', [label_name], [cast_name], to=elem),
        make_node('Sub', [one_name, cast_name], [subl_name]),
        make_node('Concat', [subl_name, cast_name], [conc_name], axis=1),
        make_node('Mul', [log_name, conc_name], [mul_name]),
        make_node(
            'ReduceSum', [mul_name, axes_name], [like_name], keepdims=1)]

    inputs = [make_tensor_value_info(label_name, TensorProto.INT64, shape)]

    if weight_name is not None:
        inputs.append(
            make_tensor_value_info(weight_name, elem, [shape[0]]))
        likew_name = _unique_name(existing_names, "likew_name")
        nodes.append(
            make_node('Mul', [like_name, weight_name], [likew_name]))
        like_name = likew_name

    shape_name = _unique_name(existing_names, "shape_name")
    onx_shape = from_array(
        numpy.array([1, 1], dtype=numpy.int64), name=shape_name)
    reduced_loss = _unique_name(existing_names, "reduced_loss")
    neg_reduced_loss = _unique_name(existing_names, "neg_reduced_loss")
    nodes.extend([
        make_node('ReduceMean', [like_name], [reduced_loss]),
        make_node('Neg', [reduced_loss], [neg_reduced_loss]),
        make_node('Reshape', [neg_reduced_loss, shape_name], [loss_name])])

    return (
        [eps_init, eps1_init, one_init, axes_init, onx_shape],
        inputs, nodes, [make_tensor_value_info(loss_name, elem, [1, 1])])
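

# Illustrative sketch, not part of the original module: a numpy version of the
# binary log loss the graph above computes, -mean((1-yt)*log(p0) + yt*log(p1)),
# where (p0, p1) are the two clipped and renormalized probability columns.
_eps = 1e-6
_proba = numpy.array([[0.9, 0.1], [0.2, 0.8]], dtype=numpy.float32)
_yt = numpy.array([0, 1], dtype=numpy.int64)
_p = numpy.clip(_proba, _eps, 1 - _eps)
_p /= _p.sum(axis=1, keepdims=True)
_logloss = -numpy.mean(
    (1 - _yt) * numpy.log(_p[:, 0]) + _yt * numpy.log(_p[:, 1]))
assert _logloss > 0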


def penalty_loss_onnx(name, dtype, l1=None, l2=None, existing_names=None):
    """
    Returns onnx nodes to compute
    :math:`|w| \\alpha + w^2 \\beta`
    where :math:`\\alpha=l1` and :math:`\\beta=l2`.

    :param name: name of weights
    :param dtype: numpy dtype
    :param l1: coefficient for L1 norm
    :param l2: coefficient for L2 norm
    :param existing_names: names already taken in the ONNX graph
    :return: initializer, nodes
    """
    suffix = name
    cst_shape = _unique_name(existing_names, f"shape_{suffix}")
    new_name = _unique_name(existing_names, f"reshaped_{suffix}")
    inits = [from_array(
        numpy.array([-1], dtype=numpy.int64), name=cst_shape)]
    nodes = [make_node('Reshape', [name, cst_shape], [new_name])]
    name = new_name

    if l1 is None or l1 == 0:
        if l2 is None or l2 == 0:
            raise ValueError(  # pragma: no cover
                f"l1 and l2 cannot be zero or None at the same time, name={name!r}.")
        l2_name = _unique_name(existing_names, f"l2_weight_{suffix}")
        inits.extend([from_array(
            numpy.array([l2], dtype=dtype), name=l2_name)])
        mul_name = _unique_name(existing_names, f"reduced0_{suffix}")
        red_name = _unique_name(existing_names, f"reduced_{suffix}")
        pen_name = _unique_name(existing_names, f"penalty_{suffix}")
        nodes.extend([
            make_node('Mul', [name, name], [mul_name]),
            make_node('ReduceSum', [mul_name], [red_name]),
            make_node('Mul', [red_name, l2_name], [pen_name])])
        return inits, nodes

    if l2 is None or l2 == 0:
        l1_name = _unique_name(existing_names, f"l1_weight_{suffix}")
        inits.extend([from_array(
            numpy.array([l1], dtype=dtype), name=l1_name)])
        red_name = _unique_name(existing_names, f"reduced_{suffix}")
        abs_name = _unique_name(existing_names, f"absolute_{suffix}")
        pen_name = _unique_name(existing_names, f"penalty_{suffix}")
        nodes.extend([
            make_node('Abs', [name], [abs_name]),
            make_node('ReduceSum', [abs_name], [red_name]),
            make_node('Mul', [red_name, l1_name], [pen_name])])
        return inits, nodes

    l1_name = _unique_name(existing_names, f"l1_weight_{suffix}")
    l2_name = _unique_name(existing_names, f"l2_weight_{suffix}")
    inits.extend([
        from_array(numpy.array([l1], dtype=dtype), name=l1_name),
        from_array(numpy.array([l2], dtype=dtype), name=l2_name)])

    red_name1 = _unique_name(existing_names, f"reduced1_{suffix}")
    mul_name = _unique_name(existing_names, f"reducedm_{suffix}")
    red_name2 = _unique_name(existing_names, f"reduced2_{suffix}")
    abs_name = _unique_name(existing_names, f"absolute_{suffix}")
    pen_name1 = _unique_name(existing_names, f"penalty1_{suffix}")
    pen_name2 = _unique_name(existing_names, f"penalty2_{suffix}")
    pen_name = _unique_name(existing_names, f"penalty_{suffix}")
    nodes.extend([
        make_node('Mul', [name, name], [mul_name]),
        make_node('ReduceSum', [mul_name], [red_name2]),
        make_node('Mul', [red_name2, l2_name], [pen_name2]),
        make_node('Abs', [name], [abs_name]),
        make_node('ReduceSum', [abs_name], [red_name1]),
        make_node('Mul', [red_name1, l1_name], [pen_name1]),
        make_node('Add', [pen_name1, pen_name2], [pen_name])])

    return inits, nodes
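

# Illustrative sketch, not part of the original module: the node sequence
# penalty_loss_onnx emits when both l1 and l2 are given for a tensor named
# 'coef' (Reshape, then the L2 branch, the L1 branch, and their sum).
_pen_inits, _pen_nodes = penalty_loss_onnx(
    'coef', numpy.float32, l1=0.5, l2=0.5, existing_names=set())
assert [n.op_type for n in _pen_nodes] == [
    'Reshape', 'Mul', 'ReduceSum', 'Mul', 'Abs', 'ReduceSum', 'Mul', 'Add']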


def get_train_initializer(onx):
    """
    Returns the list of initializers to train.

    :param onx: ONNX model
    :return: dictionary `{name: (value, tensor)}`

    The function walks through the list of initializers and
    returns all tensors with float16, float or double elements.
    """
    res = OrderedDict()
    for init in onx.graph.initializer:
        if init.data_type in (
                TensorProto.FLOAT16,  # pylint: disable=E1101
                TensorProto.FLOAT,  # pylint: disable=E1101
                TensorProto.DOUBLE):  # pylint: disable=E1101
            res[init.name] = (to_array(init), init)
    return res
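

# Illustrative sketch, not part of the original module: get_train_initializer
# keeps floating-point initializers only, so the int64 one below is skipped.
_demo_graph = make_graph(
    [make_node('Add', ['X', 'B'], ['Y'])], 'demo',
    [make_tensor_value_info('X', TensorProto.FLOAT, [None, 2])],
    [make_tensor_value_info('Y', TensorProto.FLOAT, [None, 2])],
    [from_array(numpy.array([[1.0, 2.0]], dtype=numpy.float32), name='B'),
     from_array(numpy.array([0], dtype=numpy.int64), name='ints')])
_demo_model = make_model(_demo_graph)
assert list(get_train_initializer(_demo_model)) == ['B']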


def _rewrite_op_no_grad(onx):
    """
    Rewrites operators with no gradient.
    """
    set_types = set(n.op_type for n in onx.graph.node)
    if "Reciprocal" in set_types:
        from skl2onnx.algebra.onnx_ops import OnnxDiv  # pylint: disable=E0611
        from skl2onnx.common.data_types import FloatTensorType
        from .onnx_rewriter import onnx_rewrite_operator

        opset = None
        for op in onx.opset_import:
            if op.domain in ('', 'ai.onnx'):
                opset = op.version
        if opset is None:  # pragma: no cover
            from .. import get_max_opset
            opset = get_max_opset()

        node = OnnxDiv(numpy.array([1], dtype=numpy.float32),
                       'X', output_names=['Y'],
                       op_version=opset)
        rewrite_onx = node.to_onnx(
            inputs={'X': FloatTensorType()},
            outputs={'Y': FloatTensorType()},
            target_opset=opset)
        onx = onnx_rewrite_operator(onx, 'Reciprocal', rewrite_onx)

    return onx
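

# Illustrative sketch, not part of the original module: the rewrite above
# relies on the identity Reciprocal(x) == Div(1, x), checked here with numpy.
_x = numpy.array([0.5, 2.0, 4.0], dtype=numpy.float32)
assert numpy.allclose(numpy.reciprocal(_x), 1.0 / _x)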


def add_loss_output(onx, score_name='squared_error',
                    loss_name='loss', label_name='label',
                    weight_name=None, penalty=None,
                    output_index=None, **kwargs):
    """
    Modifies an ONNX graph to add operators computing a loss
    so that the model can be trained.

    :param onx: onx graph
    :param score_name: name of the loss to add (see the list below)
    :param loss_name: name of the output loss
    :param label_name: name of the label input
    :param weight_name: name of the input holding the sample weights,
        None to compute the loss without weights
    :param penalty: dictionary similar to the
        following one `{ weight_name: {'l1': alpha, 'l2': beta} }`
        or `{ weight_name: beta}`,
        it adds a L1 and/or L2 penalty to one input or initializer,
        penalty = :math:`|w| \\alpha + w^2 \\beta`
    :param output_index: the output used to compute the loss,
        if None, the function assumes there is only one output,
        it must be specified if there is more than one,
        it can be an integer or a string (output name)
    :param kwargs: additional arguments for losses (see below)
    :return: modified graph

    Possible values for *score_name*:

    * `'squared_error'` or `'l2'`: :math:`\\sum_i{(f(x_i)-y_i)^2}` or
      :math:`\\sum_i{w_i (f(x_i)-y_i)^2}` if *weight_name*
      is not None
    * `'absolute_error'` or `'l1'`: :math:`\\sum_i{|f(x_i)-y_i|}` or
      :math:`\\sum_i{w_i |f(x_i)-y_i|}` if *weight_name*
      is not None
    * `'elastic'`: mixture of losses, kwargs must define
      *l1_weight* and *l2_weight*, the default value is 0.5 for both
      if they are undefined
    * `'log'`: log loss :math:`-(1-yt)\\log(1-yp) - yt\\log(yp)`,
      this only works for a binary classification where *yp* is the
      predicted probability, *yt* is the expected probability.
      *yt* is expected to be binary, *yp* is a matrix with two
      columns, the sum on every line is 1.

    See example :ref:`l-orttraining-nn-gpu`.
    The next example shows an elastic loss mixing L1 and L2 losses.

    .. gdot::
        :script: DOT-SECTION

        import numpy
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LinearRegression
        from mlprodict.onnx_conv import to_onnx
        from mlprodict.onnxrt import OnnxInference
        from onnxcustom import __max_supported_opset__ as opset
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = LinearRegression()
        reg.fit(X_train, y_train, sample_weight=w_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

        onx_loss = add_loss_output(
            onx, weight_name='weight', score_name='elastic',
            l1_weight=0.1, l2_weight=0.9)

        print("DOT-SECTION", OnnxInference(onx_loss).to_dot())

    The next example shows how to add an elastic loss with L1 and L2
    penalties on the coefficients.

    .. gdot::
        :script: DOT-SECTION

        import numpy
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LinearRegression
        from mlprodict.onnx_conv import to_onnx
        from mlprodict.onnxrt import OnnxInference
        from onnxcustom import __max_supported_opset__ as opset
        from onnxcustom.utils.orttraining_helper import add_loss_output
        from onnxcustom.training.optimizers import OrtGradientOptimizer

        X, y = make_regression(  # pylint: disable=W0632
            100, n_features=10, bias=2, random_state=0)
        X = X.astype(numpy.float32)
        y = y.astype(numpy.float32)
        w = (numpy.random.rand(y.shape[0]) + 1).astype(X.dtype)
        X_train, _, y_train, __, w_train, ___ = train_test_split(X, y, w)
        reg = LinearRegression()
        reg.fit(X_train, y_train, sample_weight=w_train)
        reg.coef_ = reg.coef_.reshape((1, -1))
        onx = to_onnx(reg, X_train, target_opset=opset,
                      black_op={'LinearRegressor'})

        onx_loss = add_loss_output(
            onx, weight_name='weight', score_name='elastic',
            penalty={'coef': {'l1': 0.5, 'l2': 0.5},
                     'intercept': {'l1': 0.5, 'l2': 0.5}})

        print("DOT-SECTION", OnnxInference(onx_loss).to_dot())
    """
    from mlprodict.onnx_tools.optim import onnx_remove_node_unused

    # rename every intermediate result called 'label'
    def _replace(ens):
        for i in range(len(ens)):  # pylint: disable=C0200
            if ens[i] == 'label':
                ens[i] = '_label_'

    for node in onx.graph.node:
        if "_label_" in node.input or "_label_" in node.output:
            raise RuntimeError(  # pragma: no cover
                "One intermediate result contains '_label_'. "
                "It should be removed manually.\n%r" % node)
        _replace(node.input)
        _replace(node.output)

    if output_index is None:
        if len(onx.graph.output) != 1:
            raise ValueError(  # pragma: no cover
                "Unable to guess the output to compare to the "
                "expected labels among %r." % (
                    [o.name for o in onx.graph.output]))
        outputs = onx.graph.output
        output_index = 0
    elif isinstance(output_index, int):
        outputs = [onx.graph.output[output_index]]
    elif isinstance(output_index, str):
        outputs = [(i, o) for i, o in enumerate(onx.graph.output)
                   if o.name == output_index]
        if len(outputs) != 1:
            raise ValueError(  # pragma: no cover
                "Unable to find output %r in %r." % (
                    output_index, [o.name for o in onx.graph.output]))
        output_index = outputs[0][0]
        outputs = [outputs[0][1]]
    else:
        raise TypeError(  # pragma: no cover
            f"output_index must be an integer or a str not {type(output_index)!r}.")

    existing_names = []
    for node in onx.graph.node:
        existing_names.extend(node.output)
        existing_names.extend(node.input)
    existing_names = set(existing_names)

    output_onx = onx.graph.output[output_index]
    output_name = output_onx.name
    elem = output_onx.type.tensor_type.elem_type
    if elem == 0:
        raise TypeError(  # pragma: no cover
            f"Unable to guess input tensor type from {output_onx!r}.")
    shape = []
    for d in output_onx.type.tensor_type.shape.dim:
        shape.append(d.dim_value if d.dim_value > 0 else None)

    if score_name in ('squared_error', 'l2'):
        inits, inputs, nodes, outputs = _loss_l2(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name)
    elif score_name in ('absolute_error', 'l1'):
        inits, inputs, nodes, outputs = _loss_l1(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name)
    elif score_name == 'elastic':
        inits, inputs, nodes, outputs = _loss_elastic(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name, **kwargs)
    elif score_name == 'log':
        shape = (None, 1)
        inits, inputs, nodes, outputs = _loss_log(
            existing_names, elem, shape, output_name, label_name,
            weight_name, loss_name, **kwargs)
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unexpected {score_name!r} value for score_name.")

    if penalty is not None:
        final_name = nodes[-1].output[0]
        loss_name = _unique_name(existing_names, "loss_diff")
        nodes[-1].output[0] = loss_name
        names = []
        for k, v in penalty.items():
            if isinstance(v, float):
                v = {'l2': v}
            inits_to_add, nodes_to_add = penalty_loss_onnx(
                k, dtype=TENSOR_TYPE_TO_NP_TYPE[elem],
                existing_names=existing_names, **v)
            names.append(nodes_to_add[-1].output[0])
            nodes.extend(nodes_to_add)
            inits.extend(inits_to_add)
        # Operator Sum does not have a gradient.
        if len(names) == 1:
            pen_name = names[0]
        else:
            current = names[0]
            for i in range(1, len(names)):
                new_name = _unique_name(existing_names, "sumop")
                nodes.append(
                    make_node('Add', [current, names[i]], [new_name]))
                current = new_name
            pen_name = current

        cst_shape = _unique_name(existing_names, "shapevect")
        inits.append(from_array(
            numpy.array([-1, 1], dtype=numpy.int64), name=cst_shape))
        loss_reshape = _unique_name(existing_names, "loss_reshape")
        pen_reshape = _unique_name(existing_names, "penalty_reshape")
        nodes.extend([
            make_node("Reshape", [pen_name, cst_shape], [pen_reshape]),
            make_node("Reshape", [loss_name, cst_shape], [loss_reshape])])

        nodes.append(
            make_node('Add', [pen_reshape, loss_reshape], [final_name]))

    inits = list(onx.graph.initializer) + inits
    graph = make_graph(
        list(onx.graph.node) + nodes,
        onx.graph.name,
        list(onx.graph.input) + inputs,
        outputs + [onx.graph.output[output_index]],
        inits)
    onnx_model = make_model(graph)
    onnx_model.ir_version = onx.ir_version
    onnx_model.producer_name = onx.producer_name
    onnx_model.producer_version = onx.producer_version
    onnx_model.domain = onx.domain
    onnx_model.model_version = onx.model_version
    onnx_model.doc_string = onx.doc_string
    if len(onx.metadata_props) > 0:
        values = {p.key: p.value for p in onx.metadata_props}
        set_model_props(onnx_model, values)

    # fix opset import
    del onnx_model.opset_import[:]  # pylint: disable=E1101
    for oimp in onx.opset_import:
        op_set = onnx_model.opset_import.add()  # pylint: disable=E1101
        op_set.domain = oimp.domain
        op_set.version = oimp.version
    return _rewrite_op_no_grad(onnx_remove_node_unused(onnx_model))
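

# Illustrative usage sketch, not part of the original module: add a squared
# error loss to a hand-made linear model Y = X @ coef.  The returned model is
# expected to expose the loss and the original prediction as outputs and to
# gain an extra input for the expected labels.
_coef = from_array(
    numpy.array([[1.0], [2.0]], dtype=numpy.float32), name='coef')
_lin_graph = make_graph(
    [make_node('MatMul', ['X', 'coef'], ['variable'])], 'linreg',
    [make_tensor_value_info('X', TensorProto.FLOAT, [None, 2])],
    [make_tensor_value_info('variable', TensorProto.FLOAT, [None, 1])],
    [_coef])
_lin_model = make_model(_lin_graph)
_lin_loss = add_loss_output(_lin_model, score_name='squared_error',
                            label_name='label')
assert [o.name for o in _lin_loss.graph.output] == ['loss', 'variable']
assert 'label' in [i.name for i in _lin_loss.graph.input]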