Coverage for mlprodict/onnxrt/onnx_inference.py: 94%

767 statements  

coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

# pylint: disable=C0302,R0912
"""
@file
@brief Implements a class able to compute the predictions
from an :epkg:`ONNX` model.
"""
from collections import OrderedDict
from io import BytesIO
from time import perf_counter
import warnings
import textwrap
import pprint
from keyword import iskeyword
import numpy
from scipy.sparse import coo_matrix
from onnx import (
    load, load_model, shape_inference,
    ModelProto, GraphProto, FunctionProto)
from onnx.helper import make_model
from ..tools.code_helper import make_callable, print_code
from ..onnx_tools.model_checker import check_onnx
from ..onnx_tools.onnx2py_helper import (
    _var_as_dict, numpy_min, numpy_max)
from ..onnx_tools.onnx_manipulations import (
    select_model_inputs_outputs, enumerate_model_node_outputs,
    overwrite_opset, insert_results_into_onnx)
from ..onnx_tools.optim import onnx_remove_node_unused
from .onnx_inference_node import OnnxInferenceNode
from .onnx_inference_exports import OnnxInferenceExport
from .onnx_shape_inference import OnnxShapeInference
from .ops_shape.shape_excs import (
    ShapeInferenceMissing, NotImplementedShapeInferenceError,
    ShapeInferenceException, ShapeInferenceDimensionError)


class OnnxInference:
    """
    Loads an :epkg:`ONNX` file or object or stream.
    Computes the output of the :epkg:`ONNX` graph.
    Several runtimes are available.

    * ``'python'``: the runtime implements every onnx operator
      needed to run a :epkg:`scikit-learn` model by using :epkg:`numpy`
      or C++ code.
    * ``'python_compiled'``: the same runtime as the previous one
      except that every operator is called from a compiled function
      (@see me _build_compile_run) instead of a method going through
      the list of operators.
    * ``'onnxruntime1'``: uses :epkg:`onnxruntime` (or `onnxruntime1-cuda`, ...).
    * ``'onnxruntime2'``: this mode is mostly used to debug: python
      handles the call to every operator but :epkg:`onnxruntime`
      is called for each of them. This process may fail due to
      wrong type inference, especially if the graph includes
      custom nodes. In that case, it is better to compute the output
      of the intermediate nodes. It is much slower as every node is
      computed for every output, but more robust.

    :param onnx_or_bytes_or_stream: :epkg:`onnx` object,
        bytes, or filename or stream
    :param runtime: runtime options
    :param skip_run: do not build the runtime
    :param inplace: use inplace computation as much as possible
    :param input_inplace: the computation is allowed
        to overwrite the input, see :meth:`_guess_inplace
        <mlprodict.onnxrt.onnx_inference.OnnxInference._guess_inplace>`
    :param ir_version: if not None, overwrite the default version
    :param target_opset: used to overwrite *target_opset*
    :param runtime_options: specific options for the runtime
    :param inside_loop: tells the runtime the graph is meant to
        be repeated multiple times (in that case, inputs and
        outputs may share the same name)
    :param static_inputs: Loop can use static variables,
        variables from the graph which runs the loop
        (enumerate of strings)
    :param new_outputs: if the loading fails, it might be worth
        cutting the graph, if not None, the graph will
        be cut to have these new_outputs as the final outputs
    :param new_opset: overwrites the main opset and replaces
        it with this new one
    :param existing_functions: a model may contain several local functions,
        this parameter is used when a local function is calling another
        local function previously defined.

    Among the possible runtime_options, there are:
    * *enable_profiling*: enables profiling for :epkg:`onnxruntime`
    * *session_options*: an instance of *SessionOptions* from
      :epkg:`onnxruntime`
    * *ir_version*: change ir_version

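    A minimal sketch of switching between runtimes (the tiny linear
    model below is only an illustrative assumption, any ONNX model
    works the same way):

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        import numpy
        from sklearn.linear_model import LinearRegression
        from mlprodict.onnx_conv import to_onnx
        from mlprodict.onnxrt import OnnxInference

        X = numpy.random.randn(10, 2).astype(numpy.float32)
        lr = LinearRegression().fit(X, X.sum(axis=1))
        model_def = to_onnx(lr, X)

        for rt in ['python', 'python_compiled']:
            oinf = OnnxInference(model_def, runtime=rt)
            print(rt, oinf.run({'X': X[:2]}))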

    .. versionchanged:: 0.9
        Parameter *existing_functions* was added.
        Removes the *device* parameter. See runtime.
        Runtime `onnxruntime1-cuda` was added.
    """

    def __init__(self, onnx_or_bytes_or_stream, runtime=None,
                 skip_run=False, inplace=True,
                 input_inplace=False, ir_version=None,
                 target_opset=None, runtime_options=None,
                 session_options=None, inside_loop=False,
                 static_inputs=None, new_outputs=None, new_opset=None,
                 existing_functions=None):
        if isinstance(onnx_or_bytes_or_stream, bytes):
            self.obj = load_model(BytesIO(onnx_or_bytes_or_stream))
        elif isinstance(onnx_or_bytes_or_stream, BytesIO):
            self.obj = load_model(onnx_or_bytes_or_stream)
        elif isinstance(onnx_or_bytes_or_stream, str):
            self.obj = load(onnx_or_bytes_or_stream)
        elif hasattr(onnx_or_bytes_or_stream, 'graph'):
            self.obj = onnx_or_bytes_or_stream
        elif isinstance(onnx_or_bytes_or_stream, GraphProto):
            self.obj = make_model(onnx_or_bytes_or_stream,
                                  producer_name='mlprodict')
        elif isinstance(onnx_or_bytes_or_stream, FunctionProto):
            self.obj = onnx_or_bytes_or_stream
        else:
            raise TypeError("Unable to handle type {}.".format(  # pragma: no cover
                type(onnx_or_bytes_or_stream)))
        if ir_version is not None:
            self.obj.ir_version = ir_version
        if new_outputs is not None:
            self.obj = select_model_inputs_outputs(
                self.obj, outputs=new_outputs, infer_shapes=True)
        if new_opset is not None:
            self.obj = overwrite_opset(self.obj, new_opset)

        self.runtime = runtime
        self.skip_run = skip_run
        self.input_inplace = input_inplace
        self.inplace = inplace
        self.force_target_opset = target_opset
        self.runtime_options = runtime_options
        self.inside_loop = inside_loop
        self.static_inputs = static_inputs
        self._init(existing_functions)

    def __getstate__(self):
        """
        To pickle the object.
        """
        return {'onnx': self.obj.SerializeToString(),
                'runtime': self.runtime,
                'runtime_options': self.runtime_options,
                'skip_run': self.skip_run,
                'input_inplace': self.input_inplace,
                'inplace': self.inplace,
                'force_target_opset': self.force_target_opset,
                'static_inputs': self.static_inputs,
                'inside_loop': self.inside_loop}

    def __setstate__(self, state):
        """
        To unpickle the object.
        """
        onx = state['onnx']
        self.obj = load_model(BytesIO(onx))
        self.runtime = state['runtime']
        self.runtime_options = state['runtime_options']
        self.skip_run = state['skip_run']
        self.input_inplace = state['input_inplace']
        self.inplace = state['inplace']
        self.force_target_opset = state['force_target_opset']
        self.static_inputs = state['static_inputs']
        self.inside_loop = state['inside_loop']
        self._init()
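    # A hedged usage sketch (not part of the original file): __getstate__ and
    # __setstate__ above make the instance pickable, the runtime being rebuilt
    # by _init() after unpickling; `model_def` is an assumed ONNX model:
    #
    #     import pickle
    #     oinf = OnnxInference(model_def)
    #     oinf2 = pickle.loads(pickle.dumps(oinf))
    #     assert oinf2.input_names == oinf.input_names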

    def _init(self, existing_functions=None):
        """
        Prepares the instance to deliver predictions.
        """
        self.graph_ = self.to_sequence(existing_functions)
        self.functions_ = self.graph_['functions']
        self.outputs_ = self.graph_['outputs']
        self.inputs_ = self.graph_['inputs']
        self.attributes_ = self.graph_['attributes']
        is_function_proto = isinstance(self.obj, FunctionProto)
        if is_function_proto:
            obj_graph = self.obj
        else:
            obj_graph = self.obj.graph

        for ino in [obj_graph.input, obj_graph.output]:
            for xy in ino:
                if isinstance(xy, str):
                    shape = None
                else:
                    shape = xy.type.tensor_type.shape
                    for d in shape.dim:
                        if (d.dim_value == 0 and "0" in str(d) and
                                'dim_param' not in str(d)):
                            if len(shape.dim) <= 1:
                                shape = None
                                break
                            # d.dim_value returns 0 whether it is 0 or empty.
                            # it may be a parameter as well
                            # raise RuntimeError(  # pragma: no cover
                            #     "Wrong ONNX file, one input or output has "
                            #     "an empty shape: {}.".format(xy))

        self.target_opset_ = self.graph_['targets']
        if self.force_target_opset is not None:
            if isinstance(self.force_target_opset, dict):
                self.target_opset_ = self.force_target_opset  # pragma: no cover
            else:
                self.target_opset_ = {'': self.force_target_opset}
        self.ir_version_ = self.graph_['ir_version']

        if not self.skip_run:
            if self.runtime is not None and self.runtime.startswith('onnxruntime1'):
                # Loads the onnx with onnxruntime as a single file.
                del self.graph_
                from .ops_whole.session import OnnxWholeSession
                self._whole = OnnxWholeSession(
                    self.obj, self.runtime, self.runtime_options)
                self._run = self._run_whole_runtime
            else:
                self.sequence_ = self.graph_['sequence']
                self.inits_ = self.graph_['inits']
                self.statics_ = self.graph_['statics']
                dtype = self._guess_input_dtype()
                variables = self.inits_.copy()
                for node in self.sequence_:
                    domain = node.onnx_node.domain
                    target_opset = self.target_opset_.get(domain, None)
                    keyf = domain, node.onnx_node.op_type
                    if keyf in self.functions_:
                        node.setup_runtime(self.graph_['functions'][keyf])
                    elif self.runtime in ('onnxruntime2', 'empty'):
                        node.setup_runtime(
                            self.runtime, variables, self.__class__,
                            target_opset=target_opset, dtype=dtype,
                            domain=domain, ir_version=self.ir_version_,
                            runtime_options=self.runtime_options,
                            existing_functions=self.functions_,
                            build_inference_node_function=lambda fct:
                                OnnxInference(
                                    fct, runtime=self.runtime,
                                    skip_run=self.skip_run,
                                    inplace=self.inplace,
                                    runtime_options=self.runtime_options,
                                    inside_loop=self.inside_loop,
                                    static_inputs=self.static_inputs))
                    else:
                        node.setup_runtime(
                            self.runtime, variables, self.__class__,
                            target_opset=target_opset, domain=domain,
                            ir_version=self.ir_version_,
                            runtime_options=self.runtime_options,
                            existing_functions=self.functions_,
                            build_inference_node_function=lambda fct:
                                OnnxInference(
                                    fct, runtime=self.runtime,
                                    skip_run=self.skip_run,
                                    inplace=self.inplace,
                                    runtime_options=self.runtime_options,
                                    inside_loop=self.inside_loop,
                                    static_inputs=self.static_inputs))
                    if hasattr(node, 'ops_') and hasattr(node.ops_, 'typed_outputs_'):
                        for k, v in node.ops_.typed_outputs_:
                            variables[k] = v
                self._run = self._run_sequence_runtime

        if not self.skip_run and self.runtime in ('python', None):
            if is_function_proto:
                self.shapes_ = None
            else:
                self.shapes_ = self._set_shape_inference_runtime()
            if self.inplace:
                self.inplaces_ = self._guess_inplace(self.input_inplace)

        self.exporters_ = OnnxInferenceExport(self)
        self.to_json = self.exporters_.to_json
        self.to_dot = self.exporters_.to_dot
        self.to_python = self.exporters_.to_python
        self.to_text = self.exporters_.to_text
        self.to_onnx_code = self.exporters_.to_onnx_code

        if self.runtime in ('python_compiled', 'python_compiled_debug'):
            # switch the inference method to the compiled one
            _, fct, code = self._build_compile_run('debug' in self.runtime)
            setattr(self, '_run_compiled', fct)
            setattr(self, '_run_compiled_code', code)
            self._run = self._run_sequence_runtime_compiled

    def _run_sequence_runtime_compiled(
            self, inputs, clean_right_away=False, intermediate=False,
            verbose=0, node_time=False, yield_ops=None, fLOG=None,
            context=None, attributes=None):
        """
        Executes a compiled version of @see me _run_sequence_runtime,
        compiled with method @see me _build_compile_run.
        Every parameter with a default value is ignored.
        Switch to ``runtime='python'`` to enable those.
        """
        try:
            return self._run_compiled(  # pylint: disable=E1101
                inputs, yield_ops=yield_ops, context=context,
                attributes=attributes)
        except NameError as e:
            raise RuntimeError(  # pragma: no cover
                "Unable to compute prediction due to %r. Code:\n%s"
                "" % (e, print_code(
                    self._run_compiled_code))) from e  # pylint: disable=E1101

    def _guess_input_dtype(self):
        for _, v in self.graph_['inputs'].items():
            if 'type' not in v:
                continue  # pragma: no cover
            t = v['type']
            if 'elem' not in t:
                continue
            if t['elem'] == 'double':
                return numpy.float64
        return numpy.float32

    def __str__(self):
        """
        usual
        """
        rows = ['OnnxInference(...)']
        if hasattr(self, '_run_compiled_code'):
            rows.append(
                textwrap.indent(
                    self._run_compiled_code, '    '))  # pylint: disable=E1101
        else:
            rows.append(textwrap.indent(str(self.obj), '    '))
        return "\n".join(rows)

    def __repr__(self):
        """
        usual
        """
        return "OnnxInference(...)"  # pragma: no cover

    def check_onnx(self):
        """
        Checks the model follows :epkg:`ONNX` conventions.
        """
        check_onnx(self.obj)

    def shape_inference(self):
        """
        Infers the shape of the outputs
        with the :epkg:`onnx` package.

        @return A new :epkg:`ONNX` graph which defines the outputs.
        """
        return shape_inference.infer_shapes(self.obj)

    @property
    def input_names(self):
        """
        Returns the names of all inputs.
        It does not include the optional inputs.

        .. versionchanged:: 0.6
            The list does not include optional inputs anymore.
        """
        if hasattr(self.obj, 'graph'):
            inits = set(_.name for _ in self.obj.graph.initializer)
            return [_.name for _ in self.obj.graph.input if _.name not in inits]
        return list(self.obj.input)

    @property
    def input_names_shapes(self):
        """
        Returns the names and shapes of all inputs.
        This method assumes all inputs are tensors.
        It does not include the optional inputs.

        .. versionchanged:: 0.6
            The list does not include optional inputs anymore.
        """
        names = set(self.input_names)
        return [(_.name, _var_as_dict(_)['type']['shape'])
                for _ in self.obj.graph.input if _.name in names]

    @property
    def optional_inputs(self):
        """
        Returns the list of optional inputs
        (the model has an initializer of the same name as one input).
        """
        inits = (set(i.name for i in self.obj.graph.initializer) |
                 set(i.name for i in self.obj.graph.sparse_initializer))
        return set(self.input_names) & inits
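    # A hedged sketch (not part of the original file): `optional_inputs` flags
    # the inputs backed by an initializer of the same name; `model_def` is an
    # assumed ONNX model:
    #
    #     oinf = OnnxInference(model_def)
    #     print(oinf.optional_inputs)  # possibly an empty set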

    @staticmethod
    def _get_type_property(info, prop):
        if prop in info:
            return info[prop]
        if 'kind' in info and info['kind'] == 'sequence':
            if prop == 'shape':
                return ('?', )
        raise NotImplementedError(  # pragma: no cover
            f"Unable to retrieve property {prop!r} from {info!r}.")

    @property
    def input_names_shapes_types(self):
        """
        Returns the names, shapes, types of all inputs.
        This method assumes all inputs are tensors.
        It does not include the optional inputs.

        .. versionchanged:: 0.6
            The list does not include optional inputs anymore.
        """
        f = OnnxInference._get_type_property
        names = set(self.input_names)
        if isinstance(self.obj, FunctionProto):
            return [(_.name, f(_var_as_dict(_)['type'], 'shape'),
                     f"tensor({f(_var_as_dict(_)['type'], 'elem')})")
                    for _ in self.obj.input if _.name in names]
        return [(_.name, f(_var_as_dict(_)['type'], 'shape'),
                 f"tensor({f(_var_as_dict(_)['type'], 'elem')})")
                for _ in self.obj.graph.input if _.name in names]

    @property
    def output_names(self):
        """
        Returns the names of all outputs.
        """
        if isinstance(self.obj, FunctionProto):
            return [_ for _ in self.obj.output]
        return [_.name for _ in self.obj.graph.output]

    @property
    def output_names_shapes(self):
        """
        Returns the names and shapes of all outputs.
        This method assumes all outputs are tensors.
        """
        f = OnnxInference._get_type_property
        if isinstance(self.obj, FunctionProto):
            return [(_, None) for _ in self.obj.output]
        return [(_.name, f(_var_as_dict(_)['type'], 'shape'))
                for _ in self.obj.graph.output]

    @property
    def output_names_shapes_types(self):
        """
        Returns the names, shapes, types of all outputs.
        This method assumes all outputs are tensors.
        It does not include the optional outputs.

        .. versionadded:: 0.7
        """
        names = set(self.output_names)
        f = OnnxInference._get_type_property
        if isinstance(self.obj, FunctionProto):
            return [(_, None) for _ in self.obj.output if _ in names]
        return [(_.name, f(_var_as_dict(_)['type'], 'shape'),
                 f"tensor({f(_var_as_dict(_)['type'], 'elem')})")
                for _ in self.obj.graph.output if _.name in names]

    def global_index(self, name):
        """
        Maps every name to one integer to avoid using dictionaries
        when running the predictions.

        @param name result name
        @return integer
        """
        if not hasattr(self, '_global_index'):
            self._global_index = {}
        if name in self._global_index:
            return self._global_index[name]
        self._global_index[name] = len(self._global_index)
        return self._global_index[name]
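    # A hedged sketch (not part of the original file): global_index memoizes a
    # dense name -> integer mapping so intermediate results can be stored in a
    # plain list instead of a dictionary:
    #
    #     oinf = OnnxInference(model_def)  # `model_def` is an assumed model
    #     i = oinf.global_index('X')       # first call registers 'X'
    #     assert oinf.global_index('X') == i  # later calls return the same slot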

    def to_sequence(self, existing_functions=None):
        """
        Produces a graph to facilitate the execution.

        One example:

        .. exref::
            :title: Convert ONNX into graph

            An example of how to convert an :epkg:`ONNX`
            graph into the internal execution graph.

            .. runpython::
                :showcode:
                :warningout: DeprecationWarning

                import pprint
                import numpy
                from mlprodict.npy.xop import loadop
                from mlprodict.onnxrt import OnnxInference

                OnnxAiOnnxMlLinearRegressor = loadop(
                    ('ai.onnx.ml', 'LinearRegressor'))

                pars = dict(coefficients=numpy.array([1., 2.]),
                            intercepts=numpy.array([1.]),
                            post_transform='NONE')
                onx = OnnxAiOnnxMlLinearRegressor(
                    'X', output_names=['Y'], **pars)
                model_def = onx.to_onnx(
                    {'X': pars['coefficients'].astype(numpy.float32)},
                    outputs={'Y': numpy.float32},
                    target_opset=12)
                oinf = OnnxInference(model_def)
                pprint.pprint(oinf.to_sequence())

        See an example of representation in notebook
        :ref:`onnxvisualizationrst`.
        """
        inits = {}
        variables = {}
        outputs = {}
        nodes = {}
        statics = {}
        targets = {}
        functions = {}
        attributes = {}
        if existing_functions is not None:
            functions.update(existing_functions)
        is_function_proto = isinstance(self.obj, FunctionProto)
        if is_function_proto and self.obj.attribute:
            for att in self.obj.attribute:
                attributes[att] = None

        for o in self.obj.opset_import:
            targets[o.domain] = o.version

        if (hasattr(self.obj, 'functions') and len(self.obj.functions) > 0 and
                (self.runtime is None or not
                 self.runtime.startswith('onnxruntime1'))):
            for fct in self.obj.functions:
                try:
                    oinf = OnnxInference(
                        fct, runtime=self.runtime,
                        skip_run=self.skip_run,
                        inplace=self.inplace,
                        runtime_options=self.runtime_options,
                        inside_loop=self.inside_loop,
                        static_inputs=self.static_inputs,
                        existing_functions=functions)
                except RuntimeError as e:
                    raise RuntimeError(  # pragma: no cover
                        "Unable to instantiate function %r, %r." % (
                            fct.domain, fct.name)) from e
                functions[fct.domain, fct.name] = oinf

        # static variables
        if self.static_inputs is not None:
            for n in self.static_inputs:
                statics[n] = {'name': n}
                self.global_index(n)

        obj_graph = (
            self.obj if isinstance(self.obj, FunctionProto)
            else self.obj.graph)

        # inputs
        for obj in obj_graph.input:
            if is_function_proto:
                variables[obj] = {'name': obj}
                self.global_index(obj)
            else:
                variables[obj.name] = _var_as_dict(obj)
                self.global_index(obj.name)

        # outputs
        for obj in obj_graph.output:
            if is_function_proto:
                outputs[obj] = {'name': obj}
                self.global_index(obj)
            else:
                if hasattr(obj, 'type') and str(obj.type) != '':
                    outputs[obj.name] = _var_as_dict(obj)
                else:
                    outputs[obj.name] = {'name': obj.name}
                self.global_index(obj.name)

        # initializer
        if not is_function_proto:
            for obj in obj_graph.initializer:
                init_obj = _var_as_dict(obj)
                if init_obj is None:
                    raise RuntimeError(  # pragma: no cover
                        f"Unable to convert an initializer\n{obj}")
                inits[obj.name] = init_obj
                self.global_index(obj.name)
                if 'value' not in inits[obj.name]:
                    raise RuntimeError(  # pragma: no cover
                        "One initializer has no value: '{}'\n{}\n{}".format(
                            obj.name, inits[obj.name], obj))

        # nodes
        for node in obj_graph.node:
            dobj = _var_as_dict(node)
            if dobj is None:
                raise RuntimeError(  # pragma: no cover
                    f"Unable to convert a node\n{node}")
            if 'atts' in dobj:
                atts = dobj['atts']
                for k, v in atts.items():
                    if not isinstance(v, dict) or (
                            'value' not in v and 'ref_attr_name' not in v):
                        raise RuntimeError(  # pragma: no cover
                            "A parameter has no (sparse) value '{}' "
                            "for node '{}'\nv={}\ndobj=[{}]".format(
                                k, node.name, v, node))
            if node.name in nodes:  # pragma: no cover
                i = 2
                while True:
                    new_name = "%s_n%i" % (node.name, i)
                    if new_name not in nodes:
                        break
                    i += 1
            else:
                new_name = node.name
            nodes[new_name] = OnnxInferenceNode(node, dobj, self.global_index)

        # names
        names = {}
        for k, v in statics.items():
            if (k, 0) in names:
                raise RuntimeError(  # pragma: no cover
                    f"Static variable '{k}' already exists (tag='{names[k, 0][0]}').")
            names[k, 0] = ('S', v)
        for k, v in inits.items():
            if (k, 0) in names:
                raise RuntimeError(  # pragma: no cover
                    f"Initializer '{k}' already exists (tag='{names[k, 0][0]}').")
            names[k, 0] = ('C', v)
        for k, v in variables.items():
            if (k, 0) in names:
                if k in inits:
                    # Kind of default value for an input
                    continue
                raise RuntimeError(  # pragma: no cover
                    f"Variable '{k}' already exists (tag='{names[k, 0][0]}').")
            names[k, 0] = ('I', v)
        for k, v in outputs.items():
            if (k, 0) in names and (self.runtime != 'empty' and len(nodes) > 0):
                if not self.inside_loop or names[k, 0][0] != 'I':
                    raise RuntimeError(  # pragma: no cover
                        f"Output '{k}' already exists (tag='{names[k, 0][0]}').")
                else:
                    # For an input and an output sharing the same name,
                    # the name is marked as an input.
                    continue
            names[k, 0] = ('O', v)
        for k, v in nodes.items():
            if (k, 1) in names:
                raise RuntimeError(  # pragma: no cover
                    "Node '{}' already exists (tag='{}'). "
                    "Use inside_loop=True to bypass this exception.".format(
                        k, names[k, 0][0]))
            names[k, 1] = ('N', v)

        # ordering
        order = {}
        modif = 1
        intermediate = {}
        while modif > 0:
            modif = 0
            for (k, _), v in names.items():
                if (k, 1) in order:
                    # The operator node is already processed.
                    continue
                if v[0] in {'I', 'C', 'S'}:
                    if (k, 0) not in order:
                        order[k, 0] = len(order)  # A data node.
                        modif += 1
                    continue
                if v[0] == 'O':
                    continue
                if all((inp, 0) in order for inp in v[1].inputs if inp != ''):
                    # If all inputs are available,
                    # the operator node is processed.
                    order[k, 1] = len(order)
                    modif += 1
                    for o in v[1].outputs:
                        if o in (None, ''):
                            # optional output
                            continue
                        if (o, 0) in order:
                            raise RuntimeError(  # pragma: no cover
                                "Two nodes share the same output '{}' "
                                "or an operator and an output "
                                "share the same name. "
                                "(node: {}).".format(o, v[1]))
                        # We add a data node.
                        order[o, 0] = len(order)
                        intermediate[o] = None
                        modif += 1

        # compute
        rev = [(v, k[0], k[1]) for k, v in order.items()]
        rev.sort()
        sequence = []
        for _, name, node_kind in rev:
            if name not in nodes:
                continue
            if node_kind == 0:
                # It is an output which shares the same name
                # as a node.
                continue
            node = nodes[name]
            node.set_order(len(sequence))
            sequence.append(node)

        # defines where an intermediate output is no longer needed
        last_used = {}
        for node in sequence:
            for inp in node.inputs:
                last_used[inp] = node.order
        for k, ord in last_used.items():
            sequence[ord].add_variable_to_clean(k)

        results = dict(inits=inits, inputs=variables, outputs=outputs,
                       attributes=attributes,
                       nodes=nodes, sequence=sequence,
                       functions=functions,
                       intermediate=intermediate,
                       targets=targets,
                       ir_version=(
                           None if is_function_proto
                           else self.obj.ir_version),
                       statics=statics)
        if len(sequence) < len(nodes):
            # Not all nodes will be executed.
            raise RuntimeError(  # pragma: no cover
                "Unable to run all nodes.\n--Nodes--\n%s\n--Sequence--\n%s"
                "\n--Inputs--\n%s\n--Inits--\n%s\n--Statics\n%s"
                "" % (pprint.pformat(nodes), pprint.pformat(sequence),
                      pprint.pformat(list(variables)),
                      pprint.pformat(list(inits)),
                      pprint.pformat(list(statics))))
        return results

    #############
    # inference #
    #############

    def run(self, inputs, clean_right_away=False,
            intermediate=False, verbose=0, node_time=False,
            overwrite_types=None, yield_ops=None, fLOG=None,
            context=None, attributes=None):
        """
        Computes the predictions for this :epkg:`onnx` graph.

        :param inputs: inputs as dictionary or a dataframe
        :param clean_right_away: clean the intermediate outputs
            as soon as they are not needed
        :param intermediate: returns a dictionary of intermediate
            variables instead of the results only
        :param verbose: display information while predicting
        :param node_time: measure time of each node
        :param overwrite_types: shape inference does not work all the time,
            this allows to force types when building intermediate
            results, see @see fn select_model_inputs_outputs
        :param yield_ops: dictionary to overwrite the output of
            operator *YieldOp*
        :param fLOG: logging function if *verbose > 0*
        :param context: local variables, needed when this object is a subgraph
        :param attributes: used when this class runs a :epkg:`FunctionProto`
            to store the values of the attributes of the function
        :return: outputs as dictionary
            and a second dictionary of the time spent
            in each node if *node_time* is True

        .. exref::
            :title: Computes predictions with any runtime

            The following example compares predictions
            between :epkg:`scikit-learn` and this runtime
            for the python runtime.

            .. runpython::
                :showcode:
                :warningout: DeprecationWarning

                import numpy
                from sklearn.linear_model import LinearRegression
                from sklearn.datasets import load_iris
                from sklearn.model_selection import train_test_split
                from mlprodict.onnxrt import OnnxInference
                from mlprodict.onnx_conv import to_onnx

                iris = load_iris()
                X, y = iris.data, iris.target
                X_train, X_test, y_train, _ = train_test_split(X, y)
                clr = LinearRegression()
                clr.fit(X_train, y_train)

                exp = clr.predict(X_test[:5])
                print(exp)

                model_def = to_onnx(clr, X_train.astype(numpy.float32),
                                    target_opset=12)
                oinf = OnnxInference(model_def)
                y = oinf.run({'X': X_test[:5]})
                print(y)

        The function returns all intermediate outputs
        if *intermediate* is True. In case of runtime
        *onnxruntime1*, if *intermediate* is True,
        the class first builds all the :epkg:`ONNX` graphs
        cut to keep a single output each and converts them
        into *OnnxInference*.

        .. versionchanged:: 0.9
            Parameter *attributes* was added.
        """
        def retype(col_array):
            if (hasattr(col_array, 'categories') and
                    hasattr(col_array, 'from_codes')):
                # isinstance(col_array, pandas.Categorical):
                return col_array.astype(numpy.int64)
            return col_array

        if hasattr(inputs, 'columns') and hasattr(inputs, 'iloc'):
            # == isinstance(inputs, pandas.DataFrame)
            inputs = OrderedDict((
                name, retype(numpy.expand_dims(inputs[name].values, axis=1)))
                for name in inputs.columns)
        if intermediate:
            if self.inplace:
                raise RuntimeError(  # pragma: no cover
                    "inplace must be False if intermediate is True, a container "
                    "might be used by several nodes.")
            return self._run(inputs, clean_right_away=False,  # pylint: disable=E1123
                             intermediate=intermediate,
                             verbose=verbose, node_time=node_time,
                             overwrite_types=overwrite_types,
                             yield_ops=yield_ops, fLOG=fLOG,
                             context=context, attributes=attributes)
        if overwrite_types is not None:
            raise RuntimeError(  # pragma: no cover
                "overwrite_types is not used if intermediate is False.")
        return self._run(inputs, clean_right_away=False,  # pylint: disable=E1123
                         intermediate=intermediate,
                         verbose=verbose, node_time=node_time,
                         yield_ops=yield_ops, fLOG=fLOG,
                         context=context, attributes=attributes)

    def run2onnx(self, inputs, verbose=0, fLOG=None,
                 as_parameter=True, suffix='_DBG',
                 param_name=None, node_type='DEBUG',
                 domain='DEBUG', domain_opset=1,
                 attributes=None):
        """
        Executes the graph with the given inputs, then adds the intermediate
        results into ONNX nodes in the original graph. Once saved, it can be
        inspected with a tool such as :epkg:`netron`.

        :param inputs: inputs as dictionary or a dataframe
        :param verbose: display information while predicting
        :param fLOG: logging function if *verbose > 0*
        :param as_parameter: add new nodes with results as one parameter
            (True) or as initializer (False)
        :param suffix: suffix to add to new results
        :param param_name: name of the parameter to add
            (by default the result name), it can be a function
            `param_name(result_name) -> parameter_name`
        :param node_type: type of the new node
        :param domain: domain of the new node
        :param domain_opset: opset for *domain*
        :param attributes: values for attributes if this class runs a
            :epkg:`FunctionProto`
        :return: outputs as dictionary
            and the onnx graph with new nodes

        The following example shows how to use it.

        .. gdot::
            :script: DOT-SECTION

            from sklearn.linear_model import LinearRegression
            from sklearn.datasets import load_iris
            from mlprodict.onnxrt import OnnxInference
            import numpy

            iris = load_iris()
            X = iris.data[:, :2]
            y = iris.target
            lr = LinearRegression()
            lr.fit(X, y)

            from mlprodict.onnx_conv import to_onnx
            model_onnx = to_onnx(lr, X.astype(numpy.float32))
            oinf = OnnxInference(model_onnx, inplace=False)

            model_onnx_debug = oinf.run2onnx({'X': X[:3].astype(numpy.float32)})
            oinf_debug = OnnxInference(model_onnx_debug[1])

            print("DOT-SECTION", oinf_debug.to_dot())

        .. versionadded:: 0.7
        """
        intermediate = self.run(inputs, verbose=verbose, fLOG=fLOG,
                                intermediate=True, attributes=attributes)
        for name in self.input_names:
            del intermediate[name]
        new_onx = insert_results_into_onnx(
            self.obj, intermediate, as_parameter=as_parameter,
            suffix=suffix, param_name=param_name, node_type=node_type,
            domain=domain, domain_opset=domain_opset)
        return intermediate, new_onx

    def display_sequence(self, verbose=1):
        """
        Shows the sequence of nodes to run if ``runtime=='python'``.
        """
        rows = []
        rows.append(f"#node: {len(self.sequence_)}")
        for i, node in enumerate(self.sequence_):
            if verbose >= 1:
                rows.append(f"{i}: {str(node)}")
        return "\n".join(rows)

    def _run_sequence_runtime(self, inputs, clean_right_away=False,
                              intermediate=False, verbose=0, node_time=False,
                              overwrite_types=None, yield_ops=None,
                              fLOG=None, context=None, attributes=None):
        if overwrite_types is not None:
            raise NotImplementedError(  # pragma: no cover
                "overwrite_types != None not implemented.")
        if clean_right_away:
            raise NotImplementedError(  # pragma: no cover
                "clean_right_away=true not implemented.")

        if node_time:
            mtime = []
        if verbose != 0:
            printed = set()

        if context is not None:
            for k in context:
                self.global_index(k)

        if hasattr(self, "_values_init"):
            values = self._values_init.copy()  # pylint: disable=E0203
            if context is not None:
                for k, v in context.items():
                    values[self._global_index[k]] = v
        else:
            values = [None] * len(self._global_index)
            if verbose >= 1 and fLOG is not None:
                if context is not None:
                    for k, v in context.items():
                        if v is None:
                            continue
                        values[self._global_index[k]] = v
                        if verbose < 3:
                            fLOG(  # pragma: no cover
                                "+kI='{}': {} (dtype={} min={} max={})".format(
                                    k, v.shape, v.dtype, numpy_min(v), numpy_max(v)))
                        else:
                            fLOG(  # pragma: no cover
                                "+kI='{}': {} (dtype={} min={} max={}\n{}".format(
                                    k, v.shape, v.dtype, numpy_min(v), numpy_max(v), v))
                for k, v in self.inits_.items():
                    values[self._global_index[k]] = v['value']
                    if verbose < 3:
                        fLOG("+ki='{}': {} (dtype={} min={} max={})".format(
                            k, v['value'].shape, v['value'].dtype,
                            numpy_min(v['value']), numpy_max(v['value'])))
                    else:
                        fLOG("+ki='{}': {} (dtype={} min={} max={}\n{}".format(
                            k, v['value'].shape, v['value'].dtype,
                            numpy_min(v['value']), numpy_max(v['value']),
                            v['value']))
                    printed.add(k)
            else:
                if context is not None:
                    for k, v in context.items():
                        values[self._global_index[k]] = v
                for k, v in self.inits_.items():
                    values[self._global_index[k]] = v['value']
                # stores the array to skip initializing a second time
                if verbose == 0 or fLOG is None:
                    self._values_init = values.copy()

        for name, value in inputs.items():
            values[self._global_index[name]] = value

        if verbose == 0 or fLOG is None:
            if node_time:
                for i, node in enumerate(self.sequence_):
                    if yield_ops is not None and node.onnx_node.op_type == 'YieldOp':
                        out = node.onnx_node.output[0]
                        if out in yield_ops:
                            values[node.outputs_indices[0]] = yield_ops[out]
                            continue
                        raise RuntimeError(  # pragma: no cover
                            "YieldOp output %r could not be found in "
                            "yield_ops: %r (node=%r)." % (
                                out, list(sorted(yield_ops)), node.onnx_node))
                    t = perf_counter()
                    node.run(values, attributes=attributes)
                    t2 = perf_counter()
                    mtime.append(dict(i=i, name=node.onnx_node.name,
                                      op_type=node.onnx_node.op_type,
                                      time=t2 - t))
            else:
                for node in self.sequence_:
                    node.run(values, attributes=attributes)
        else:
            def dispsimple(arr):
                if hasattr(arr, 'shape'):
                    if len(arr.shape) <= 1:
                        threshold = 8
                    else:
                        threshold = min(
                            50, min(50 // max(arr.shape[1], 1), 8) * arr.shape[1])
                    if hasattr(arr, 'todense'):
                        fLOG(  # pragma: no cover
                            numpy.array2string(arr.todense(), max_line_width=120,
                                               suppress_small=True, threshold=threshold))
                    else:
                        fLOG(numpy.array2string(arr, max_line_width=120,
                                                suppress_small=True,
                                                threshold=threshold))
                else:  # pragma: no cover
                    s = str(arr)
                    if len(s) > 50:
                        s = s[:50] + "..."
                    fLOG(s)

            if verbose >= 2:
                for k in sorted(self._global_index):
                    if values[self._global_index[k]] is None:
                        continue
                    obj = values[self._global_index[k]]
                    if k not in printed:
                        printed.add(k)
                        if hasattr(obj, 'shape'):
                            fLOG("-kv='{}' shape={} dtype={} min={} max={}{}".format(
                                k, obj.shape, obj.dtype, numpy_min(obj),
                                numpy_max(obj),
                                ' (sparse)' if isinstance(obj, coo_matrix) else ''))
                        elif (isinstance(obj, list) and len(obj) > 0 and
                                not isinstance(obj[0], dict)):  # pragma: no cover
                            fLOG(f"-kv='{k}' list len={len(obj)}")
                            if verbose >= 3 and len(obj) > 0:
                                fLOG(f"first={obj[0]} last={obj[-1]}")
                        else:  # pragma: no cover
                            fLOG(f"-kv='{k}' type={type(obj)}")

            keys = set(k for k in range(len(values)) if values[k] is not None)
            if verbose >= 1:
                fLOG("-- OnnxInference: run {} nodes with {} inputs".format(
                    len(self.sequence_), len(inputs)))
            for i, node in enumerate(self.sequence_):
                if verbose >= 1:
                    fLOG(node)
                if yield_ops is not None and node.onnx_node.op_type == 'YieldOp':
                    out = node.onnx_node.output[0]
                    if out in yield_ops:
                        fLOG(f"+yo={out!r}")
                        values[node.outputs_indices[0]] = yield_ops[out]
                    else:
                        raise RuntimeError(  # pragma: no cover
                            "YieldOp output %r could not be found in "
                            "yield_ops: %r (node=%r)." % (
                                out, list(sorted(yield_ops)), node.onnx_node))
                elif node_time:
                    t = perf_counter()
                    node.run(values, attributes=attributes)
                    t2 = perf_counter()
                    mtime.append(dict(i=i, name=node.onnx_node.name,
                                      op_type=node.onnx_node.op_type,
                                      time=t2 - t))
                else:
                    node.run(values, verbose=verbose, fLOG=fLOG,
                             attributes=attributes)
                added = 0
                for k in range(len(values)):  # pylint: disable=C0200
                    if values[k] is None:
                        continue
                    if k not in keys and k not in printed:
                        added += 1
                        printed.add(k)
                        name = list(
                            name for name in self._global_index  # pylint: disable=C0206
                            if self._global_index[name] == k)
                        if verbose >= 1:
                            if isinstance(values[k], (numpy.ndarray, coo_matrix)):
                                name = name[0]
                                mini = numpy_min(values[k])
                                maxi = numpy_max(values[k])
                                fLOG("+kr{}'{}': {} (dtype={} min={} max={}{})".format(
                                    "=" if len(values[k].shape) == 0 or min(
                                        values[k].shape) > 0 else "*",
                                    name, values[k].shape, values[k].dtype,
                                    mini, maxi,
                                    ' sparse' if isinstance(values[k], coo_matrix) else ''))
                                if verbose >= 3:
                                    dispsimple(values[k])
                            else:
                                fLOG(f"+kr='{name}': {type(values[k])}")
                                if verbose >= 3:  # pragma: no cover
                                    dispsimple(values[k])
                if added == 0 and verbose >= 1:
                    fLOG("? no new result")  # pragma: no cover

        if intermediate:
            values = [(v, k, values[v]) for k, v in self._global_index.items()]
            values.sort()
            values = OrderedDict((k, v) for _, k, v in values)
            return (values, mtime) if node_time else values

        try:
            res = {k: values[self._global_index[k]] for k in self.outputs_}
        except KeyError as e:  # pragma: no cover
            raise RuntimeError("Unable to find one output [{}]\n in [{}]"
                               ".".format(", ".join(sorted(self.outputs_)),
                                          ", ".join(sorted(values)))) from e
        if verbose != 0:
            # check input and output have the expected type
            self._validate_outputs(res, verbose=verbose, fLOG=fLOG)
        return (res, mtime) if node_time else res

    def _validate_outputs(self, res, verbose=0, fLOG=None):
        """
        Checks the outputs have the expected type.
        The function returns the list of mismatches.

        :param res: results in a dictionary
        :param verbose: verbosity
        :param fLOG: logging function
        :return: dictionary
        """
        if verbose >= 2:
            fLOG(f'[VALIDATE] type {type(self.obj)!r}')
        if isinstance(self.obj, ModelProto):
            from mlprodict.onnx_tools.onnx2py_helper import (
                guess_proto_dtype, get_tensor_elem_type, get_tensor_shape)
            outputs = {o.name: o for o in self.obj.graph.output}
            rows = []
            mis = {}
            for k, v in res.items():
                if k not in outputs:
                    rows.append(
                        f"Result {k!r} cannot be found in {set(outputs)!r}.")
                    continue
                try:
                    expected = get_tensor_elem_type(outputs[k])
                except TypeError:
                    expected = None
                shape = get_tensor_shape(outputs[k])
                if v is None:
                    rows.append(
                        f"Result {k!r} is None instead of {expected!r}.")
                    continue
                dtype = guess_proto_dtype(v.dtype)
                if expected != dtype:
                    mis[k] = f"dtype {dtype!r} != {expected!r}"
                    rows.append(
                        "Result %r has unexpected element type %r "
                        "instead of %r." % (
                            k, dtype, expected))
                if shape is None or len(shape) == 0:
                    continue
                if len(shape) != len(v.shape):
                    mis[k] = f"shape {v.shape!r} != {shape!r}"
                    rows.append(
                        "Result %r has unexpected shape length %r "
                        "instead of %r." % (
                            k, v.shape, shape))
                    continue
                for a, b in zip(v.shape, shape):
                    if b is None or isinstance(b, str):
                        continue
                    if a != b:
                        mis[k] = f"shape {v.shape!r} != {shape!r}"
                        rows.append(
                            "Result %r has unexpected shape %r "
                            "instead of %r." % (
                                k, v.shape, shape))
                        break
            if len(rows) > 0:
                if verbose < 0:
                    raise RuntimeError(  # pragma: no cover
                        "Validation failed.\n- %s" % "\n- ".join(rows))
                else:
                    fLOG("[VALIDATE] validation failed.\n- %s" %
                         "\n- ".join(rows))
            if verbose >= 2:  # pragma: no cover
                fLOG(f'[VALIDATE] mis={mis!r}')
            return mis

        if isinstance(self.obj, FunctionProto):
            outputs = set(self.obj.output)
            got = set(res)
            if got != outputs:
                if verbose < 0:  # pragma: no cover
                    raise RuntimeError(
                        "Unexpected mismatch between outputs %r and "
                        "expected outputs %r." % (got, outputs))
                else:  # pragma: no cover
                    fLOG(
                        f"CHECK: expected outputs {outputs!r} != outputs {got!r}")
                mis = {k: None for k in got - (got & outputs)}
                if verbose >= 2:
                    fLOG(f'[VALIDATE] mis={mis!r}')
                return mis
            if verbose >= 2:
                fLOG('[VALIDATE] mis={}')
            return {}

        raise TypeError(  # pragma: no cover
            f"Unexpected type {type(self.obj)!r} for self.obj.")
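    # A hedged sketch (not part of the original file): validation is triggered
    # by run(..., verbose=...) on the python runtime; a negative verbosity
    # raises instead of logging:
    #
    #     oinf = OnnxInference(model_def)  # `model_def` is an assumed model
    #     res = oinf.run({'X': x}, verbose=1, fLOG=print)  # logs any mismatch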

    def build_intermediate(self, outputs=None, verbose=0, overwrite_types=None,
                           fLOG=None):
        """
        Builds every possible :epkg:`ONNX` file
        which computes one specific intermediate output
        from the inputs.

        :param outputs: subsets of outputs to get,
            None to get all outputs,
        :param overwrite_types: shape inference does not work all the time,
            this allows to force types when building intermediate
            results, see @see fn select_model_inputs_outputs
        :param verbose: displays intermediate information
        :param fLOG: logging function
        :return: :epkg:`*py:collections:OrderedDict`

        .. versionchanged:: 0.6
        """
        if verbose > 0:
            fLOG('[build_intermediate] BEGIN.')  # pragma: no cover
        if outputs is not None:
            if isinstance(outputs, str):
                outputs = [outputs]
            if not isinstance(outputs, set):
                outputs = set(outputs)
        ord = OrderedDict()
        for output in enumerate_model_node_outputs(self.obj, order=False):
            if outputs is not None and output not in outputs:
                continue
            subonx = select_model_inputs_outputs(
                self.obj, outputs=output, infer_shapes=True,
                overwrite=overwrite_types)
            subonx = onnx_remove_node_unused(subonx)
            if verbose > 0:
                fLOG(  # pragma: no cover
                    f'[build_intermediate] + {output}')
            ord[output] = OnnxInference(subonx, runtime=self.runtime,
                                        skip_run=self.skip_run,
                                        runtime_options=self.runtime_options,
                                        inplace=self.inplace,
                                        input_inplace=self.input_inplace)
        if verbose > 0:
            fLOG(  # pragma: no cover
                '[build_intermediate] END.')
        return ord
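    # A hedged sketch (not part of the original file): every intermediate
    # result gets its own ONNX graph, one OnnxInference instance per output
    # name; `model_def` and `x` are assumed:
    #
    #     oinf = OnnxInference(model_def, inplace=False)
    #     for name, sub in oinf.build_intermediate().items():
    #         print(name, sub.run({'X': x})[name])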

    def _run_whole_runtime(self, inputs, clean_right_away=False,
                           intermediate=False, verbose=0, node_time=False,
                           overwrite_types=None, yield_ops=None, fLOG=None,
                           context=None, attributes=None):
        # node_time is unused, context is unused
        if clean_right_away:
            raise RuntimeError(  # pragma: no cover
                "clean_right_away=true does not work with this runtime.")
        if intermediate:
            if hasattr(self, "intermediate_onnx_inference_"):
                inter_run = self.intermediate_onnx_inference_  # pylint: disable=E0203
            else:
                if verbose > 0:
                    fLOG(  # pragma: no cover
                        "-- OnnxInference: build intermediate")
                inter_run = self.build_intermediate(
                    verbose=verbose, fLOG=fLOG, overwrite_types=overwrite_types)
                self.intermediate_onnx_inference_ = inter_run
                graph = self.to_sequence()
                self.inits_ = graph['inits']

            if verbose >= 1:
                fLOG(  # pragma: no cover
                    "-- OnnxInference: run {} nodes".format(
                        len(self.intermediate_onnx_inference_)))
            values = OrderedDict(inputs)
            for k, v in self.inits_.items():
                values[k] = v['value']
            if verbose >= 2:  # pragma: no cover
                for k in sorted(values):
                    fLOG(
                        f"-k='{k}' shape={values[k].shape} dtype={values[k].dtype}")
            for node, oinf in self.intermediate_onnx_inference_.items():
                if verbose >= 4:  # pragma: no cover
                    fLOG(f'[intermediate] {node!r}')
                if verbose >= 5:  # pragma: no cover
                    fLOG(oinf.obj)
                if yield_ops is not None and node.onnx_node.op_type == 'YieldOp':
                    out = node.onnx_node.output[0]
                    if out in yield_ops:
                        values[out] = yield_ops[out]
                        continue
                    raise RuntimeError(  # pragma: no cover
                        "YieldOp output %r could not be found in "
                        "yield_ops: %r (node=%r)." % (
                            out, list(sorted(yield_ops)), node.onnx_node))
                output = oinf.run(inputs, attributes=attributes)[node]
                values[node] = output
                if verbose >= 1:
                    if verbose >= 4:  # pragma: no cover
                        for k, v in inputs.items():
                            if isinstance(output, numpy.ndarray):
                                fLOG("-i='{}': {} (dtype={}) {}".format(
                                    k, v.shape, v.dtype, v.ravel().tolist()))
                            else:
                                fLOG(
                                    f"-i='{k}': {v.shape} (dtype={v.dtype}) - ?")
                    if isinstance(output, numpy.ndarray):
                        fLOG("+k='{}': {} (dtype={})".format(  # pragma: no cover
                            node, output.shape, output.dtype))
                        if verbose >= 2:  # pragma: no cover
                            fLOG(output)
                    else:
                        fLOG("+k='{}': {}".format(  # pragma: no cover
                            node, type(output)))
                        if verbose >= 2:  # pragma: no cover
                            fLOG(output)
            return values

        if verbose != 0:
            warnings.warn(
                "verbose option not implemented if runtime is 'onnxruntime1'")
        res = self._whole.run(inputs)
        return {k: v for k, v in zip(self.outputs_, res)}

    def __getitem__(self, item):
        """
        Returns the ONNX version of a node.
        """
        if isinstance(item, tuple):
            node_name, att_name = item
        else:
            node_name = item
            att_name = None

        node_ = None
        for node in self.obj.graph.node:
            if node.name == node_name:
                node_ = node
                break

        if node_ is None:
            raise IndexError(  # pragma: no cover
                "Unable to get node name '{}'.\n{}".format(
                    node_name, "\n".join(node.name for node in self.obj.graph.node)))

        if att_name is None:
            return node_

        for att in node_.attribute:
            if att.name == att_name:
                return att

        raise IndexError(  # pragma: no cover
            f"Unable to find attribute '{att_name}' from node '{node_name}'.")
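    # A hedged sketch (not part of the original file): nodes and their
    # attributes can be fetched by name; 'LinearRegressor' is only an
    # illustrative node name:
    #
    #     node = oinf['LinearRegressor']                 # a NodeProto
    #     att = oinf['LinearRegressor', 'coefficients']  # one of its attributes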

    def switch_initializers_dtype(self, model=None,
                                  dtype_in=numpy.float32,
                                  dtype_out=numpy.float64):
        """
        Switches all initializers to ``numpy.float64``. If *model*
        is None, a simple cast is done. Otherwise, the function assumes
        the model is a :epkg:`scikit-learn` pipeline.
        This only works if the runtime is ``'python'``.

        @param model :epkg:`scikit-learn` model or None
        @param dtype_in previous type
        @param dtype_out next type
        @return done operations
        """
        from ..onnx_tools.optim.sklearn_helper import enumerate_fitted_arrays, pairwise_array_distances

        if self.runtime != 'python':  # pragma: no cover
            raise RuntimeError("Initializers can be cast only if the "
                               "runtime is 'python' not '{}'.".format(self.runtime))

        if hasattr(self, '_values_init'):
            del self._values_init

        # first pass: simple cast
        done = []
        initializer = self.inits_
        for k, v in initializer.items():
            if isinstance(v['value'], numpy.ndarray):
                if v['value'].dtype == dtype_in:
                    v['value'] = v['value'].astype(dtype_out)
                    done.append(("pass1", "+", "init", k, v['value']))
                else:
                    done.append(("pass1", "-", "init", k,
                                 v['value']))  # pragma: no cover
        for k, v in self.graph_['nodes'].items():
            res = v.switch_initializers_dtype(dtype_in=dtype_in,
                                              dtype_out=dtype_out)
            for r in res:
                done.append(("pass1", "node", k) + r)
        for k, v in self.graph_['intermediate'].items():
            if v is None:
                continue
            res = v.switch_initializers_dtype(dtype_in=dtype_in,
                                              dtype_out=dtype_out)
            for r in res:
                done.append(("pass1", "sub", k) + r)

        if model is not None:
            # Second pass, we compare all arrays from the model
            # to the arrays in the converted models.
            def dist(a):
                cast = a.astype(dtype_in).astype(dtype_out)
                d = pairwise_array_distances([cast], [a])[0, 0]
                return d

            done_ = [(c, c[-1]) for c in done]
            moda_ = [(a, a[-2][-1]) for a in enumerate_fitted_arrays(model)
                     if dist(a[-2][-1]) > 0]
            aconv = [_[-1] for _ in done_]
            amoda = [_[-1] for _ in moda_]
            distances = pairwise_array_distances(aconv, amoda)

            for i in range(distances.shape[0]):
                j = numpy.argmin(distances[i])
                d = distances[i, j]
                if d < 0.1:
                    numpy.copyto(aconv[i], amoda[j])
                    done.append(("pass2", d) + done_[i][0])

        return done
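    # A hedged sketch (not part of the original file): casting the
    # initializers lets the python runtime compute in double precision;
    # `model_def` and `x` are assumed:
    #
    #     oinf = OnnxInference(model_def, runtime='python')
    #     oinf.switch_initializers_dtype()  # float32 -> float64
    #     res = oinf.run({'X': x.astype(numpy.float64)})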

    def _set_shape_inference_runtime(self):
        """
        Sets shapes based on the shape inference
        relying on the runtime.
        The values are stored in every node.
        """
        try:
            rt = OnnxShapeInference(self.obj)
        except (ShapeInferenceMissing, NotImplementedShapeInferenceError,
                ShapeInferenceDimensionError, NotImplementedError):
            # an operator is missing, shape cannot be computed.
            return {name: None for name in self.output_names}
        except KeyError:
            # subgraphs or functions are not yet handled.
            # it should be removed later.
            return {name: None for name in self.output_names}
        except NameError:
            # loop subgraphs or functions are not yet handled.
            # they may overwrite results.
            return {name: None for name in self.output_names}
        except (ShapeInferenceException, RuntimeError, IndexError) as e:
            raise ShapeInferenceException(  # pragma: no cover
                f"Unable to run ShapeInference for\n{str(self.obj)}") from e
        out = rt.run()
        values = out.get()
        return values

    def infer_shapes(self):
        """
        Computes expected shapes.

        :return: dictionary of shapes
        """
        return self._set_shape_inference_runtime()
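    # A hedged sketch (not part of the original file): the returned dictionary
    # maps every result name to a shape object, or None when inference failed:
    #
    #     shapes = oinf.infer_shapes()  # `oinf` wraps an assumed model
    #     for name, shape in shapes.items():
    #         print(name, shape)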

    def _guess_inplace(self, input_inplace=False):
        """
        Looks into every node of the graph to see
        if there is a way to do the computation
        inplace. By default (*input_inplace=False*),
        the function assumes inputs cannot be modified
        so the first node cannot do inplace computation.
        This function only works with the python runtime.

        @param input_inplace the computation is allowed
                             to overwrite the input

        This function checks that one node is used only
        once and then can be modified by the next node.
        Nodes `A`, `C` can be overwritten by the computation.
        Node `B` cannot as it is used by two nodes.

        .. blockdiag::

            diagram {
                A -> B -> C -> E;
                B -> D;
            }

        It does not handle specific cases such as node `B` being
        overwritten by node `C` but without changing its shape
        and node `D` only needing the shape of `B`. Then `B` could
        be overwritten as well.
        """
        forbid = {}
        values = OrderedDict()
        for k in self.statics_:
            values[k] = dict(inplace=False, to=[], fr=[])
        for k in self.inputs_:
            values[k] = dict(inplace=input_inplace, to=[], fr=[])
        for k in self.inits_:
            values[k] = dict(inplace=False, to=[], fr=[])
        for node in self.sequence_:
            for n in node.inputs:
                if n == '':
                    continue
                values[n]['to'].append(node)
            for n in node.outputs:
                if node.op_type == 'Constant':
                    # We cannot modify a constant.
                    forbid[n] = node
                if n not in values:
                    values[n] = dict(inplace=None, to=[], fr=[])
                values[n]['fr'].append(node)

        # checks the number of outputs
        outputs = set(self.output_names)
        modif = 1
        while modif > 0:
            modif = 0
            for n, v in values.items():
                if v['inplace'] is not None:
                    continue
                if n in forbid:
                    continue
                if len(v['to']) == 1:
                    v['inplace'] = True
                    modif += 1

        # conveys the information to every node
        inplaces = {}
        for n, v in values.items():
            if v['inplace']:
                inplaces[n] = v
                for node in v['to']:
                    if n in outputs:
                        continue
                    node.enable_inplace_compute(n)

        return inplaces

    def _build_compile_run(self, debug=False):
        """
        Rewrites the run function in python,
        compiles it, and adds it as a method.

        @param debug insert debugging code
        @return method name, callable object, source code

        .. exref::
            :title: Run a model with runtime 'python_compiled'

            The following code trains a model and computes
            the predictions with runtime ``'python_compiled'``.
            It converts the onnx graph into a python function
            which calls every operator. Its code is printed
            below.

            .. runpython::
                :showcode:
                :warningout: DeprecationWarning

                import numpy
                from sklearn.datasets import load_iris
                from sklearn.model_selection import train_test_split
                from sklearn.ensemble import AdaBoostClassifier
                from sklearn.tree import DecisionTreeClassifier
                from mlprodict.onnx_conv import to_onnx
                from mlprodict.onnxrt import OnnxInference

                iris = load_iris()
                X, y = iris.data, iris.target
                X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
                y_train = y_train.astype(numpy.float32)
                clr = AdaBoostClassifier(
                    base_estimator=DecisionTreeClassifier(max_depth=3),
                    n_estimators=3)
                clr.fit(X_train, y_train)

                model_def = to_onnx(clr, X_train.astype(numpy.float32),
                                    target_opset=12)

                oinf2 = OnnxInference(model_def, runtime='python_compiled')
                print(oinf2.run({'X': X_test[:5]}))

                # prints out the python function equivalent
                # to the onnx graph
                print(oinf2)
        """

        def clean_name(name):
            res = name.replace(":", "_").replace('.', '_').replace('/', '_')
            if iskeyword(res):
                res += '_'
            return res

        # inits
        inputs = self.input_names
        code = [
            'def compiled_run(dict_inputs, yield_ops=None, context=None, attributes=None):']
        code.append("    if yield_ops is not None:")
        code.append("        raise NotImplementedError"
                    "('yields_ops should be None.')")
        if debug:
            code.append("    printed = {}")

        context = {}

        # static variables
        for k in sorted(self.statics_):
            code.append(f"    # static: {k}")
            code.append(f"    {clean_name(k)} = dict_inputs['{k}']")
            if debug:  # pragma: no cover
                code.append(
                    f"    debug_print('i.{clean_name(k)}', {k}, printed)")

        # initializers
        for k, v in sorted(self.inits_.items()):
            if k.startswith("_OPT_"):
                raise RuntimeError(  # pragma: no cover
                    "The runtime cannot handle any constant name "
                    "starting with '_OPT_': '{}'.".format(k))
            if k in inputs:
                context["_OPT_" + clean_name(k)] = v['value']
                code.append(f"    # init: _OPT_{clean_name(k)} ({k})")
                if debug:  # pragma: no cover
                    code.append(
                        "    debug_print('c.[_OPT_{0}]', _OPT_{1}, printed)".format(
                            clean_name(k), k))
            else:
                context[clean_name(k)] = v['value']
                code.append(f"    # init: {clean_name(k)} ({k})")
                if debug:
                    code.append(
                        f"    debug_print('c.[{clean_name(k)}]', {k}, printed)")

        # method signature
        code.append("    # inputs")
        for inp in inputs:
            if '_OPT_' + inp in context:
                # optional inputs
                code.append(
                    "    {0} = dict_inputs.get('{1}', _OPT_{0})".format(
                        clean_name(inp), inp))
            else:
                code.append(f"    {clean_name(inp)} = dict_inputs['{inp}']")
            if debug:
                code.append(
                    f"    debug_print('i.{clean_name(inp)}', {inp}, printed)")

        # code
        for i, node in enumerate(self.sequence_):
            name = f"n{i}_{node.ops_.__class__.__name__.lower()}"
            if node.ops_ is None:
                context[name] = node.function_
                # The code of the function should be added but only once.
                raise NotImplementedError(
                    "Not implemented for models including functions.")
            else:
                context[name] = node.ops_._run
                if (node.ops_.__class__.__name__ == 'Loop' and
                        node.ops_.need_context()):
                    # Adding context.
                    ctx = "{%s}" % ", ".join(
                        "'%s': %s" % (n, n) for n in node.ops_.additional_inputs)
                    code.append('    ({1}, ) = {2}({0}, context={3})'.format(
                        ', '.join(map(clean_name, node.inputs)),
                        ', '.join(map(clean_name, node.outputs)),
                        name, ctx))
                else:
                    code.append('    ({1}, ) = {2}({0})'.format(
                        ', '.join(map(clean_name, node.inputs)),
                        ', '.join(map(clean_name, node.outputs)),
                        name))
            if debug:
                code.append(f"    print('''# {code[-1][4:]}''')")
                for o in node.outputs:
                    code.append(
                        f"    debug_print('o.{clean_name(o)}', {o}, printed)")

        # return
        code.append('    return {')
        for out in self.output_names:
            code.append(f"        '{out}': {clean_name(out)},")
        code.append('    }')
        final_code = '\n'.join(code)

        # compile the outcome
        context['self'] = self
        try:
            obj = compile(final_code, "<string>", 'exec')
        except SyntaxError as e:  # pragma: no cover
            raise SyntaxError(
                f"Unable to compile\n#####\n{final_code}") from e
        fcts_obj = [_ for _ in obj.co_consts
                    if _ is not None and not isinstance(_, (bool, str, int))]
        fct = make_callable(
            "compiled_run", fcts_obj[0], final_code, context, debug)

        # end
        return "compiled_run", fct, final_code

    def reduce_size(self, pickable=False):
        """
        Reduces the memory footprint as much as possible.

        @param pickable keep the data necessary to pickle the object?
        """
        import gc
        del self.graph_
        if not pickable:
            del self.obj
        if self.runtime in ('python_compiled', 'python_compiled_debug'):
            del self.sequence_
        gc.collect()

    def get_profiling(self, as_df=False):
        """
        Returns the profiling after a couple of executions.

        :param as_df: return the results as a dataframe (True)
        :return: dataframe or list of dictionaries

        .. versionadded:: 0.6
        """
        if (self.runtime_options is None or
                not self.runtime_options.get('enable_profiling', False)):
            raise RuntimeError(
                "Profiling is available if option 'enable_profiling' "
                "is set to true in 'runtime_options' but is %r." % self.runtime_options)
        prof = None
        if hasattr(self, '_whole'):
            prof = self._whole.get_profiling()
        if prof is None:
            raise NotImplementedError(  # pragma: no cover
                "profiling is only implemented for runtime 'onnxruntime1'.")
        if as_df:
            import pandas
            return pandas.DataFrame(prof)
        return prof  # pragma: no cover
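    # A hedged sketch (not part of the original file): profiling must be
    # enabled when the instance is created and requires an onnxruntime1
    # runtime; `model_def` and `x` are assumed:
    #
    #     oinf = OnnxInference(model_def, runtime='onnxruntime1',
    #                          runtime_options={'enable_profiling': True})
    #     oinf.run({'X': x})
    #     df = oinf.get_profiling(as_df=True)  # needs pandas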

    def get_execution_order(self):
        """
        This function returns a dictionary `{(kind, name): (order, op)}`,
        *name* can be a node name or a result name. In that case,
        it gets the execution order of the node which created it.
        The function returns None if the order is not available
        (the selected runtime does not return it). *kind* is either
        `'node'` or `'res'`. If two nodes have the same name, the
        returned order is the last one. Initializers get an execution
        order equal to -1, inputs to 0, all other results are >= 1.

        .. versionadded:: 0.7
        """
        if not hasattr(self, "sequence_"):
            return None

        res = {}
        for k, v in self.inits_.items():
            res['res', k] = (-1, v)
        for name, shape in self.input_names_shapes:
            res['res', name] = (0, shape)

        for i, node in enumerate(self.sequence_):
            key = ('node', node.onnx_node.name)
            res[key] = (i + 1, node)
            for out in node.onnx_node.output:
                key = ('res', out)
                if key in res:
                    raise RuntimeError(  # pragma: no cover
                        "Output %r of node name %r already registered."
                        "" % (out, node.onnx_node.name))
                res[key] = (i + 1, None)

        return res
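    # A hedged sketch (not part of the original file): orders start at -1 for
    # initializers, 0 for inputs, and >= 1 for computed results; `oinf` wraps
    # an assumed model run with the python runtime:
    #
    #     order = oinf.get_execution_order()
    #     if order is not None:
    #         for (kind, name), (pos, _) in sorted(
    #                 order.items(), key=lambda kv: kv[1][0]):
    #             print(pos, kind, name)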