Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%

277 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1""" 

2@file 

3@brief Inspired from sklearn-onnx, handles two backends. 

4""" 

5import pickle 

6import os 

7import warnings 

8import traceback 

9import time 

10import sys 

11import numpy 

12import pandas 

13from sklearn.datasets import ( 

14 make_classification, make_multilabel_classification, 

15 make_regression) 

16from sklearn.model_selection import train_test_split 

17from sklearn.preprocessing import MultiLabelBinarizer 

18from .utils_backend import compare_backend 

19from .utils_backend_common import ( 

20 extract_options, evaluate_condition, is_backend_enabled, 

21 OnnxBackendMissingNewOnnxOperatorException) 

22 

23 

def _has_predict_proba(model):
    """
    Tells if *model* should be tested through ``predict_proba``.

    :param model: any object
    :return: *False* if the model has an attribute ``voting``
        equal to ``"hard"``, otherwise whether the model has
        an attribute ``predict_proba``
    """
    if getattr(model, "voting", None) == "hard":
        # hard voting: the probabilities are not used
        return False
    return hasattr(model, "predict_proba")

28 

29 

def _has_decision_function(model):
    """
    Tells if *model* should be tested through ``decision_function``.

    :param model: any object
    :return: *False* as soon as the model has an attribute ``voting``,
        otherwise whether it has an attribute ``decision_function``
    """
    has_voting = hasattr(model, "voting")
    return (not has_voting) and hasattr(model, "decision_function")

34 

35 

def _has_transform_model(model):
    """
    Tells if *model* exposes both ``fit_transform`` and ``score``.

    :param model: any object
    :return: *False* as soon as the model has an attribute ``voting``,
        otherwise whether it has both attributes
        ``fit_transform`` and ``score``
    """
    if hasattr(model, "voting"):
        return False
    if not hasattr(model, "fit_transform"):
        return False
    return hasattr(model, "score")

40 

41 

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Trains *model* on a generated classification dataset
    of 500 samples, half of them is used for training.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param is_int: features are converted into ``int64``
    :param pos_features: takes the absolute value of the features
    :param label_string: labels are strings ``'cl<i>'``
    :param random_state: seed used to generate the dataset
        (the train/test split always uses 42)
    :param is_bool: features are converted into ``int64`` then booleans
    :param n_features: number of features
    :return: trained model, test features
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % cl for cl in labels])
    # booleans go through integers first
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features

64 

65 

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Trains *model* on a generated multi-label classification dataset,
    half of the samples is used for training.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param n_labels: parameter *n_labels* given to
        ``make_multilabel_classification``
    :param n_samples: number of generated samples
    :param n_features: number of features
    :param is_int: features are converted into ``int64``
        (``float32`` otherwise)
    :return: trained model, test features
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features

80 

81 

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Trains *model* on a generated regression dataset,
    half of the samples is used for training.

    :param model: model to train, must implement ``fit``
    :param is_int: features are converted into ``int64``
    :param n_targets: number of targets
    :param is_bool: features are converted into ``int64`` then booleans
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of generated samples
    :param n_informative: number of informative features
    :return: trained model, test features
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    features, targets = generated[:2]
    targets *= factor
    # booleans go through integers first
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, targets, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_targets = split[:3]
    model.fit(train_features, train_targets)
    return model, test_features

99 

100 

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Trains *model* on a generated classification dataset
    (500 samples, 10 features, 9 informative, none redundant
    or repeated), half of the samples is used for training.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param is_int: features are converted into ``int64``
        (``float32`` otherwise)
    :param pos_features: takes the absolute value of the features
    :return: trained model, test features
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features

117 

118 

def _raw_score_binary_classification(model, X):
    """
    Builds a two-column score matrix from ``model.decision_function(X)``.

    :param model: model implementing ``decision_function``
    :param X: data given to ``decision_function``
    :return: matrix ``[-scores, scores]`` with two columns
    :raises RuntimeError: if the scores, once a vector has been
        reshaped into one column, are not a matrix with a single column
    """
    raw = model.decision_function(X)
    if len(raw.shape) == 1:
        # a vector becomes a single column
        raw = raw.reshape(-1, 1)
    is_single_column = len(raw.shape) == 2 and raw.shape[1] == 1
    if not is_single_column:
        raise RuntimeError(  # pragma: no cover
            f"Unexpected shape {raw.shape} for a binary classifiation")
    return numpy.hstack([-raw, raw])

127 

128 

def _save_model_dump(model, folder, basename, names):
    """
    Saves *model* into *folder* and registers the created file.

    :param model: model to save, method ``save`` is used if the model
        has one (file ``<basename>.model.keras``), otherwise the model
        is pickled (file ``<basename>.model.pkl``)
    :param folder: destination folder
    :param basename: prefix of the created file
    :param names: list of created files, the new filename is appended
        to it before the model is written

    An *AttributeError* raised while pickling is only printed.
    """
    if hasattr(model, "save"):  # pragma: no cover
        filename = os.path.join(folder, basename + ".model.keras")
        names.append(filename)
        model.save(filename)
        return

    filename = os.path.join(folder, basename + ".model.pkl")
    names.append(filename)
    with open(filename, "wb") as stream:
        try:
            pickle.dump(model, stream)
        except AttributeError as exc:  # pragma no cover
            print(
                f"[dump_data_and_model] cannot pickle model '{filename}' due to {exc}.")

143 

144 

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used
        if parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which method is equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do, it is forwarded to the backend comparison
        whatever the values of *allow_failure* and *check_error* are
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to this vector
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType  # delayed

    runtime_test = dict(model=model, data=data)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok avoids failing when another process creates
    # the folder between a check and the creation
    os.makedirs(folder, exist_ok=True)

    lambda_original = None
    # the benchmark runs on the first row only when the data can be sliced
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                # method is bound as a default value to avoid
                # the late binding of the loop variable
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                                _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    f"Method '{method}' is not callable.")
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [model.predict(data),
                              model.decision_function(data)]
                lambda_original = (
                    lambda: model.decision_function(dataone))
            elif _has_transform_model(model):
                # clustering
                try:
                    prediction = [model.predict(data), model.transform(data)]
                    lambda_original = lambda: model.transform(dataone)
                except ValueError:
                    # 0.23 enforced type checking.
                    d64 = data.astype(numpy.float64)
                    prediction = [model.predict(d64), model.transform(d64)]
                    dataone64 = dataone.astype(numpy.float64)
                    lambda_original = lambda: model.transform(dataone64)
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())  # pragma: no cover
                lambda_original = lambda: model.transform(
                    dataone.ravel())  # pragma: no cover
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)
        else:
            raise TypeError(  # pragma: no cover
                f"Model has no predict or transform method: {type(model)}")

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print(f"[dump_data_and_model] created '{dest}'.")

    runtime_test["onnx"] = dest

    def _compare(backend_name):
        # Single place calling the backend so that every branch below
        # forwards the same arguments (disable_optimisation included).
        # The options are extracted again for every call.
        return compare_backend(
            backend_name, runtime_test, options=extract_options(basename),
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure
            if allow is None and not check_error:
                # no failure allowed: any exception is propagated
                output, lambda_onnx = _compare(b)
            elif check_error:
                try:
                    output, lambda_onnx = _compare(b)
                except Exception as e:  # pragma: no cover
                    # the expected error message only triggers a warning
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = _compare(b)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + f".backend.{b}.pkl")
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

436 

437 

def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model, *scikit-learn*, *keras*,
        or *coremltools* object
    :param name: model name
    :param input_types: input types
    :return: *onnx* model, prefix ``"Sklearn"``
    :raises RuntimeError: if the converter returns *None*
    """
    from skl2onnx import convert_sklearn  # delayed

    # The result is kept in its own variable: the error message below
    # must report the type of the original model, not NoneType.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError(f"Unable to convert model of type '{type(model)}'.")
    return onnx_model, "Sklearn"

454 

455 

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Trains and dumps a model for a One Class outlier problem.
    The model is trained on three observations sharing the same label,
    converted with :func:`convert_model` and dumped with
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                           dtype=numpy.float32)
    model.fit(features, [1, 1, 1])
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_types)
    basename = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

481 

482 

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Trains and dumps a model for a binary classification problem.
    The model is trained twice, on three fixed observations
    (files *Bin*) then on ten generated observations (files *RndBin*),
    and every time converted with :func:`convert_model`
    and dumped with :func:`dump_data_and_model`.
    If *nrows* is 2, the fixed observations are dumped by pairs
    of consecutive rows, *nrows* also limits the number of
    generated observations which are dumped.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # arguments shared by every call to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)

    # fixed dataset
    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    if label_string:
        labels = ["A", "B", "A"]
    else:
        labels = numpy.array([0, 1, 0], numpy.int64)
    model.fit(features, labels)
    model_onnx, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    basename = prefix + "Bin" + model.__class__.__name__ + suffix
    if nrows == 2:
        starts = range(features.shape[0] - 1)
        batches = [features[nr: nr + 2] for nr in starts]
    else:
        batches = [features]
    for batch in batches:
        dump_data_and_model(
            batch, model, model_onnx, basename=basename, **shared)

    # generated dataset
    features, labels = make_classification(
        10, n_features=4, random_state=42)
    features = features[:, :2]
    model.fit(features, labels)
    model_onnx, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    dumped = features.astype(numpy.float32)
    if nrows is not None:
        dumped = dumped[:nrows]
    basename = prefix + "RndBin" + model.__class__.__name__ + suffix
    dump_data_and_model(
        dumped, model, model_onnx, basename=basename,
        benchmark=benchmark, **shared)

531 

532 

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Trains and dumps a model for a multi-class classification problem.
    The model is trained twice, on six fixed observations
    (files *Mcl*) then on forty generated observations
    (files *RndMcl*, only the first ten rows are dumped),
    and every time converted with :func:`convert_model`
    and dumped with :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    def _train_convert_dump(train_x, train_y, rows, tag, **extra):
        # trains on (train_x, train_y), converts the model, then dumps
        # the first *rows* rows of train_x (all of them if rows is None)
        model.fit(train_x, train_y)
        if verbose:  # pragma: no cover
            print(
                f"[dump_multiple_classification] model '{model.__class__.__name__}'")
        model_onnx, prefix = convert_model(
            model, "multi-class classifier",
            [("input", FloatTensorType([None, 2]))])
        if verbose:  # pragma: no cover
            print("[dump_multiple_classification] model was converted")
        dump_data_and_model(
            train_x[:rows].astype(numpy.float32), model, model_onnx,
            folder=folder, allow_failure=allow_failure,
            basename=prefix + tag + model.__class__.__name__ + suffix,
            verbose=verbose, comparable_outputs=comparable_outputs,
            methods=methods, **extra)

    # fixed dataset
    features = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    labels = [i + first_class for i in (0, 1, 2, 1, 1, 2)]
    if label_string:
        labels = ["l%d" % i for i in labels]
    _train_convert_dump(features, labels, None, "Mcl")

    # generated dataset
    features, labels = make_classification(
        40, n_features=4, random_state=42,
        n_classes=3, n_clusters_per_class=1)
    features = features[:, :2]
    _train_convert_dump(features, labels, 10, "RndMcl", benchmark=benchmark)

584 

585 

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem.
    The model is trained twice, on six fixed observations
    (files *Mcl*) then on forty generated observations
    (files *RndMla*, only the first ten rows are dumped),
    and every time converted with :func:`convert_model`
    and dumped with :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    def _train_convert_dump(train_x, train_y, rows, onnx_name, tag, **extra):
        # trains on (train_x, train_y), converts the model, then dumps
        # the first *rows* rows of train_x (all of them if rows is None)
        model.fit(train_x, train_y)
        if verbose:  # pragma: no cover
            print(
                f"[make_multilabel_classification] model '{model.__class__.__name__}'")
        model_onnx, prefix = convert_model(
            model, onnx_name, [("input", FloatTensorType([None, 2]))])
        if verbose:  # pragma: no cover
            print("[make_multilabel_classification] model was converted")
        dump_data_and_model(
            train_x[:rows].astype(numpy.float32), model, model_onnx,
            folder=folder, allow_failure=allow_failure,
            basename=prefix + tag + model.__class__.__name__ + suffix,
            verbose=verbose, comparable_outputs=comparable_outputs,
            backend=backend, **extra)

    # fixed dataset
    features = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        # first_class is not used with string labels
        labels = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        groups = ([0], [1], [2], [0, 1], [1], [2])
        labels = [[cl + first_class for cl in group] for group in groups]
    labels = MultiLabelBinarizer().fit_transform(labels)
    _train_convert_dump(
        features, labels, None, "multi-label-classifier", "Mcl")

    # generated dataset
    features, labels = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    features = features[:, :2]
    _train_convert_dump(
        features, labels, 10, "multi-class classifier", "RndMla",
        benchmark=benchmark)

640 

641 

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Trains and dumps a model for a multi regression problem.
    The model is trained on three observations with two targets,
    converted with :func:`convert_model` and dumped with
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array([[100, 50], [100, 49], [100, 99]],
                          dtype=numpy.float32)
    model.fit(features, targets)
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor", input_types)
    basename = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark)

665 

666 

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model and calls
    :func:`dump_data_and_model`.

    :param model: model to train, must implement ``fit``
    :param suffix: added at the end of the basename
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: if True, a benchmark is requested from
        :func:`dump_data_and_model`, otherwise that function decides
        by itself (environment variable ``ONNXTESTBENCHMARK``)

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # The parameter used to be silently ignored. Only an explicit request
    # is forwarded: a falsy value maps to None which keeps the previous
    # behaviour (dump_data_and_model then checks ONNXTESTBENCHMARK).
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs,
        benchmark=True if benchmark else None)

688 

689 

def timeit_repeat(fct, number, repeat):
    """
    Measures *repeat* times the duration of *number*
    consecutive calls to function *fct*.

    :param fct: function to measure, called with no argument
    :param number: number of calls in one measure
    :param repeat: number of measures
    :return: list of *repeat* durations (``time.perf_counter``),
        each of them is the total time of *number* calls
    """
    def _one_measure():
        begin = time.perf_counter()
        for _ in range(0, number):
            fct()
        return time.perf_counter() - begin

    return [_one_measure() for _ in range(0, repeat)]

704 

705 

def timeexec(fct, number, repeat):
    """
    Measures the processing time of a function.

    :param fct: function to measure, called with no argument
    :param number: number of calls in one measure
        (the measure is then divided by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*
    """
    measures = timeit_repeat(fct, number=number, repeat=repeat)
    average = sum(measures) / (number * repeat)
    variance = sum((m / number - average)**2 for m in measures) / repeat
    # statistics depending on the order of the measures
    first = measures[0] / number
    first3 = sum(measures[:3]) / (3 * number)
    last3 = sum(measures[-3:]) / (3 * number)
    # statistics depending on the sorted measures
    ordered = sorted(measures)
    count = len(ordered)
    low = ordered[count // 20] / number
    high = ordered[(-count) // 20] / number
    return {
        'average': average,
        'deviation': variance**0.5,
        'first': first,
        'first3': first3,
        'last3': last3,
        'repeat': repeat,
        'min5': low,
        'max5': high,
        'run': number,
    }

728 

729 

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of calls in one measure
        (the measure is then divided by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, the output of
        :func:`timeexec` with an additional key ``'name'``
    """
    def _measure(name, fct):
        stats = timeexec(fct, number=number, repeat=repeat)
        stats["name"] = name
        return stats

    return [_measure(name, fct) for name, fct in fcts.items()]

747 

748 

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string by concatenating
    the string representation of its coefficients.
    Used to compare decision path.

    :param mat: *numpy* array with two dimensions
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not a *numpy* array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if mat.ndim != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]