Coverage for mlprodict/testing/test_utils/tests_helper.py

1"""

2@file

3@brief Inspired from sklearn-onnx, handles two backends.

4"""

5import pickle

6import os

7import warnings

8import traceback

9import time

10import sys

11import numpy

12import pandas

13from sklearn.datasets import (

14 make_classification, make_multilabel_classification,

15 make_regression)

16from sklearn.model_selection import train_test_split

17from sklearn.preprocessing import MultiLabelBinarizer

18from .utils_backend import compare_backend

19from .utils_backend_common import (

20 extract_options, evaluate_condition, is_backend_enabled,

21 OnnxBackendMissingNewOnnxOperatorException)

24def _has_predict_proba(model):

25 if hasattr(model, "voting") and model.voting == "hard":

26 return False

27 return hasattr(model, "predict_proba")

30def _has_decision_function(model):

31 if hasattr(model, "voting"):

32 return False

33 return hasattr(model, "decision_function")

36def _has_transform_model(model):

37 if hasattr(model, "voting"):

38 return False

39 return hasattr(model, "fit_transform") and hasattr(model, "score")

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Fits a classification model on a random problem
    (500 samples, 7 informative features).

    :param model: model to train, it must implement method *fit*
    :param n_classes: number of classes
    :param is_int: casts the features into *int64*
    :param pos_features: replaces the features by their absolute values
    :param label_string: converts the labels into strings ``cl<label>``
    :param random_state: seed given to *make_classification*
        (the train/test split always uses 42)
    :param is_bool: casts the features into *int64* then into *bool*
    :param n_features: number of features
    :return: tuple *(fitted model, test features)*,
        the test set is half of the generated data
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % label for label in labels])

    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)

    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Fits a model on a random multi-label classification problem.

    :param model: model to train, it must implement method *fit*
    :param n_classes: number of classes
    :param n_labels: given to *make_multilabel_classification*
    :param n_samples: number of generated samples
    :param n_features: number of features
    :param is_int: casts the features into *int64*
        (*float32* otherwise)
    :return: tuple *(fitted model, test features)*,
        the test set is half of the generated data
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)

    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Fits a regression model on a random problem.

    :param model: model to train, it must implement method *fit*
    :param is_int: casts the features into *int64*
    :param n_targets: number of targets
    :param is_bool: casts the features into *int64* then into *bool*
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of generated samples
    :param n_informative: number of informative features
    :return: tuple *(fitted model, test features)*,
        the test set is half of the generated data
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples,
        n_targets=n_targets, random_state=42,
        n_informative=n_informative)
    features, targets = generated[:2]
    targets *= factor

    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if is_bool:
        features = features.astype(bool)

    split = train_test_split(features, targets, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

100

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Fits a classification model on a random problem with 10 features,
    9 of them informative, none redundant or repeated.

    :param model: model to train, it must implement method *fit*
    :param n_classes: number of classes
    :param is_int: casts the features into *int64*
        (*float32* otherwise)
    :param pos_features: replaces the features by their absolute values
    :return: tuple *(fitted model, test features)*,
        the test set is half of the generated data
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)

    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

117

118

119def _raw_score_binary_classification(model, X):

120 scores = model.decision_function(X)

121 if len(scores.shape) == 1:

122 scores = scores.reshape(-1, 1)

123 if len(scores.shape) != 2 or scores.shape[1] != 1:

124 raise RuntimeError( # pragma: no cover

125 f"Unexpected shape {scores.shape} for a binary classifiation")

126 return numpy.hstack([-scores, scores])

127

128

129def _save_model_dump(model, folder, basename, names):

130 if hasattr(model, "save"): # pragma: no cover

131 dest = os.path.join(folder, basename + ".model.keras")

132 names.append(dest)

133 model.save(dest)

134 else:

135 dest = os.path.join(folder, basename + ".model.pkl")

136 names.append(dest)

137 with open(dest, "wb") as f:

138 try:

139 pickle.dump(model, f)

140 except AttributeError as e: # pragma no cover

141 print(

142 f"[dump_data_and_model] cannot pickle model '{dest}' due to {e}.")

143

144

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: the files are written as ``<basename>.data.pkl``,
        ``<basename>.expected.pkl``, ``<basename>.model.pkl``,
        ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used if
        parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods are equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do, it is forwarded to the backend comparison whatever
        the values of *allow_failure* and *check_error* are
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType  # delayed

    runtime_test = dict(model=model, data=data)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok avoids a failure if the folder gets created between
    # an existence check and the creation (tests running in parallel)
    os.makedirs(folder, exist_ok=True)

    # the benchmark runs on the first observation only
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                # method is bound as a default value so the lambda does not
                # depend on the loop variable
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                            _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    f"Method '{method}' is not callable.")
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [model.predict(data),
                              model.decision_function(data)]
                lambda_original = (
                    lambda: model.decision_function(dataone))
            elif _has_transform_model(model):
                # clustering
                try:
                    prediction = [model.predict(data), model.transform(data)]
                    lambda_original = lambda: model.transform(dataone)
                except ValueError:
                    # 0.23 enforced type checking.
                    d64 = data.astype(numpy.float64)
                    prediction = [model.predict(d64), model.transform(d64)]
                    dataone64 = dataone.astype(numpy.float64)
                    lambda_original = lambda: model.transform(dataone64)
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())  # pragma: no cover
                lambda_original = lambda: model.transform(
                    dataone.ravel())  # pragma: no cover
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)
        else:
            raise TypeError(  # pragma: no cover
                f"Model has no predict or transform method: {type(model)}")

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    # error_dump is only read when dump_error_log is true
    error_dump = None
    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print(f"[dump_data_and_model] created '{dest}'.")

    runtime_test["onnx"] = dest

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]

        # Same arguments for every comparison: the allow_failure branch
        # used to drop disable_optimisation silently.
        compare_kwargs = dict(
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure
            if allow is None and not check_error:
                # no failure allowed, any exception goes through
                output, lambda_onnx = compare_backend(
                    b, runtime_test, options=extract_options(basename),
                    **compare_kwargs)
            elif check_error:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test, options=extract_options(basename),
                        **compare_kwargs)
                except Exception as e:  # pragma: no cover
                    # an expected error message only produces a warning
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = compare_backend(
                        b, runtime_test,
                        options=extract_options(basename),
                        **compare_kwargs)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + f".backend.{b}.pkl")
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

436

437

def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model, *scikit-learn*, *keras*,
        or *coremltools* object
    :param name: model name
    :param input_types: input types
    :return: tuple *(onnx model, prefix)*, the prefix is ``"Sklearn"``
    :raises RuntimeError: if the conversion returns None
    """
    from skl2onnx import convert_sklearn  # delayed

    # The result is stored in its own variable: rebinding *model* would make
    # the error message below always report ``NoneType``.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError(f"Unable to convert model of type '{type(model)}'.")
    return onnx_model, "Sklearn"

454

455

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Trains and dumps a model for a One Class outlier problem.
    The model is trained on three points sharing the same label,
    converted into *onnx* and given to :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    X = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                    dtype=numpy.float32)
    model.fit(X, [1, 1, 1])

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_types)
    basename = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        X, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

481

482

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Trains and dumps a model for a binary classification problem.
    The model is trained twice, on three handmade points then
    on a random problem, and every trained model is given to
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # arguments shared by every call to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)
    class_name = model.__class__.__name__

    # first problem: three handmade points
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    if label_string:
        y = ["A", "B", "A"]
    else:
        y = numpy.array([0, 1, 0], numpy.int64)
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    basename = prefix + "Bin" + class_name + suffix
    if nrows == 2:
        # every window of two consecutive rows is dumped
        # under the same basename
        batches = [X[nr: nr + 2] for nr in range(X.shape[0] - 1)]
    else:
        batches = [X]
    for batch in batches:
        dump_data_and_model(
            batch, model, model_onnx, basename=basename, **shared)

    # second problem: random data, only this one can be benchmarked
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier",
        [("input", FloatTensorType([None, 2]))])
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx,
        basename=prefix + "RndBin" + class_name + suffix,
        benchmark=benchmark, **shared)

531

532

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Trains and dumps a model for a multi-class classification problem.
    The model is trained twice, on six handmade points then
    on a random problem with three classes, and every trained model
    is given to :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # arguments shared by every call to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)
    class_name = model.__class__.__name__

    # first problem: six handmade points, labels shifted by first_class
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [label + first_class for label in (0, 1, 2, 1, 1, 2)]
    if label_string:
        y = ["l%d" % label for label in y]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print(
            f"[dump_multiple_classification] model '{model.__class__.__name__}'")
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx,
        basename=prefix + "Mcl" + class_name + suffix, **shared)

    # second problem: random data, only this one can be benchmarked
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print(
            f"[dump_multiple_classification] model '{model.__class__.__name__}'")
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx,
        basename=prefix + "RndMcl" + class_name + suffix,
        benchmark=benchmark, **shared)

584

585

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem.
    The model is trained twice, on six handmade points then
    on a random problem with three classes, and every trained model
    is given to :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # arguments shared by every call to dump_data_and_model
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  backend=backend)
    class_name = model.__class__.__name__

    # first problem: six handmade points, the fourth one has two labels
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        groups = ([0], [1], [2], [0, 1], [1], [2])
        y = [[label + first_class for label in group] for group in groups]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print(
            f"[make_multilabel_classification] model '{model.__class__.__name__}'")
    model_onnx, prefix = convert_model(
        model, "multi-label-classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    # NOTE(review): this basename uses the tag "Mcl" whereas the random
    # problem below uses "RndMla" -- confirm whether "Mla" was intended.
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx,
        basename=prefix + "Mcl" + class_name + suffix, **shared)

    # second problem: random data, only this one can be benchmarked
    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print(
            f"[make_multilabel_classification] model '{model.__class__.__name__}'")
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx,
        basename=prefix + "RndMla" + class_name + suffix,
        benchmark=benchmark, **shared)

640

641

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Trains and dumps a model for a multi regression problem.
    The model is trained on three points with two targets,
    converted into *onnx* and given to :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = numpy.array([[100, 50], [100, 49], [100, 99]], dtype=numpy.float32)
    model.fit(X, y)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor", input_types)
    basename = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=basename, verbose=verbose,
        comparable_outputs=comparable_outputs, benchmark=benchmark)

665

666

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The model is trained on three points with a single target,
    converted into *onnx* and given to :func:`dump_data_and_model`.

    :param model: model to train, it must implement method *fit*
    :param suffix: added at the end of the base name
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # benchmark is forwarded as dump_multiple_regression does,
    # it used to be accepted and silently ignored
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs, benchmark=benchmark)

688

689

def timeit_repeat(fct, number, repeat):
    """
    Measures *repeat* times the duration of *number*
    consecutive calls to function *fct*.

    :param fct: function to measure, called with no argument
    :param number: number of calls in one measure
    :param repeat: number of measures
    :return: list of *repeat* durations, each of them is the total
        time of *number* calls measured with ``time.perf_counter``
    """
    def _one_measure():
        begin = time.perf_counter()
        for _ in range(number):
            fct()
        return time.perf_counter() - begin

    return [_one_measure() for _ in range(repeat)]

704

705

def timeexec(fct, number, repeat):
    """
    Measures the time for a given function.

    :param fct: function to measure, called with no argument
    :param number: number of time to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*,
        every duration is the time of one call
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # the slices hold fewer than three measures when repeat < 3,
    # the averages must be divided by the real number of measures
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    mini = rep[len(rep) // 20] / number
    # unary minus binds tighter than //: the index is (-len(rep)) // 20
    maxi = rep[-len(rep) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)

728

729

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of time to run the expression
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, the output of
        :func:`timeexec` with an additional key ``'name'``
    """
    def _measure(name, fct):
        stats = timeexec(fct, number=number, repeat=repeat)
        stats["name"] = name
        return stats

    return [_measure(name, fct) for name, fct in fcts.items()]

747

748

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string,
    used to compare decision paths.

    :param mat: two-dimensional :epkg:`numpy` array
    :return: list of strings, one per row, the concatenation
        of the string representation of every value of the row
    :raises NotImplementedError: if *mat* is not an array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, values)) for values in mat.tolist()]

Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%

277 statements