Coverage for mlprodict/testing/test_utils/tests_helper.py: 98%
277 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1"""
2@file
3@brief Inspired from sklearn-onnx, handles two backends.
4"""
5import pickle
6import os
7import warnings
8import traceback
9import time
10import sys
11import numpy
12import pandas
13from sklearn.datasets import (
14 make_classification, make_multilabel_classification,
15 make_regression)
16from sklearn.model_selection import train_test_split
17from sklearn.preprocessing import MultiLabelBinarizer
18from .utils_backend import compare_backend
19from .utils_backend_common import (
20 extract_options, evaluate_condition, is_backend_enabled,
21 OnnxBackendMissingNewOnnxOperatorException)
def _has_predict_proba(model):
    """
    Tells if *model* can be compared through ``predict_proba``.
    A model whose attribute ``voting`` equals ``"hard"`` is rejected,
    any other model qualifies as soon as it exposes ``predict_proba``.
    """
    hard_voting = hasattr(model, "voting") and model.voting == "hard"
    return False if hard_voting else hasattr(model, "predict_proba")
def _has_decision_function(model):
    """
    Tells if *model* can be compared through ``decision_function``.
    Any model exposing an attribute ``voting`` is rejected.
    """
    return (not hasattr(model, "voting")
            and hasattr(model, "decision_function"))
def _has_transform_model(model):
    """
    Tells if *model* exposes both ``fit_transform`` and ``score``.
    Any model exposing an attribute ``voting`` is rejected.
    """
    if hasattr(model, "voting"):
        return False
    return all(hasattr(model, attr) for attr in ("fit_transform", "score"))
def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Trains *model* on a synthetic classification problem
    (500 samples, 7 informative features) and returns it
    with the features of the test half.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param is_int: converts the features into ``int64``
        instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :param label_string: uses string labels ``'cl<i>'``
    :param random_state: seed given to ``make_classification``
        (the train/test split always uses 42)
    :param is_bool: converts the features into booleans
        (after a conversion into ``int64``)
    :param n_features: number of features
    :return: tuple *(fitted model, test features)*
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % cl for cl in labels])
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features
def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Trains *model* on a synthetic multi-label classification problem
    and returns it with the features of the test half.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param n_labels: given to ``make_multilabel_classification``
    :param n_samples: number of samples
    :param n_features: number of features
    :param is_int: converts the features into ``int64``
        instead of ``float32``
    :return: tuple *(fitted model, test features)*
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features
def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Trains *model* on a synthetic regression problem
    and returns it with the features of the test half.

    :param model: model to train, must implement ``fit``
    :param is_int: converts the features into ``int64``
        instead of ``float32``
    :param n_targets: number of targets
    :param is_bool: converts the features into booleans
        (after a conversion into ``int64``)
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of samples
    :param n_informative: number of informative features
    :return: tuple *(fitted model, test features)*
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    features, targets = generated[:2]
    targets *= factor
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, targets, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_targets = split[:3]
    model.fit(train_features, train_targets)
    return model, test_features
def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Trains *model* on a synthetic classification problem with
    10 features (9 informative, none redundant or repeated)
    and 500 samples, returns it with the features of the test half.

    :param model: model to train, must implement ``fit``
    :param n_classes: number of classes
    :param is_int: converts the features into ``int64``
        instead of ``float32``
    :param pos_features: replaces the features by their absolute values
    :return: tuple *(fitted model, test features)*
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    train_features, test_features, train_labels = split[:3]
    model.fit(train_features, train_labels)
    return model, test_features
def _raw_score_binary_classification(model, X):
    """
    Calls ``model.decision_function(X)`` and returns a matrix
    with two columns ``[-scores, scores]``.
    A vector of scores is first reshaped into a single column.

    :raises RuntimeError: if the scores are not a single column
    """
    raw = model.decision_function(X)
    if len(raw.shape) == 1:
        raw = raw.reshape(-1, 1)
    single_column = len(raw.shape) == 2 and raw.shape[1] == 1
    if not single_column:
        raise RuntimeError(  # pragma: no cover
            f"Unexpected shape {raw.shape} for a binary classifiation")
    return numpy.hstack([-raw, raw])
def _save_model_dump(model, folder, basename, names):
    """
    Saves *model* in *folder* and appends the destination to *names*.
    A model with a method ``save`` is stored through this method in
    ``<basename>.model.keras``, any other model is pickled in
    ``<basename>.model.pkl``. An ``AttributeError`` raised while
    pickling is only printed.
    """
    if hasattr(model, "save"):  # pragma: no cover
        target = os.path.join(folder, basename + ".model.keras")
        names.append(target)
        model.save(target)
        return

    target = os.path.join(folder, basename + ".model.pkl")
    names.append(target)
    with open(target, "wb") as stream:
        try:
            pickle.dump(model, stream)
        except AttributeError as exc:  # pragma no cover
            print(
                f"[dump_data_and_model] cannot pickle model '{target}' due to {exc}.")
def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used if
        parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which method is equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    # delayed import because too long
    from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType  # delayed

    runtime_test = dict(model=model, data=data)

    # Unspecified options fall back on environment variables.
    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    if not os.path.exists(folder):
        os.makedirs(folder)

    # The benchmark only runs on the first observation.
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    # Expected outputs computed with the original model.
    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                call = lambda X, model=model: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                                _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    f"Method '{method}' is not callable.")
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [model.predict(data),
                              model.decision_function(data)]
                lambda_original = (
                    lambda: model.decision_function(dataone))
            elif _has_transform_model(model):
                # clustering
                try:
                    prediction = [model.predict(data), model.transform(data)]
                    lambda_original = lambda: model.transform(dataone)
                except ValueError:
                    # 0.23 enforced type checking.
                    d64 = data.astype(numpy.float64)
                    prediction = [model.predict(d64), model.transform(d64)]
                    dataone64 = dataone.astype(numpy.float64)
                    lambda_original = lambda: model.transform(dataone64)
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())  # pragma: no cover
                lambda_original = lambda: model.transform(
                    dataone.ravel())  # pragma: no cover
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)
        else:
            raise TypeError(  # pragma: no cover
                f"Model has no predict or transform method: {type(model)}")

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    # Only used when dump_error_log is true.
    error_dump = None
    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print(f"[dump_data_and_model] created '{dest}'.")

    runtime_test["onnx"] = dest

    def _run_backend(backend_name):
        # Single call site: every branch below forwards the same arguments,
        # *disable_optimisation* included (it used to be dropped when
        # *allow_failure* was specified).
        return compare_backend(
            backend_name, runtime_test, options=extract_options(basename),
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure
            if allow is None and not check_error:
                # Strict mode: any failure is propagated.
                output, lambda_onnx = _run_backend(b)
            elif check_error:
                # An error whose message contains *check_error* is expected.
                try:
                    output, lambda_onnx = _run_backend(b)
                except Exception as e:  # pragma: no cover
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = _run_backend(b)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + f".backend.{b}.pkl")
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names
def convert_model(model, name, input_types):
    """
    Converts a model into *onnx* with ``skl2onnx.convert_sklearn``.

    :param model: model to convert
    :param name: model name
    :param input_types: input types
    :return: tuple *(onnx model, prefix)*, the prefix is ``"Sklearn"``
    :raises RuntimeError: if the conversion returns None
    """
    from skl2onnx import convert_sklearn  # delayed

    # The result is kept in its own variable so that the error message
    # reports the type of the original model, not ``NoneType``.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError(f"Unable to convert model of type '{type(model)}'.")
    return onnx_model, "Sklearn"
def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Fits *model* on three observations sharing the same label,
    converts it into *onnx* and gives everything to
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array(
        [[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]], dtype=numpy.float32)
    labels = [1, 1, 1]
    model.fit(features, labels)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_types)
    name = "".join([prefix, "One", model.__class__.__name__, suffix])
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=name, verbose=verbose,
        comparable_outputs=comparable_outputs, benchmark=benchmark,
        methods=methods)
def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Fits *model* on two binary classification problems, a tiny
    hand-written one (*Bin*) and a random one (*RndBin*), converts it
    into *onnx* each time and calls :func:`dump_data_and_model`.
    If *nrows* is 2, the first problem is dumped by windows of two
    consecutive rows, the random problem is truncated to *nrows* rows
    when *nrows* is specified.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    # first problem: three hand-written observations
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = ["A", "B", "A"] if label_string else numpy.array(
        [0, 1, 0], numpy.int64)
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    shared = dict(
        folder=folder, allow_failure=allow_failure,
        basename="".join([prefix, "Bin", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        methods=methods)
    if nrows == 2:
        chunks = [X[start: start + 2] for start in range(X.shape[0] - 1)]
    else:
        chunks = [X]
    for chunk in chunks:
        dump_data_and_model(chunk, model, model_onnx, **shared)

    # second problem: random observations
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx,
        allow_failure=allow_failure, folder=folder,
        basename="".join([prefix, "RndBin", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)
def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Fits *model* on two multi-class classification problems, a tiny
    hand-written one (*Mcl*) and a random one (*RndMcl*), converts it
    into *onnx* each time and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    def _convert():
        # converts the model fitted last, with optional traces
        if verbose:  # pragma: no cover
            print(
                f"[dump_multiple_classification] model '{model.__class__.__name__}'")
        converted = convert_model(model, "multi-class classifier",
                                  [("input", FloatTensorType([None, 2]))])
        if verbose:  # pragma: no cover
            print("[dump_multiple_classification] model was converted")
        return converted

    # first problem: six hand-written observations
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [label + first_class for label in [0, 1, 2, 1, 1, 2]]
    if label_string:
        y = ["l%d" % label for label in y]
    model.fit(X, y)
    model_onnx, prefix = _convert()
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename="".join([prefix, "Mcl", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        methods=methods)

    # second problem: random observations
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = _convert()
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename="".join([prefix, "RndMcl", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)
def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Fits *model* on two multi-label classification problems, a tiny
    hand-written one and a random one (*RndMla*), converts it
    into *onnx* each time and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    def _convert(onnx_name):
        # converts the model fitted last, with optional traces
        if verbose:  # pragma: no cover
            print(
                f"[make_multilabel_classification] model '{model.__class__.__name__}'")
        converted = convert_model(model, onnx_name,
                                  [("input", FloatTensorType([None, 2]))])
        if verbose:  # pragma: no cover
            print("[make_multilabel_classification] model was converted")
        return converted

    # first problem: six hand-written observations
    # NOTE(review): this dump uses the tag "Mcl", the same as
    # dump_multiple_classification, whereas the random one uses "RndMla";
    # confirm whether that is intended before changing file names.
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        y = [[label + first_class for label in row]
             for row in ([0], [1], [2], [0, 1], [1], [2])]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    model_onnx, prefix = _convert("multi-label-classifier")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename="".join([prefix, "Mcl", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        backend=backend)

    # second problem: random observations
    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = _convert("multi-class classifier")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename="".join([prefix, "RndMla", model.__class__.__name__, suffix]),
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, backend=backend)
def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Fits *model* on a tiny regression problem with two targets,
    converts it into *onnx* and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed

    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array(
        [[100, 50], [100, 49], [100, 99]], dtype=numpy.float32)
    model.fit(features, targets)

    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor", input_types)
    name = "".join([prefix, "MRg", model.__class__.__name__, suffix])
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=name, verbose=verbose,
        comparable_outputs=comparable_outputs, benchmark=benchmark)
def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model on three observations, converts it
    into *onnx* and calls :func:`dump_data_and_model`.

    :param model: model to train, must implement ``fit``
    :param suffix: added at the end of the base name
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    from skl2onnx.common.data_types import FloatTensorType  # delayed
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # *benchmark* is forwarded like the other dump_* helpers do,
    # it used to be accepted and then silently ignored.
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs, benchmark=benchmark)
def timeit_repeat(fct, number, repeat):
    """
    Calls function *fct* *number* times in a row and measures
    the total duration, does it *repeat* times.

    :param fct: function to measure, called with no argument
    :param number: number of calls per measure
    :param repeat: number of measures
    :return: list of *repeat* durations measured
        with ``time.perf_counter``
    """
    def _measure_batch():
        begin = time.perf_counter()
        for _ in range(0, number):
            fct()
        return time.perf_counter() - begin

    return [_measure_batch() for _ in range(0, repeat)]
def timeexec(fct, number, repeat):
    """
    Measures the time for a given function.

    :param fct: function to measure, called with no argument
    :param number: number of times to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # The slices hold fewer than three measures when repeat < 3:
    # the average divides by the number of measures really available.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    mini = rep[len(rep) // 20] / number
    # floor division applies to the negated length: index -5 for 100 measures
    maxi = rep[(-len(rep)) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)
def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of times to run every function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, the output
        of :func:`timeexec` completed with key ``'name'``
    """
    return [
        dict(timeexec(fct, number=number, repeat=repeat), name=name)
        for name, fct in fcts.items()
    ]
def binary_array_to_string(mat):
    """
    Converts every row of a matrix into one string obtained by
    concatenating the string representation of its elements.
    Used to compare decision path.

    :param mat: :epkg:`numpy` array with two dimensions
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not an array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]