Coverage for mlprodict/asv_benchmark/common_asv_skl.py: 95%
312 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1# pylint: disable=E1101
2"""
3Common class for all benchmarks testing
4converted models from :epkg:`scikit-learn`
5with :epkg:`asv`. The benchmark can be run through
6file :epkg:`run_asv.sh` on Linux or :epkg:`run_asv.bat` on
7Windows.
9.. warning::
10 On Windows, you should avoid cloning the repository
11 on a folder with a long full name. Visual Studio tends to
12 abide by the rule of the maximum path length even though
13 the system is told otherwise.
14"""
15import os
16from datetime import datetime
17import pickle
18from logging import getLogger
19import numpy
20from sklearn import set_config
21from sklearn.datasets import load_iris
22from sklearn.metrics import (
23 accuracy_score, mean_absolute_error, silhouette_score)
24from sklearn.model_selection import train_test_split
25from mlprodict import get_ir_version, __max_supported_opset__
26from mlprodict.onnxrt import OnnxInference
27from mlprodict.onnx_conv import (
28 to_onnx, register_rewritten_operators, register_converters,
29 register_new_operators)
30from mlprodict.onnxrt.validate.validate_benchmark import make_n_rows
31from mlprodict.onnxrt.validate.validate_problems import _modify_dimension
32from mlprodict.onnx_tools.optim import onnx_statistics
33from mlprodict.tools.asv_options_helper import (
34 expand_onnx_options, version2number)
35from mlprodict.tools.model_info import set_random_state
class _CommonAsvSklBenchmark:
    """
    Common tests to all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000],  # values for N
        [4, 20],  # values for nf
        [__max_supported_opset__],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    # expected name of the benchmarked method; verified by check_method_name
    chk_method_name = None
    # asv invalidates previously stored results when this value changes
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    # dtype the test labels are cast to
    par_ydtype = numpy.int64
    # if False, the model cached by setup_cache is stored untrained
    par_dofit = True
    # default conversion options forwarded to to_onnx
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        "Returns the scikit-learn model to benchmark (subclass hook)."
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):  # pragma: no cover
        "Converts *model* into ONNX and returns the runtime callables (subclass hook)."
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        "Computes the value reported by track_score (subclass hook)."
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        "Hook letting subclasses optimize the ONNX graph; identity by default."
        return onx

    def _get_xdtype(self, dtype):
        "Maps a benchmark dtype parameter onto the corresponding numpy type."
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', '64', 64, numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            f"Unknown dtype '{dtype}'.")

    def _get_dataset(self, nf, dtype):
        """
        Builds the benchmark dataset: iris features with gaussian noise,
        resized to *nf* features, test features cast to *dtype*.
        Returns ``(X_train, y_train), (X_test, y_test)``.
        """
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        if X_train.shape[0] < X_train.shape[1]:
            raise RuntimeError(  # pragma: no cover
                "Unable to train a model with less observations than features "
                "shape=%r." % (X_train.shape, ))
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX for sample *X* and target *opset*.
        *optim* (a nickname of conversion options) and class attribute
        *par_convopts* are mutually exclusive.
        """
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                f"Conflict between par_convopts={self.par_convopts} and optim={optim}")
        else:
            # Expand common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
        """
        Creates an OnnxInference for *onx* and *runtime*.
        For onnxruntime-based runtimes the model ir_version is temporarily
        lowered to a supported value, then restored.
        """
        if 'onnxruntime' in runtime:
            old = onx.ir_version
            onx.ir_version = get_ir_version(__max_supported_opset__)
        else:
            old = None

        try:
            res = OnnxInference(
                onx, runtime=runtime,
                runtime_options=dict(log_severity_level=3))
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                # NOTE(review): the exception object is *returned*, not raised —
                # presumably so the benchmark records a failure without aborting
                # the whole asv run; confirm before changing.
                return RuntimeError(f"onnxruntime fails due to {str(e)}")
            raise e
        if old is not None:
            # restores the original ir_version
            onx.ir_version = old
        return res

    # Part which does not change.

    def _check_rt(self, rt, meth):
        """
        Checks that runtime has the appropriate method.
        """
        if rt is None:
            raise ValueError("rt cannot be empty.")  # pragma: no cover
        if not hasattr(rt, meth):
            raise TypeError(  # pragma: no cover
                f"rt of type {type(rt)!r} has no method {meth!r}.")

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'  # pragma: no cover
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                f"Unknown runtime '{runtime}'.")
        return name

    def _name(self, nf, opset, dtype):
        "Returns the pickle filename caching the model for (nf, opset, dtype)."
        last = f'cache-{self.__class__.__name__}-nf{nf}-op{opset}-dt{dtype}.pickle'
        return last

    def setup_cache(self):
        "asv API"
        # Trains and pickles one model per (dtype, opset, nf) combination so
        # that setup() can reload it instead of refitting for every run.
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            f"Unable to dump model {model!r} into {filename!r}.")

    def setup(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        logger = getLogger('skl2onnx')
        logger.disabled = True
        # registers mlprodict converters before converting the cached model
        register_converters()
        register_rewritten_operators()
        register_new_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        # resizes the cached test set so X has exactly N observations
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        # one attribute per runtime value (rt_skl, rt_ort, ...)
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        set_config(assume_finite=True)

    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        "asv API"
        from onnxruntime import __version__ as onnxrt_version
        return version2number(onnxrt_version)

    def check_method_name(self, method_name):
        "Does some verifications. Fails if inconsistencies."
        # fails when chk_method_name is set to a *different* method name
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                f"Method name must be '{method_name}'.")
        # fails when a subclass forgot to define chk_method_name at all
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(
                    method_name))
class _CommonAsvSklBenchmarkClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a classifier.
    Benchmarks *predict_proba*, tracks accuracy of *predict*.
    """
    chk_method_name = 'predict_proba'

    def _score_metric(self, X, y_exp, y_pred):
        # accuracy of the hard predictions
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('predict_proba')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name != 'skl':
            sess = self._create_onnx_inference(onnx_model, short_name)
            self._check_rt(sess, 'run')
            fct = lambda data: sess.run({'X': data})
            fct_track = lambda data: fct(data)['output_label']
            return onnx_model, sess, fct, fct_track
        return (onnx_model, None,
                lambda data: model.predict_proba(data),
                lambda data: model.predict(data))
class _CommonAsvSklBenchmarkClassifierRawScore(_CommonAsvSklBenchmark):
    """
    Common class for a classifier exposing raw scores
    through *decision_function*.
    """
    chk_method_name = 'decision_function'

    def _score_metric(self, X, y_exp, y_pred):
        # accuracy of the hard predictions
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('decision_function')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name != 'skl':
            sess = self._create_onnx_inference(onnx_model, short_name)
            self._check_rt(sess, 'run')
            fct = lambda data: sess.run({'X': data})
            fct_track = lambda data: fct(data)['output_label']
            return onnx_model, sess, fct, fct_track
        return (onnx_model, None,
                lambda data: model.decision_function(data),
                lambda data: model.predict(data))
class _CommonAsvSklBenchmarkClustering(_CommonAsvSklBenchmark):
    """
    Common class for a clustering algorithm.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """
        Returns the silhouette score of the predicted clusters, or 0.
        when the score is undefined: a single observation, or every
        observation assigned to the same cluster.
        """
        if X.shape[0] == 1:
            return 0.  # pragma: no cover
        elif len(set(y_pred)) == 1:
            # Fixed: the original condition was ``set(y_pred) == 1`` which
            # is always False (a set never compares equal to an integer),
            # so silhouette_score could be reached with a single cluster
            # and raise a ValueError.
            return 0.
        return silhouette_score(X, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            # casts to float64 — presumably to match the precision the model
            # was fitted with; confirm against _get_dataset before changing
            rt_fct_ = lambda X: model.predict(X.astype(numpy.float64))
            rt_fct_track_ = lambda X: model.predict(X.astype(numpy.float64))
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['label']
        return onx, rt_, rt_fct_, rt_fct_track_
class _CommonAsvSklBenchmarkMultiClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a multi-classifier.
    """
    chk_method_name = 'predict_proba'

    def _get_dataset(self, nf, dtype):
        "Noisy iris dataset with one-hot encoded labels and *nf* features."
        xdtype = self._get_xdtype(dtype)
        bunch = load_iris()
        feats, labels = bunch.data, bunch.target
        rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        feats = feats + rng.randn(*feats.shape) / 3
        # one-hot encoding of the class labels
        n_classes = len(set(labels))
        onehot = numpy.zeros((labels.shape[0], n_classes), dtype=labels.dtype)
        for row, label in enumerate(labels):
            onehot[row, label] = 1
        feats = _modify_dimension(feats, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            feats, onehot, random_state=42)
        return ((X_train, y_train),
                (X_test.astype(xdtype), y_test.astype(self.par_ydtype)))

    def _score_metric(self, X, y_exp, y_pred):
        # accuracy over the flattened one-hot matrices
        return accuracy_score(y_exp.ravel(), y_pred.ravel())

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('predict_proba')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name != 'skl':
            sess = self._create_onnx_inference(onnx_model, short_name)
            self._check_rt(sess, 'run')
            fct = lambda data: sess.run({'X': data})
            fct_track = lambda data: fct(data)['output_label']
            return onnx_model, sess, fct, fct_track
        return (onnx_model, None,
                lambda data: model.predict_proba(data),
                lambda data: model.predict(data))
class _CommonAsvSklBenchmarkOutlier(_CommonAsvSklBenchmark):
    """
    Common class for outlier detection.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        # mean of the predicted values
        return y_pred.sum() / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('predict')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name == 'skl':
            predict = lambda data: model.predict(data)
            return onnx_model, None, predict, predict
        sess = self._create_onnx_inference(onnx_model, short_name)
        self._check_rt(sess, 'run')
        fct = lambda data: sess.run({'X': data})
        return onnx_model, sess, fct, lambda data: fct(data)['scores']
class _CommonAsvSklBenchmarkRegressor(_CommonAsvSklBenchmark):
    """
    Common class for a regressor.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        "Mean absolute error between expected and predicted values."
        return mean_absolute_error(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        # Consistency fix: every sibling benchmark class applies the
        # _optimize_onnx hook after conversion; this one previously skipped
        # it. The base implementation is the identity, so default behaviour
        # is unchanged, but subclasses overriding _optimize_onnx are now
        # handled like everywhere else.
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_
class _CommonAsvSklBenchmarkTrainableTransform(_CommonAsvSklBenchmark):
    """
    Common class for a trainable transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        # mean of the transformed values
        return y_pred.sum() / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('transform')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name == 'skl':
            transform = lambda data: model.transform(data)
            return onnx_model, None, transform, transform
        sess = self._create_onnx_inference(onnx_model, short_name)
        self._check_rt(sess, 'run')
        fct = lambda data: sess.run({'X': data})
        return onnx_model, sess, fct, lambda data: fct(data)['variable']
class _CommonAsvSklBenchmarkTransform(_CommonAsvSklBenchmark):
    """
    Common class for a transformer.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        # mean of the transformed values
        total = numpy.sum(y_pred)
        return total / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX and returns
        ``(onnx_model, runtime_or_None, benchmark_fct, tracking_fct)``.
        """
        self.check_method_name('transform')
        converted = self._to_onnx(model, X, opset, dtype, optim)
        converted = self._optimize_onnx(converted)
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            inf = None
            run_fct = lambda data: model.transform(data)
            track_fct = lambda data: model.transform(data)
        else:
            inf = self._create_onnx_inference(converted, rt_name)
            self._check_rt(inf, 'run')
            run_fct = lambda data: inf.run({'X': data})
            track_fct = lambda data: run_fct(data)['variable']
        return converted, inf, run_fct, track_fct
class _CommonAsvSklBenchmarkTransformPositive(_CommonAsvSklBenchmarkTransform):
    """
    Common class for a transformer for positive features.
    """
    chk_method_name = 'transform'

    def _get_dataset(self, nf, dtype):
        "Same dataset as the base class but with non-negative features."
        xdtype = self._get_xdtype(dtype)
        bunch = load_iris()
        feats, labels = bunch.data, bunch.target
        rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        feats = feats + rng.randn(*feats.shape) / 3
        # absolute value guarantees every feature is >= 0
        feats = numpy.abs(_modify_dimension(feats, nf))
        X_train, X_test, y_train, y_test = train_test_split(
            feats, labels, random_state=42)
        return ((X_train, y_train),
                (X_test.astype(xdtype), y_test.astype(self.par_ydtype)))