Coverage for mlprodict/onnxrt/ops_cpu/op_tree_ensemble_regressor.py: 98%
83 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1# -*- encoding: utf-8 -*-
2# pylint: disable=E0203,E1101,C0111
3"""
4@file
5@brief Runtime operator.
6"""
7from collections import OrderedDict
8import numpy
9from onnx.defs import onnx_opset_version
10from ._op_helper import _get_typed_class_attribute
11from ._op import OpRunUnaryNum, RuntimeTypeError
12from ._new_ops import OperatorSchema
13from .op_tree_ensemble_regressor_ import ( # pylint: disable=E0611,E0401
14 RuntimeTreeEnsembleRegressorFloat, RuntimeTreeEnsembleRegressorDouble)
15from .op_tree_ensemble_regressor_p_ import ( # pylint: disable=E0611,E0401
16 RuntimeTreeEnsembleRegressorPFloat, RuntimeTreeEnsembleRegressorPDouble)
class TreeEnsembleRegressorCommon(OpRunUnaryNum):
    """
    Shared implementation behind every *TreeEnsembleRegressor* runtime.
    Subclasses supply the attribute schema (class attribute ``atts``) and
    the expected dtype; this class selects and initializes the matching
    C++ runtime object.
    """

    def __init__(self, dtype, onnx_node, desc=None,
                 expected_attributes=None, runtime_version=3, **options):
        OpRunUnaryNum.__init__(
            self, onnx_node, desc=desc,
            expected_attributes=expected_attributes, **options)
        # parallelization thresholds handed to the parallel C++ runtimes
        self.parallel = (60, 128, 20)
        self._dtype = dtype
        self._runtime_version = runtime_version
        self._init(dtype=dtype, version=runtime_version)

    def change_parallel(self, trees, trees_rows, rows):
        """
        Changes the parallelization thresholds and rebuilds the
        underlying C++ runtime accordingly.
        """
        self.parallel = (trees, trees_rows, rows)
        self._init(dtype=self._dtype, version=self._runtime_version)

    def _get_typed_attributes(self, k):
        """Returns attribute *k* converted to its declared type."""
        return _get_typed_class_attribute(self, k, self.__class__.atts)

    def _find_custom_operator_schema(self, op_name):
        """
        Finds a custom operator defined by this runtime.
        """
        if op_name == "TreeEnsembleRegressorDouble":
            return TreeEnsembleRegressorDoubleSchema()
        raise RuntimeError(  # pragma: no cover
            f"Unable to find a schema for operator '{op_name}'.")

    def _init(self, dtype, version):
        """
        Collects the typed attributes and instantiates the C++ runtime
        (``self.rt_``), then calls its ``init`` method with them.
        """
        atts = []
        for name in self.__class__.atts:
            value = self._get_typed_attributes(name)
            if (name.endswith('_as_tensor') and value is not None and
                    isinstance(value, numpy.ndarray) and value.size > 0):
                # a non-empty *_as_tensor attribute replaces the plain
                # attribute appended just before it in the schema
                atts[-1] = value
                if dtype is None:
                    dtype = value.dtype
                continue
            atts.append(value)

        if dtype is None:
            dtype = numpy.float32
        if dtype == numpy.float32:
            sequential_cls = RuntimeTreeEnsembleRegressorFloat
            parallel_cls = RuntimeTreeEnsembleRegressorPFloat
        elif dtype == numpy.float64:
            sequential_cls = RuntimeTreeEnsembleRegressorDouble
            parallel_cls = RuntimeTreeEnsembleRegressorPDouble
        else:
            raise RuntimeTypeError(  # pragma: no cover
                f"Unsupported dtype={dtype}.")

        if version == 0:
            self.rt_ = sequential_cls()
        elif version in (1, 2, 3):
            # versions 1..3 map to two boolean optimization flags
            flags = {1: (False, False),
                     2: (True, False),
                     3: (True, True)}[version]
            self.rt_ = parallel_cls(
                self.parallel[0], self.parallel[1], self.parallel[2],
                flags[0], flags[1])
        else:
            raise ValueError(f"Unknown version '{version}'.")
        self.rt_.init(*atts)

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        This is a C++ implementation coming from
        :epkg:`onnxruntime`.
        `tree_ensemble_classifier.cc
        <https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/cpu/ml/tree_ensemble_classifier.cc>`_.
        See class :class:`RuntimeTreeEnsembleRegressorFloat
        <mlprodict.onnxrt.ops_cpu.op_tree_ensemble_regressor_.RuntimeTreeEnsembleRegressorFloat>` or
        class :class:`RuntimeTreeEnsembleRegressorDouble
        <mlprodict.onnxrt.ops_cpu.op_tree_ensemble_regressor_.RuntimeTreeEnsembleRegressorDouble>`.
        """
        # sparse inputs are densified before being handed to the C++ code
        if hasattr(x, 'todense'):
            x = x.todense()
        pred = self.rt_.compute(x)
        n_rows = x.shape[0]
        if pred.shape[0] != n_rows:
            # flat output: reshape to (rows, targets)
            pred = pred.reshape(n_rows, pred.shape[0] // n_rows)
        return (pred, )
class TreeEnsembleRegressor_1(TreeEnsembleRegressorCommon):
    """
    TreeEnsembleRegressor for opset 1 of domain *ai.onnx.ml*,
    computations are done with floats.
    """

    # attribute schema: names and default (typed) values, in the order
    # expected by the C++ runtime's ``init`` method
    atts = OrderedDict(
        aggregate_function=b'SUM',
        base_values=numpy.empty(0, dtype=numpy.float32),
        base_values_as_tensor=[],
        n_targets=1,
        nodes_falsenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_featureids=numpy.empty(0, dtype=numpy.int64),
        nodes_hitrates=numpy.empty(0, dtype=numpy.float32),
        nodes_missing_value_tracks_true=numpy.empty(0, dtype=numpy.int64),
        nodes_modes=[],
        nodes_nodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_treeids=numpy.empty(0, dtype=numpy.int64),
        nodes_truenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_values=numpy.empty(0, dtype=numpy.float32),
        post_transform=b'NONE',
        target_ids=numpy.empty(0, dtype=numpy.int64),
        target_nodeids=numpy.empty(0, dtype=numpy.int64),
        target_treeids=numpy.empty(0, dtype=numpy.int64),
        target_weights=numpy.empty(0, dtype=numpy.float32),
    )

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        super().__init__(
            numpy.float32, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressor_1.atts,
            runtime_version=runtime_version, **options)
class TreeEnsembleRegressor_3(TreeEnsembleRegressorCommon):
    """
    TreeEnsembleRegressor for opset 3 of domain *ai.onnx.ml*,
    the dtype is inferred from the ``*_as_tensor`` attributes.
    """

    # attribute schema: names and default (typed) values, in the order
    # expected by the C++ runtime's ``init`` method; ``*_as_tensor``
    # entries override the preceding attribute when not empty
    atts = OrderedDict(
        aggregate_function=b'SUM',
        base_values=numpy.empty(0, dtype=numpy.float32),
        base_values_as_tensor=[],
        n_targets=1,
        nodes_falsenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_featureids=numpy.empty(0, dtype=numpy.int64),
        nodes_hitrates=numpy.empty(0, dtype=numpy.float32),
        nodes_hitrates_as_tensor=[],
        nodes_missing_value_tracks_true=numpy.empty(0, dtype=numpy.int64),
        nodes_modes=[],
        nodes_nodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_treeids=numpy.empty(0, dtype=numpy.int64),
        nodes_truenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_values=numpy.empty(0, dtype=numpy.float32),
        nodes_values_as_tensor=[],
        post_transform=b'NONE',
        target_ids=numpy.empty(0, dtype=numpy.int64),
        target_nodeids=numpy.empty(0, dtype=numpy.int64),
        target_treeids=numpy.empty(0, dtype=numpy.int64),
        target_weights=numpy.empty(0, dtype=numpy.float32),
        target_weights_as_tensor=[],
    )

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        # dtype is None: it is deduced from the *_as_tensor attributes
        super().__init__(
            None, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressor_3.atts,
            runtime_version=runtime_version, **options)
class TreeEnsembleRegressorDouble(TreeEnsembleRegressorCommon):
    """
    Runtime for the custom operator `TreeEnsembleRegressorDouble`,
    computations are done with doubles.

    .. exref::
        :title: How to use TreeEnsembleRegressorDouble instead of TreeEnsembleRegressor

        .. runpython::
            :showcode:

            import warnings
            import numpy
            from sklearn.datasets import make_regression
            from sklearn.ensemble import (
                RandomForestRegressor, GradientBoostingRegressor,
                HistGradientBoostingRegressor)
            from mlprodict.onnx_conv import to_onnx
            from mlprodict.onnxrt import OnnxInference

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                models = [
                    RandomForestRegressor(n_estimators=10),
                    GradientBoostingRegressor(n_estimators=10),
                    HistGradientBoostingRegressor(max_iter=10),
                ]
                X, y = make_regression(1000, n_features=5, n_targets=1)
                X = X.astype(numpy.float64)
                conv = {}
                for model in models:
                    model.fit(X[:500], y[:500])
                    onx64 = to_onnx(model, X, rewrite_ops=True, target_opset=15)
                    assert 'TreeEnsembleRegressorDouble' in str(onx64)
                    expected = model.predict(X)
                    oinf = OnnxInference(onx64)
                    got = oinf.run({'X': X})
                    diff = numpy.abs(got['variable'] - expected)
                    print("%s: max=%f mean=%f" % (
                        model.__class__.__name__, diff.max(), diff.mean()))
    """

    # attribute schema: like TreeEnsembleRegressor_1 but with float64
    # defaults and no *_as_tensor entries
    atts = OrderedDict(
        aggregate_function=b'SUM',
        base_values=numpy.empty(0, dtype=numpy.float64),
        n_targets=1,
        nodes_falsenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_featureids=numpy.empty(0, dtype=numpy.int64),
        nodes_hitrates=numpy.empty(0, dtype=numpy.float64),
        nodes_missing_value_tracks_true=numpy.empty(0, dtype=numpy.int64),
        nodes_modes=[],
        nodes_nodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_treeids=numpy.empty(0, dtype=numpy.int64),
        nodes_truenodeids=numpy.empty(0, dtype=numpy.int64),
        nodes_values=numpy.empty(0, dtype=numpy.float64),
        post_transform=b'NONE',
        target_ids=numpy.empty(0, dtype=numpy.int64),
        target_nodeids=numpy.empty(0, dtype=numpy.int64),
        target_treeids=numpy.empty(0, dtype=numpy.int64),
        target_weights=numpy.empty(0, dtype=numpy.float64),
    )

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        super().__init__(
            numpy.float64, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressorDouble.atts,
            runtime_version=runtime_version, **options)
class TreeEnsembleRegressorDoubleSchema(OperatorSchema):
    """
    Defines a schema for operators added in this package
    such as @see cl TreeEnsembleRegressorDouble.
    """

    def __init__(self):
        super().__init__('TreeEnsembleRegressorDouble')
        # expose the same attribute schema as the operator itself
        self.attributes = TreeEnsembleRegressorDouble.atts
# Opset 3 of ai.onnx.ml became available with opset 16 of the main domain;
# pick the implementation matching the installed onnx package.
if onnx_opset_version() < 16:
    TreeEnsembleRegressor = TreeEnsembleRegressor_1  # pragma: no cover
else:
    TreeEnsembleRegressor = TreeEnsembleRegressor_3