Coverage for mlprodict/onnxrt/ops_cpu/op_tree_ensemble_regressor.py: 98%

83 statements  


# -*- encoding: utf-8 -*-
# pylint: disable=E0203,E1101,C0111
"""
@file
@brief Runtime operator.
"""
from collections import OrderedDict
import numpy
from onnx.defs import onnx_opset_version
from ._op_helper import _get_typed_class_attribute
from ._op import OpRunUnaryNum, RuntimeTypeError
from ._new_ops import OperatorSchema
from .op_tree_ensemble_regressor_ import (  # pylint: disable=E0611,E0401
    RuntimeTreeEnsembleRegressorFloat, RuntimeTreeEnsembleRegressorDouble)
from .op_tree_ensemble_regressor_p_ import (  # pylint: disable=E0611,E0401
    RuntimeTreeEnsembleRegressorPFloat, RuntimeTreeEnsembleRegressorPDouble)


class TreeEnsembleRegressorCommon(OpRunUnaryNum):

    def __init__(self, dtype, onnx_node, desc=None,
                 expected_attributes=None, runtime_version=3, **options):
        OpRunUnaryNum.__init__(
            self, onnx_node, desc=desc,
            expected_attributes=expected_attributes, **options)
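        # Thresholds (trees, trees_rows, rows) forwarded to the parallelised
        # C++ runtime; they can be tuned later with change_parallel.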

        self.parallel = (60, 128, 20)
        self._dtype = dtype
        self._runtime_version = runtime_version
        self._init(dtype=dtype, version=runtime_version)

    def change_parallel(self, trees, trees_rows, rows):
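        """
        Changes the thresholds *(trees, trees_rows, rows)* forwarded to the
        parallelised C++ runtime (they control when it switches to
        multithreaded execution) and rebuilds the runtime object.
        """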

        self.parallel = (trees, trees_rows, rows)
        self._init(dtype=self._dtype, version=self._runtime_version)

    def _get_typed_attributes(self, k):
        return _get_typed_class_attribute(self, k, self.__class__.atts)

    def _find_custom_operator_schema(self, op_name):
        """
        Finds a custom operator defined by this runtime.
        """
        if op_name == "TreeEnsembleRegressorDouble":
            return TreeEnsembleRegressorDoubleSchema()
        raise RuntimeError(  # pragma: no cover
            f"Unable to find a schema for operator '{op_name}'.")

    def _init(self, dtype, version):
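        # Instantiates the C++ runtime. ``version`` selects the
        # implementation: 0 is the plain runtime, versions 1 to 3 use the
        # parallelised runtime with more of its optimisation flags enabled.
        # ``dtype`` picks the float or double specialisation; when None, it
        # is inferred from the *_as_tensor attributes, defaulting to float32.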

        atts = []
        for k in self.__class__.atts:
            v = self._get_typed_attributes(k)
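            # Attributes suffixed with '_as_tensor' (ai.onnx.ml opset 3)
            # replace the plain attribute appended just before when they
            # carry a value, and may change the dtype (e.g. to float64).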

            if k.endswith('_as_tensor'):
                if (v is not None and isinstance(v, numpy.ndarray) and
                        v.size > 0):
                    # replacements
                    atts[-1] = v
                    if dtype is None:
                        dtype = v.dtype
                continue
            atts.append(v)

        if dtype is None:
            dtype = numpy.float32

        if dtype == numpy.float32:
            if version == 0:
                self.rt_ = RuntimeTreeEnsembleRegressorFloat()
            elif version == 1:
                self.rt_ = RuntimeTreeEnsembleRegressorPFloat(
                    self.parallel[0], self.parallel[1], self.parallel[2], False, False)
            elif version == 2:
                self.rt_ = RuntimeTreeEnsembleRegressorPFloat(
                    self.parallel[0], self.parallel[1], self.parallel[2], True, False)
            elif version == 3:
                self.rt_ = RuntimeTreeEnsembleRegressorPFloat(
                    self.parallel[0], self.parallel[1], self.parallel[2], True, True)
            else:
                raise ValueError(f"Unknown version '{version}'.")
        elif dtype == numpy.float64:
            if version == 0:
                self.rt_ = RuntimeTreeEnsembleRegressorDouble()
            elif version == 1:
                self.rt_ = RuntimeTreeEnsembleRegressorPDouble(
                    self.parallel[0], self.parallel[1], self.parallel[2], False, False)
            elif version == 2:
                self.rt_ = RuntimeTreeEnsembleRegressorPDouble(
                    self.parallel[0], self.parallel[1], self.parallel[2], True, False)
            elif version == 3:
                self.rt_ = RuntimeTreeEnsembleRegressorPDouble(
                    self.parallel[0], self.parallel[1], self.parallel[2], True, True)
            else:
                raise ValueError(f"Unknown version '{version}'.")
        else:
            raise RuntimeTypeError(  # pragma: no cover
                f"Unsupported dtype={dtype}.")
        self.rt_.init(*atts)

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        Calls a C++ implementation adapted from
        `tree_ensemble_classifier.cc
        <https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/cpu/ml/tree_ensemble_classifier.cc>`_
        in :epkg:`onnxruntime`.
        See class :class:`RuntimeTreeEnsembleRegressorFloat
        <mlprodict.onnxrt.ops_cpu.op_tree_ensemble_regressor_.RuntimeTreeEnsembleRegressorFloat>` or
        class :class:`RuntimeTreeEnsembleRegressorDouble
        <mlprodict.onnxrt.ops_cpu.op_tree_ensemble_regressor_.RuntimeTreeEnsembleRegressorDouble>`.
        """
        if hasattr(x, 'todense'):
            x = x.todense()
        pred = self.rt_.compute(x)
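        # The runtime returns a flat array; reshape it to
        # (n_samples, n_targets) when it holds several values per row.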

        if pred.shape[0] != x.shape[0]:
            pred = pred.reshape(x.shape[0], pred.shape[0] // x.shape[0])
        return (pred, )

class TreeEnsembleRegressor_1(TreeEnsembleRegressorCommon):
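    """
    Runtime for operator *TreeEnsembleRegressor* (ai.onnx.ml opset 1),
    always computed with single precision floats.
    """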

    atts = OrderedDict([
        ('aggregate_function', b'SUM'),
        ('base_values', numpy.empty(0, dtype=numpy.float32)),
        ('base_values_as_tensor', []),
        ('n_targets', 1),
        ('nodes_falsenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_featureids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_hitrates', numpy.empty(0, dtype=numpy.float32)),
        ('nodes_missing_value_tracks_true', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_modes', []),
        ('nodes_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_truenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_values', numpy.empty(0, dtype=numpy.float32)),
        ('post_transform', b'NONE'),
        ('target_ids', numpy.empty(0, dtype=numpy.int64)),
        ('target_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_weights', numpy.empty(0, dtype=numpy.float32)),
    ])

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        TreeEnsembleRegressorCommon.__init__(
            self, numpy.float32, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressor_1.atts,
            runtime_version=runtime_version, **options)

class TreeEnsembleRegressor_3(TreeEnsembleRegressorCommon):
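    """
    Runtime for operator *TreeEnsembleRegressor* (ai.onnx.ml opset 3);
    the *_as_tensor attributes may switch the computation to double
    precision.
    """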

    atts = OrderedDict([
        ('aggregate_function', b'SUM'),
        ('base_values', numpy.empty(0, dtype=numpy.float32)),
        ('base_values_as_tensor', []),
        ('n_targets', 1),
        ('nodes_falsenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_featureids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_hitrates', numpy.empty(0, dtype=numpy.float32)),
        ('nodes_hitrates_as_tensor', []),
        ('nodes_missing_value_tracks_true', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_modes', []),
        ('nodes_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_truenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_values', numpy.empty(0, dtype=numpy.float32)),
        ('nodes_values_as_tensor', []),
        ('post_transform', b'NONE'),
        ('target_ids', numpy.empty(0, dtype=numpy.int64)),
        ('target_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_weights', numpy.empty(0, dtype=numpy.float32)),
        ('target_weights_as_tensor', []),
    ])

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        TreeEnsembleRegressorCommon.__init__(
            self, None, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressor_3.atts,
            runtime_version=runtime_version, **options)

class TreeEnsembleRegressorDouble(TreeEnsembleRegressorCommon):
    """
    Runtime for the custom operator `TreeEnsembleRegressorDouble`.

    .. exref::
        :title: How to use TreeEnsembleRegressorDouble instead of TreeEnsembleRegressor

        .. runpython::
            :showcode:

            import warnings
            import numpy
            from sklearn.datasets import make_regression
            from sklearn.ensemble import (
                RandomForestRegressor, GradientBoostingRegressor,
                HistGradientBoostingRegressor)
            from mlprodict.onnx_conv import to_onnx
            from mlprodict.onnxrt import OnnxInference

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                models = [
                    RandomForestRegressor(n_estimators=10),
                    GradientBoostingRegressor(n_estimators=10),
                    HistGradientBoostingRegressor(max_iter=10),
                ]
                X, y = make_regression(1000, n_features=5, n_targets=1)
                X = X.astype(numpy.float64)
                conv = {}
                for model in models:
                    model.fit(X[:500], y[:500])
                    onx64 = to_onnx(model, X, rewrite_ops=True, target_opset=15)
                    assert 'TreeEnsembleRegressorDouble' in str(onx64)
                    expected = model.predict(X)
                    oinf = OnnxInference(onx64)
                    got = oinf.run({'X': X})
                    diff = numpy.abs(got['variable'] - expected)
                    print("%s: max=%f mean=%f" % (
                        model.__class__.__name__, diff.max(), diff.mean()))
    """

    atts = OrderedDict([
        ('aggregate_function', b'SUM'),
        ('base_values', numpy.empty(0, dtype=numpy.float64)),
        ('n_targets', 1),
        ('nodes_falsenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_featureids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_hitrates', numpy.empty(0, dtype=numpy.float64)),
        ('nodes_missing_value_tracks_true', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_modes', []),
        ('nodes_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_truenodeids', numpy.empty(0, dtype=numpy.int64)),
        ('nodes_values', numpy.empty(0, dtype=numpy.float64)),
        ('post_transform', b'NONE'),
        ('target_ids', numpy.empty(0, dtype=numpy.int64)),
        ('target_nodeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_treeids', numpy.empty(0, dtype=numpy.int64)),
        ('target_weights', numpy.empty(0, dtype=numpy.float64)),
    ])

    def __init__(self, onnx_node, desc=None, runtime_version=1, **options):
        TreeEnsembleRegressorCommon.__init__(
            self, numpy.float64, onnx_node, desc=desc,
            expected_attributes=TreeEnsembleRegressorDouble.atts,
            runtime_version=runtime_version, **options)

class TreeEnsembleRegressorDoubleSchema(OperatorSchema):
    """
    Defines a schema for operators added in this package
    such as @see cl TreeEnsembleRegressorDouble.
    """

    def __init__(self):
        OperatorSchema.__init__(self, 'TreeEnsembleRegressorDouble')
        self.attributes = TreeEnsembleRegressorDouble.atts



# onnx packages whose main opset is at least 16 also ship ai.onnx.ml opset 3,
# which defines the *_as_tensor attributes handled by TreeEnsembleRegressor_3;
# older packages fall back to the opset 1 implementation.
if onnx_opset_version() >= 16:
    TreeEnsembleRegressor = TreeEnsembleRegressor_3
else:
    TreeEnsembleRegressor = TreeEnsembleRegressor_1  # pragma: no cover