Coverage for mlprodict/npy/xop_convert.py: 98%

131 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1""" 

2@file 

3@brief Easier API to build onnx graphs. Inspired from :epkg:`skl2onnx`. 

4 

5.. versionadded:: 0.9 

6""" 

7import logging 

8import numpy 

9from .xop import OnnxOperator, OnnxOperatorFunction 

10from .xop_variable import NodeResultName, Variable 

11 

12 

13logger = logging.getLogger('xop') 

14 

15 

class OnnxSubOnnx(OnnxOperator):
    """
    This operator is used to insert an existing ONNX graph into
    the ONNX graph being built.

    :param model: ONNX model (or any object exposing either a ``graph``
        attribute like a ModelProto, or ``input``/``output`` lists like a
        FunctionProto) to insert
    :param inputs: inputs connecting the sub-graph to the main graph,
        guessed from *model* if empty
    :param output_names: to rename the outputs of the sub-graph
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = 'SubOnnx'
    domain = 'mlprodict.xop'

    def __init__(self, model, *inputs, output_names=None):
        logger.debug("SubOnnx(ONNX, %d in, output_names=%r)",
                     len(inputs), output_names)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover
        # A ModelProto exposes `graph`, a FunctionProto-like object exposes
        # `input`/`output` directly.  The same distinction is used below to
        # build the default inputs and expected outputs.
        has_graph = hasattr(model, 'graph')
        model_inputs = model.graph.input if has_graph else model.input
        model_outputs = model.graph.output if has_graph else model.output
        if len(inputs) > len(model_inputs):
            raise RuntimeError(  # pragma: no cover
                "Unexpected number of inputs %r > expected %r." % (
                    len(inputs), len(model_inputs)))
        if (output_names is not None and
                len(output_names) != len(model_outputs)):
            raise RuntimeError(  # pragma: no cover
                "Unexpected number of outputs %r != expected %r." % (
                    len(output_names), len(model_outputs)))
        if len(inputs) == 0:
            # No explicit inputs: mirror the inputs declared by the model.
            if has_graph:
                inputs = [Variable(i.name, i.type.tensor_type)
                          for i in model_inputs]
            else:
                inputs = [Variable(n) for n in model_inputs]
        OnnxOperator.__init__(self, *inputs, output_names=output_names)
        if self.output_names is None and self.expected_outputs is None:
            # No renaming requested: expose the model outputs as they are.
            if has_graph:
                self.expected_outputs = [
                    (i.name, i.type.tensor_type)
                    for i in model_outputs]
            else:
                # Output types are unknown without a graph.
                self.expected_outputs = [(n, None) for n in model_outputs]
        self.model = model

    @property
    def input_names(self):
        "Returns the input names."
        return ([i.name for i in self.model.graph.input]
                if hasattr(self.model, 'graph') else list(self.model.input))

    def __repr__(self):
        "usual"
        atts = {}
        for att in ['output_names']:
            value = getattr(self, att, None)
            if value is not None:
                atts[att] = value
        atts.update(self.kwargs)
        msg = ", ".join(f"{k}={v!r}" for k, v in atts.items())
        if len(atts) > 0:
            msg = ", " + msg
        return f"{self.__class__.__name__}(...{msg})"

    def add_to(self, builder):
        """
        Adds to graph builder.

        Every name of the inserted sub-graph (initializers, inputs,
        intermediate results, outputs) is renamed with a unique name in
        the main graph; inputs and outputs are linked with Identity nodes.

        :param builder: instance of @see cl _GraphBuilder,
            it must have a method `add_node`
        """
        logger.debug("SubOnnx.add_to(builder)")
        inputs = builder.get_input_names(self, self.inputs)
        n_outputs = len(self.model.graph.output)
        outputs = [builder.get_unique_output_name(NodeResultName(self, i))
                   for i in range(n_outputs)]

        # maps names inside the sub-graph to their unique names
        # in the main graph
        mapped_names = {}

        # adding initializers
        for init in self.model.graph.initializer:
            new_name = builder.get_unique_name(init.name, reserved=False)
            mapped_names[init.name] = new_name
            builder.add_initializer(new_name, init)

        # linking inputs
        for inp, name in zip(self.model.graph.input, inputs):
            new_name = builder.get_unique_name(inp.name, reserved=False)
            mapped_names[inp.name] = new_name
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + name, reserved=False),
                [name], [new_name])

        # adding nodes
        for node in list(self.model.graph.node):
            new_inputs = []
            for i in node.input:
                if i not in mapped_names:
                    raise RuntimeError(  # pragma: no cover
                        f"Unable to find input {i!r} in {mapped_names!r}.")
                new_inputs.append(mapped_names[i])
            new_outputs = []
            for o in node.output:
                new_name = builder.get_unique_name(o, reserved=False)
                mapped_names[o] = new_name
                new_outputs.append(new_name)

            atts = {}
            for att in node.attribute:
                atts[att.name] = OnnxOperatorFunction.attribute_to_value(att)

            builder.add_node(
                node.op_type,
                builder.get_unique_name('_sub_' + node.name, reserved=False),
                new_inputs, new_outputs, domain=node.domain, **atts)

        # linking outputs
        for out, name in zip(self.model.graph.output, outputs):
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + out.name, reserved=False),
                [mapped_names[out.name]], [name])

    def to_onnx_this(self, evaluated_inputs):
        """
        Returns the ONNX graph.

        :param evaluated_inputs: unused
        :return: ONNX graph
        """
        return self.model

148 

149 

class OnnxSubEstimator(OnnxSubOnnx):
    """
    This operator is used to call the converter of a model
    to insert the node coming from the conversion into a
    bigger ONNX graph. It supports model from :epkg:`scikit-learn`
    using :epkg:`sklearn-onnx`.

    :param model: model to convert
    :param inputs: inputs
    :param op_version: targeted opset
    :param options: to rewrite the options used to convert the model
    :param initial_types: the implementation may be wrong in guessing
        the input types of the model, this parameter can be used
        to overwrite them, usually a dictionary
        `{ input_name: numpy array as an example }`
    :param kwargs: any other parameters such as black listed or
        white listed operators
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = "SubEstimator"
    domain = 'mlprodict.xop'

    def __init__(self, model, *inputs, op_version=None,
                 output_names=None, options=None,
                 initial_types=None, **kwargs):
        logger.debug("OnnxSubEstimator(%r, %r, op_version=%r, "
                     "output_names=%r, initial_types=%r, options=%r, "
                     "kwargs=%r)", type(model), inputs, op_version,
                     output_names, initial_types, options, kwargs)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover
        # Convert the estimator into ONNX first, then insert the result
        # as a sub-graph through the parent class.
        onx = OnnxSubEstimator._to_onnx(
            model, inputs, op_version=op_version, options=options,
            initial_types=initial_types, **kwargs)
        OnnxSubOnnx.__init__(
            self, onx, *inputs, output_names=output_names)
        # keep the conversion arguments for __repr__ and debugging
        self.ml_model = model
        self.options = options
        self.initial_types = initial_types
        self.op_version = op_version

    def __repr__(self):
        "usual"
        atts = {}
        for att in ['op_version', 'output_names', 'options',
                    'initial_types']:
            value = getattr(self, att, None)
            if value is not None:
                atts[att] = value
        atts.update(self.kwargs)
        msg = ", ".join(f"{k}={v!r}" for k, v in atts.items())
        if len(atts) > 0:
            msg = ", " + msg
        return f"{self.__class__.__name__}({self.ml_model!r}{msg})"

    @staticmethod
    def _to_onnx(model, inputs, op_version=None, options=None,
                 initial_types=None, **kwargs):
        """
        Converts a model into ONNX and inserts it into an ONNX graph.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset versions or None to use the latest one
        :param options: options to change the behaviour of the converter
        :param kwargs: additional parameters such as black listed or white
            listed operators
        :return: ONNX model

        The method currently supports models trained with
        :epkg:`scikit-learn`, :epkg:`xgboost`, :epkg:`lightgbm`.
        """
        from sklearn.base import BaseEstimator

        if isinstance(model, BaseEstimator):
            logger.debug("OnnxSubEstimator._to_onnx(%r, %r, op_version=%r "
                         "options=%r, initial_types=%r, kwargs=%r)",
                         type(model), inputs, op_version, options,
                         initial_types, kwargs)
            return OnnxSubEstimator._to_onnx_sklearn(
                model, inputs, op_version=op_version, options=options,
                initial_types=initial_types, **kwargs)
        raise RuntimeError(  # pragma: no cover
            f"Unable to convert into ONNX model type {type(model)!r}.")

    @staticmethod
    def _to_onnx_sklearn(model, inputs, op_version=None, options=None,
                         initial_types=None, **kwargs):
        """
        Converts a :epkg:`scikit-learn` model into ONNX
        and inserts it into an ONNX graph. The library relies on
        function @see fn to_onnx and library :epkg:`sklearn-onnx`.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset versions or None to use the latest one
        :param initial_types: if None, the input types are guessed from the
            inputs. The function converts into ONNX the previous
            node of the graph and tries to infer the initial_types
            with the little information it has. It may not work.
            It is recommended to specify this parameter.
        :param options: options to change the behaviour of the converter
        :param kwargs: additional parameters such as black listed or white
            listed operators
        :return: ONNX model

        Default options is `{'zipmap': False}` for a classifier.
        """
        from ..onnx_conv.convert import to_onnx
        if options is None:
            # zipmap would produce a sequence of maps, not convenient
            # inside a graph, disable it by default for classifiers
            from sklearn.base import ClassifierMixin
            if isinstance(model, ClassifierMixin):
                options = {'zipmap': False}
        if initial_types is None:
            # guess the initial types from the known input variables
            from skl2onnx.common.data_types import _guess_numpy_type  # delayed
            for i, n in enumerate(inputs):
                if not isinstance(n, Variable):
                    raise NotImplementedError(
                        "Input %d is not a variable but %r." % (i, type(n)))
            initial_types = [(n.name, _guess_numpy_type(n.dtype, n.shape))
                             for n in inputs]

        logger.debug("OnnxSubEstimator._to_onnx_sklearn(%r, %r, "
                     "op_version=%r, options=%r, initial_types=%r, "
                     "kwargs=%r)",
                     type(model), inputs, op_version, options,
                     initial_types, kwargs)

        if isinstance(initial_types, numpy.ndarray):
            # initial_types given as a sample array: let to_onnx guess
            # the types from it, only valid for single-input models
            if len(inputs) != 1:
                raise RuntimeError(  # pragma: no cover
                    "The model has %s inputs but only %d input are "
                    "described in 'initial_types'." % (
                        len(inputs), 1))
            X = initial_types
            initial_types = None
        elif len(inputs) != len(initial_types):
            raise RuntimeError(  # pragma: no cover
                "The model has %s inputs but only %d input are "
                "described in 'initial_types'." % (
                    len(inputs), len(initial_types)))
        else:
            X = None

        onx = to_onnx(model, X, initial_types=initial_types, options=options,
                      rewrite_ops=True, target_opset=op_version, **kwargs)
        return onx