Coverage for mlprodict/npy/xop_convert.py: 98%
131 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1"""
2@file
3@brief Easier API to build onnx graphs. Inspired from :epkg:`skl2onnx`.
5.. versionadded:: 0.9
6"""
7import logging
8import numpy
9from .xop import OnnxOperator, OnnxOperatorFunction
10from .xop_variable import NodeResultName, Variable
13logger = logging.getLogger('xop')
class OnnxSubOnnx(OnnxOperator):
    """
    This operator is used to insert existing ONNX into
    the ONNX graph being built.

    :param model: ONNX structure to insert, either an object exposing
        a `graph` attribute (inputs and outputs are then read from
        `model.graph`) or an object exposing `input` and `output`
        directly as sequences of names
    :param inputs: inputs of the operator, if none is given,
        one @see cl Variable is created for every input of *model*
    :param output_names: optional output names, there must be
        as many names as *model* has outputs
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = 'SubOnnx'
    domain = 'mlprodict.xop'

    def __init__(self, model, *inputs, output_names=None):
        logger.debug("SubOnnx(ONNX, %d in, output_names=%r)",
                     len(inputs), output_names)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover

        # The inputs and outputs are looked up once so that the
        # validation below also works for a model without attribute
        # `graph` instead of failing with an AttributeError.
        has_graph = hasattr(model, 'graph')
        model_inputs = model.graph.input if has_graph else model.input
        model_outputs = model.graph.output if has_graph else model.output

        if len(inputs) > len(model_inputs):
            raise RuntimeError(  # pragma: no cover
                f"Unexpected number of inputs {len(inputs)!r} > "
                f"expected {len(model_inputs)!r}.")
        if (output_names is not None and
                len(output_names) != len(model_outputs)):
            raise RuntimeError(  # pragma: no cover
                f"Unexpected number of outputs {len(output_names)!r} != "
                f"expected {len(model_outputs)!r}.")

        if len(inputs) == 0:
            # No explicit input: the operator takes the inputs
            # declared by the model itself.
            if has_graph:
                inputs = [Variable(i.name, i.type.tensor_type)
                          for i in model_inputs]
            else:
                inputs = [Variable(n) for n in model_inputs]

        OnnxOperator.__init__(self, *inputs, output_names=output_names)

        if self.output_names is None and self.expected_outputs is None:
            # Without explicit output names, the outputs declared by
            # the model are used as the expected outputs.
            if has_graph:
                self.expected_outputs = [
                    (i.name, i.type.tensor_type) for i in model_outputs]
            else:
                self.expected_outputs = [(n, None) for n in model_outputs]
        self.model = model

    @property
    def input_names(self):
        "Returns the input names."
        if hasattr(self.model, 'graph'):
            return [i.name for i in self.model.graph.input]
        return list(self.model.input)

    def __repr__(self):
        "usual"
        atts = {}
        for att in ['output_names']:
            value = getattr(self, att, None)
            if value is not None:
                atts[att] = value
        atts.update(self.kwargs)
        msg = ", ".join(f"{k}={v!r}" for k, v in atts.items())
        if len(atts) > 0:
            msg = ", " + msg
        return f"{self.__class__.__name__}(...{msg})"

    def add_to(self, builder):
        """
        Adds to graph builder. Every result of the inserted graph
        is renamed with a unique name given by the builder.
        Nodes `Identity` link the inputs and the outputs of the
        operator to the renamed inputs and outputs of the inserted
        graph. The method reads `self.model.graph`.

        :param builder: instance of @see cl _GraphBuilder,
            it must have a method `add_node`, the method also calls
            `get_input_names`, `get_unique_output_name`,
            `get_unique_name` and `add_initializer`
        :raises RuntimeError: if a node consumes a result which is
            not an initializer, a linked input or the output
            of a previous node
        """
        logger.debug("SubOnnx.add_to(builder)")
        graph = self.model.graph
        inputs = builder.get_input_names(self, self.inputs)
        outputs = [builder.get_unique_output_name(NodeResultName(self, i))
                   for i in range(len(graph.output))]

        # original name -> unique name in the graph being built
        mapped_names = {}

        # adding initializers
        for init in graph.initializer:
            new_name = builder.get_unique_name(init.name, reserved=False)
            mapped_names[init.name] = new_name
            builder.add_initializer(new_name, init)

        # linking inputs
        for inp, name in zip(graph.input, inputs):
            new_name = builder.get_unique_name(inp.name, reserved=False)
            mapped_names[inp.name] = new_name
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + name, reserved=False),
                [name], [new_name])

        # adding nodes
        for node in graph.node:
            new_inputs = []
            for i in node.input:
                if i == '':
                    # ONNX uses an empty name for an optional input
                    # left unspecified, it must remain empty.
                    new_inputs.append(i)
                    continue
                if i not in mapped_names:
                    raise RuntimeError(  # pragma: no cover
                        f"Unable to find input {i!r} in {mapped_names!r}.")
                new_inputs.append(mapped_names[i])

            new_outputs = []
            for o in node.output:
                if o == '':
                    # same convention for an unused optional output
                    new_outputs.append(o)
                    continue
                new_name = builder.get_unique_name(o, reserved=False)
                mapped_names[o] = new_name
                new_outputs.append(new_name)

            atts = {att.name: OnnxOperatorFunction.attribute_to_value(att)
                    for att in node.attribute}

            builder.add_node(
                node.op_type,
                builder.get_unique_name('_sub_' + node.name, reserved=False),
                new_inputs, new_outputs, domain=node.domain, **atts)

        # linking outputs
        for out, name in zip(graph.output, outputs):
            builder.add_node(
                'Identity', builder.get_unique_name(
                    '_sub_' + out.name, reserved=False),
                [mapped_names[out.name]], [name])

    def to_onnx_this(self, evaluated_inputs):
        """
        Returns the ONNX graph.

        :param evaluated_inputs: unused
        :return: ONNX graph
        """
        return self.model
class OnnxSubEstimator(OnnxSubOnnx):
    """
    This operator calls the converter of a model and inserts
    the nodes coming from the conversion into a bigger ONNX graph.
    The conversion takes place when the operator is created.
    It supports models from :epkg:`scikit-learn`
    using :epkg:`sklearn-onnx`.

    :param model: model to convert
    :param inputs: inputs
    :param op_version: targeted opset
    :param output_names: optional output names
    :param options: to rewrite the options used to convert the model
    :param initial_types: the implementation may be wrong in guessing
        the input types of the model, this parameter can be used
        to overwrite them, usually a dictionary
        `{ input_name: numpy array as an example }`
    :param kwargs: any other parameters such as black listed or
        white listed operators
    """

    since_version = 1
    expected_inputs = None
    expected_outputs = None
    input_range = [1, 1e9]
    output_range = [1, 1e9]
    op_type = "SubEstimator"
    domain = 'mlprodict.xop'

    def __init__(self, model, *inputs, op_version=None,
                 output_names=None, options=None,
                 initial_types=None, **kwargs):
        logger.debug(
            "OnnxSubEstimator(%r, %r, op_version=%r, "
            "output_names=%r, initial_types=%r, options=%r, "
            "kwargs=%r)", type(model), inputs, op_version,
            output_names, initial_types, options, kwargs)
        if model is None:
            raise ValueError("Model cannot be None.")  # pragma: no cover

        # The model is converted first, the parent class then
        # deals with the ONNX graph produced by the conversion.
        converted = OnnxSubEstimator._to_onnx(
            model, inputs, op_version=op_version, options=options,
            initial_types=initial_types, **kwargs)
        OnnxSubOnnx.__init__(
            self, converted, *inputs, output_names=output_names)

        # kept to display the operator, see __repr__
        self.ml_model = model
        self.options = options
        self.initial_types = initial_types
        self.op_version = op_version

    def __repr__(self):
        "usual"
        names = ('op_version', 'output_names', 'options', 'initial_types')
        atts = {}
        for name in names:
            value = getattr(self, name, None)
            if value is None:
                continue
            atts[name] = value
        atts.update(self.kwargs)
        # every attribute is preceded by a comma as the model comes first
        suffix = "".join(f", {k}={v!r}" for k, v in atts.items())
        return f"{self.__class__.__name__}({self.ml_model!r}{suffix})"

    @staticmethod
    def _to_onnx(model, inputs, op_version=None, options=None,
                 initial_types=None, **kwargs):
        """
        Converts a model into ONNX. The function only handles
        instances of :epkg:`scikit-learn` `BaseEstimator`,
        the conversion itself is done by `_to_onnx_sklearn`.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset versions or None to use the latest one
        :param options: options to change the behaviour of the converter
        :param initial_types: input types, see `_to_onnx_sklearn`
        :param kwargs: additional parameters such as black listed
            or white listed operators
        :return: ONNX model
        :raises RuntimeError: if *model* is not a `BaseEstimator`
        """
        from sklearn.base import BaseEstimator

        if not isinstance(model, BaseEstimator):
            raise RuntimeError(  # pragma: no cover
                f"Unable to convert into ONNX model type {type(model)!r}.")

        logger.debug("OnnxSubEstimator._to_onnx(%r, %r, op_version=%r "
                     "options=%r, initial_types=%r, kwargs=%r)",
                     type(model), inputs, op_version, options,
                     initial_types, kwargs)
        return OnnxSubEstimator._to_onnx_sklearn(
            model, inputs, op_version=op_version, options=options,
            initial_types=initial_types, **kwargs)

    @staticmethod
    def _to_onnx_sklearn(model, inputs, op_version=None, options=None,
                         initial_types=None, **kwargs):
        """
        Converts a :epkg:`scikit-learn` model into ONNX.
        The conversion relies on function @see fn to_onnx
        and library :epkg:`sklearn-onnx`.

        :param model: a trained machine learned model
        :param inputs: inputs
        :param op_version: opset versions or None to use the latest one
        :param options: options to change the behaviour of the converter
        :param initial_types: if None, the input types are guessed
            from the name, the type and the shape of every input,
            every input must then be a @see cl Variable. It may not
            work and it is recommended to specify this parameter.
            A numpy array is given to the converter as an example
            of data, the operator must then have only one input.
        :param kwargs: additional parameters such as black listed
            or white listed operators
        :return: ONNX model
        :raises NotImplementedError: if the types must be guessed
            and one input is not a @see cl Variable
        :raises RuntimeError: if the number of inputs is different
            from the number of described types

        Default options is `{'zipmap': False}` for a classifier.
        """
        from ..onnx_conv.convert import to_onnx

        if options is None:
            from sklearn.base import ClassifierMixin
            if isinstance(model, ClassifierMixin):
                options = {'zipmap': False}

        if initial_types is None:
            # adding more information
            from skl2onnx.common.data_types import _guess_numpy_type  # delayed
            for position, inp in enumerate(inputs):
                if not isinstance(inp, Variable):
                    raise NotImplementedError(
                        "Inpput %d is not a variable but %r." % (
                            position, type(inp)))
            initial_types = [
                (inp.name, _guess_numpy_type(inp.dtype, inp.shape))
                for inp in inputs]

        logger.debug("OnnxSubEstimator._to_onnx_sklearn(%r, %r, "
                     "op_version=%r, options=%r, initial_types=%r, "
                     "kwargs=%r)",
                     type(model), inputs, op_version, options,
                     initial_types, kwargs)

        n_inputs = len(inputs)
        sample = None
        if isinstance(initial_types, numpy.ndarray):
            # The array is an example of data, the converter
            # receives it in place of the types.
            if n_inputs != 1:
                raise RuntimeError(  # pragma: no cover
                    "The model has %s inputs but only %d input are "
                    "described in 'initial_types'." % (
                        n_inputs, 1))
            sample, initial_types = initial_types, None
        elif n_inputs != len(initial_types):
            raise RuntimeError(  # pragma: no cover
                "The model has %s inputs but only %d input are "
                "described in 'initial_types'." % (
                    n_inputs, len(initial_types)))

        return to_onnx(
            model, sample, initial_types=initial_types, options=options,
            rewrite_ops=True, target_opset=op_version, **kwargs)