Coverage for mlprodict/onnx_tools/exports/numpy_helper.py: 89% (294 statements)
1"""
2@file
3@brief Numpy helpers for the conversion from onnx to numpy.
4"""
5import numpy


def make_slice(data, starts, ends, axes=None, steps=None):
    """
    Implements operator *Slice* in numpy.

    :param data: input array
    :param starts: mandatory start indices
    :param ends: mandatory end indices
    :param axes: optional axes the slices apply to
    :param steps: optional steps
    :return: sliced array
    """
    slices = [slice(0, data.shape[i]) for i in range(len(data.shape))]
    if axes is None:
        axes = range(len(starts))
    for i, a in enumerate(axes):
        if steps is None:
            slices[a] = slice(starts[i], ends[i])
        else:
            slices[a] = slice(starts[i], ends[i], steps[i])
    tslices = tuple(slices)
    try:
        return data[tslices]
    except IndexError as e:
        raise IndexError(
            f"Unable to run `data[tslices]` with type(data)={type(data)} "
            f"and type(tslices)={type(tslices)}.") from e


def argmax_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Needed for operator `ArgMax`.
    """
    if not select_last_index:
        result = numpy.argmax(data, axis=axis)
        if keepdims and len(result.shape) < len(data.shape):
            result = numpy.expand_dims(result, axis)
        return result.astype(numpy.int64)

    data = numpy.flip(data, axis)
    result = numpy.argmax(data, axis=axis)
    result = data.shape[axis] - result - 1
    if keepdims:
        result = numpy.expand_dims(result, axis)
    return result.astype(numpy.int64)
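
# For example (illustrative values), the flag resolves ties towards the last
# occurrence instead of the first one:
#
#     a = numpy.array([[1, 3, 3]])
#     argmax_use_numpy_select_last_index(a, axis=1)                          # [[1]]
#     argmax_use_numpy_select_last_index(a, axis=1, select_last_index=True)  # [[2]]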


def argmin_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Needed for operator `ArgMin`.
    """
    if not select_last_index:
        result = numpy.argmin(data, axis=axis)
        if keepdims and len(result.shape) < len(data.shape):
            result = numpy.expand_dims(result, axis)
        return result.astype(numpy.int64)

    data = numpy.flip(data, axis)
    result = numpy.argmin(data, axis=axis)
    result = data.shape[axis] - result - 1
    if keepdims:
        result = numpy.expand_dims(result, axis)
    return result.astype(numpy.int64)
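
# Symmetric example (illustrative values) for the minimum:
#
#     a = numpy.array([[2, 1, 1]])
#     argmin_use_numpy_select_last_index(a, axis=1, select_last_index=True)  # [[2]]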


def array_feature_extrator(data, indices):
    """
    Implementation of operator *ArrayFeatureExtractor*
    with :epkg:`numpy`. The misspelled name is kept as is:
    the generated code refers to it.
    """
    if len(indices.shape) == 2 and indices.shape[0] == 1:
        index = indices.ravel().tolist()
        add = len(index)
    elif len(indices.shape) == 1:
        index = indices.tolist()
        add = len(index)
    else:
        add = 1
        for s in indices.shape:
            add *= s
        index = indices.ravel().tolist()
    if len(data.shape) == 1:
        new_shape = (1, add)
    else:
        new_shape = list(data.shape[:-1]) + [add]
    tem = data[..., index]
    res = tem.reshape(new_shape)
    return res
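
# Illustrative example: keep columns 0 and 2 of a (3, 4) matrix.
#
#     x = numpy.arange(12).reshape((3, 4))
#     array_feature_extrator(x, numpy.array([0, 2]))  # shape (3, 2)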


class NumpyCode:
    """
    Converts an ONNX operator into :epkg:`numpy` code.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes
    :param used: dictionary `{k: v}`,
        *v* is the list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializers as replaced
    :param indent: indentation of the second line and following
    """

    def __init__(self, opset, name=None, op_type=None, domain='',
                 inputs=None, outputs=None, attributes=None,
                 used=None, context=None, mark_inits=None,
                 indent="", **unused):
        self.opset = opset
        self.name = name
        self.op_type = op_type
        self.domain = domain
        self.inputs = inputs
        self.outputs = outputs
        self.attributes = attributes
        self.used = used
        self.context = context
        self.mark_inits = mark_inits
        self.unused = unused
        self.indent = indent

    def _make_sure_inputs(self, n, m=None):
        if m is None:
            m = n
        if len(self.inputs) < n:
            raise RuntimeError(  # pragma: no cover
                "Expecting at least %d inputs for operator %r, got %r." % (
                    n, self.op_type, self.inputs))
        if len(self.inputs) > m:
            raise RuntimeError(  # pragma: no cover
                "Expecting at most %d inputs for operator %r, got %r." % (
                    m, self.op_type, self.inputs))

    def _make_sure_opsets(self, mi, ma=None):
        if mi is not None and self.opset < mi:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d < %d." % (
                    self.op_type, self.opset, mi))
        if ma is not None and self.opset > ma:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d > %d." % (
                    self.op_type, self.opset, ma))

    def _getat(self, name, defval=None, format=None):

        def f(v):
            if format is None:
                return v
            if format == 'listint' and isinstance(v, str):
                return list(
                    map(int, v.strip('[]').replace(' ', '').split(',')))
            if format == 'listfloat' and isinstance(v, str):
                return list(
                    map(float, v.strip('[]').replace(' ', '').split(',')))
            raise ValueError(  # pragma: no cover
                f"Unable to convert {v!r} with format={format!r}.")

        for n, val in self.attributes:
            if name == n:
                return f(val)
        return defval

    def _simplify(self, name, kind):
        value = None
        if (self.used is not None and name in self.used and
                len(self.used[name]) == 1 and self.context is not None):
            inits = self.context['initializers_dict']
            if name in inits:
                v = inits[name]
                if v.dtype == numpy.int64 and v.size < 10:
                    value = v
                    if name not in self.mark_inits:
                        self.mark_inits[name] = []
                    self.mark_inits[name].append(v)

        if kind == 'tuple':
            if value is None:
                return f"tuple({name})"
            if value.size == 1:
                return str(tuple(value)[0])
            return str(tuple(value))
        elif kind == 'list':
            if value is None:
                return name
            if len(value.shape) == 0:
                return str(value)
            return str(list(value))
        raise NotImplementedError(  # pragma: no cover
            f"Unknown scenario to simplify ({kind!r}).")

    @staticmethod
    def _make_tuple(val):
        if isinstance(val, tuple):
            return val
        if isinstance(val, list):
            return tuple(val)
        if isinstance(val, int):
            return val
        if isinstance(val, str):
            return tuple(map(int, val.strip('()[]').replace(" ", "").split(",")))
        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert type {type(val)!r} ({val!r}) into tuple.")

    def make_numpy_code(self):
        """
        Main method, returns the Python code for a given
        operator.
        """
        if self.domain == '':
            return self._make_numpy_code_onnx()

        if self.domain == 'ai.onnx.ml':
            return self._make_numpy_code_onnxml()

        if self.domain == 'com.microsoft':
            return self._make_numpy_code_others()

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert any operator from domain {self.domain!r}.")

    def _make_numpy_code_onnx(self):

        binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@',
                          Pow='**')
        unary_ops = dict(Neg='-')
        unary_ops_ = dict(Sqrt='** 0.5')

        outs = ", ".join(self.outputs)

        if self.op_type in binary_ops:
            self._make_sure_inputs(2)
            return "%s = %s %s %s" % (
                outs, self.inputs[0], binary_ops[self.op_type],
                self.inputs[1])

        if self.op_type in unary_ops:
            self._make_sure_inputs(1)
            return f"{outs} = {unary_ops[self.op_type]} {self.inputs[0]}"

        if self.op_type in unary_ops_:
            self._make_sure_inputs(1)
            return f"{outs} = {self.inputs[0]} {unary_ops_[self.op_type]}"

        if self.op_type in {'Abs', 'Ceil', 'Cos', 'Cosh',
                            'Exp', 'Log', 'Sin', 'Sinh',
                            'Tan', 'Tanh'}:
            return f"{outs} = numpy.{self.op_type.lower()}({self.inputs[0]})"

        if self.op_type == 'ArgMax':
            self._make_sure_opsets(12)
            self._make_sure_inputs(1)
            axis = self._getat('axis', 0)
            keepdims = self._getat('keepdims', 1)
            select_last_index = self._getat('select_last_index', 0)
            if select_last_index:
                return (
                    "%s = argmax_use_numpy_select_last_index("
                    "%s, axis=%s, keepdims=%s, select_last_index=%s)" % (
                        outs, self.inputs[0], axis, keepdims, select_last_index))
            if keepdims:
                return "%s = numpy.expand_dims(numpy.argmax(%s, axis=%s), -1)" % (
                    outs, self.inputs[0], axis)
            return f"{outs} = numpy.argmax({self.inputs[0]}, axis={axis})"

        if self.op_type == 'ArgMin':
            self._make_sure_opsets(12)
            self._make_sure_inputs(1)
            axis = self._getat('axis', 0)
            keepdims = self._getat('keepdims', 1)
            select_last_index = self._getat('select_last_index', 0)
            if select_last_index:
                return (
                    "%s = argmin_use_numpy_select_last_index("
                    "%s, axis=%s, keepdims=%s, select_last_index=%s)" % (
                        outs, self.inputs[0], axis, keepdims, select_last_index))
            if keepdims:
                return "%s = numpy.expand_dims(numpy.argmin(%s, axis=%s), -1)" % (
                    outs, self.inputs[0], axis)
            return f"{outs} = numpy.argmin({self.inputs[0]}, axis={axis})"

        if self.op_type == 'Cast':
            from ..onnx2py_helper import _elem_type_as_str
            self._make_sure_inputs(1)
            to = int(self._getat('to', 1))
            dtype = _elem_type_as_str(to)
            dtype = {'double': 'float64', 'float': 'float32'}.get(dtype, dtype)
            return f"{outs} = {self.inputs[0]}.astype(numpy.{dtype})"

        if self.op_type == 'Concat':
            axis = self._getat('axis', 0)
            return f"{outs} = numpy.concatenate([{', '.join(self.inputs)}], {axis})"

        if self.op_type == 'ConstantOfShape':
            self._make_sure_opsets(9)
            self._make_sure_inputs(1)
            value = self._getat('value', 0, format='listfloat')
            shape = self._simplify(self.inputs[0], kind='tuple')
            return f"{outs} = numpy.full({shape}, {value})"

        if self.op_type == 'Max':
            return f"{outs} = numpy.maximum({', '.join(self.inputs)})"

        if self.op_type == 'Gather':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            axis = self._getat('axis', 0)
            return "%s = numpy.take(%s, %s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'list'), axis)

        if self.op_type == 'Gemm':
            self._make_sure_inputs(2, 3)
            alpha = self._getat('alpha', 1.)  # ONNX default is 1.0
            transA = self._getat('transA', 0)
            transB = self._getat('transB', 0)
            ta = ".T" if transA in ('1', 1, True) else ""
            tb = ".T" if transB in ('1', 1, True) else ""
            if len(self.inputs) == 2:
                return f"{outs} = {self.inputs[0]}{ta} @ {self.inputs[1]}{tb} * {alpha}"
            beta = self._getat('beta', 1.)  # ONNX default is 1.0
            return "%s = %s%s @ %s%s * %s + %s * %s" % (
                outs, self.inputs[0], ta, self.inputs[1], tb, alpha,
                self.inputs[2], beta)

        if self.op_type == 'Identity':
            return f"{outs} = {self.inputs[0]}"

        if self.op_type == 'ReduceProd':
            self._make_sure_inputs(1)
            axes = self._getat('axes', "[0]")
            keepdims = self._getat('keepdims', 1)  # ONNX default is 1
            return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % (
                outs, self.inputs[0], axes, keepdims)

        if self.op_type == 'ReduceSum':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            keepdims = self._getat('keepdims', 1)  # ONNX default is 1
            return "%s = %s.sum(axis=%s, keepdims=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'),
                keepdims)

        if self.op_type == 'ReduceSumSquare':
            self._make_sure_inputs(1)
            axes = self._getat('axes', "[0]")
            keepdims = self._getat('keepdims', 1)  # ONNX default is 1
            return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % (
                outs, self.inputs[0], axes, keepdims)

        if self.op_type == 'Reshape':
            self._make_sure_inputs(2)
            simp = self._simplify(self.inputs[1], 'tuple')
            return f"{outs} = {self.inputs[0]}.reshape({simp})"

        if self.op_type == 'Shape':
            self._make_sure_inputs(1)
            return f"{outs} = numpy.array({self.inputs[0]}.shape, dtype=numpy.int64)"

        if self.op_type == 'Slice':
            return f"{outs} = make_slice({', '.join(self.inputs)})"

        if self.op_type == 'Softmax':
            self._make_sure_inputs(1)
            axis = self._getat('axis', -1)
            return f"{outs} = scipy_special.softmax({self.inputs[0]}, axis={axis})"

        if self.op_type == 'Squeeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.squeeze(%s, axis=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'))

        if self.op_type == 'Transpose':
            self._make_sure_inputs(1)
            perm = self._getat('perm', None)
            # a missing perm means reversed axes, which numpy.transpose
            # expresses with axes=None
            return "%s = numpy.transpose(%s, axes=%s)" % (
                outs, self.inputs[0],
                None if perm is None else self._make_tuple(perm))

        if self.op_type == 'Unsqueeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.expand_dims(%s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'tuple'))

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert operator type {self.op_type!r} name={self.name!r}.")

    def _make_numpy_code_onnxml(self):
        outs = ", ".join(self.outputs)

        if self.op_type == 'ArrayFeatureExtractor':
            self._make_sure_inputs(2)
            return "%s = array_feature_extrator(%s, %s)" % (
                outs, self.inputs[0], self.inputs[1])

        if self.op_type == 'LinearClassifier':
            multi_class = self._getat('multi_class', 0)
            if multi_class != 0:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with multi_class=%r "
                    "is not implemented." % (self.op_type, multi_class))
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            classlabels_strings = self._getat('classlabels_strings', None)
            if classlabels_strings is not None:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_strings=%r "
                    "is not implemented." % (self.op_type, classlabels_strings))
            classlabels_ints = self._getat(
                'classlabels_ints', None, format="listint")
            if classlabels_ints is None or (
                    classlabels_ints != list(range(len(classlabels_ints)))):
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_ints=%r "
                    "is not implemented (expecting a consecutive range "
                    "starting at 0)." % (self.op_type, classlabels_ints))
            targets = len(classlabels_ints)
            rows = [
                "coefs = numpy.array(%s, dtype=numpy.float32)."
                "reshape((%d, -1)).T" % (coefficients, targets),
                "%sinter = numpy.array(%s, dtype=numpy.float32)."
                "reshape((-1, %d))" % (self.indent, intercepts, targets)]

            if post_transform == "SOFTMAX":
                rows.append(
                    "%s%s = scipy_special.softmax"
                    "(%s @ coefs + inter, axis=1)" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            elif post_transform == 'NONE':
                rows.append(
                    "%s%s = %s @ coefs + inter" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            else:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows.append("%s%s = numpy.argmax(%s, axis=1)" % (
                self.indent, self.outputs[0], self.outputs[1]))
            return "\n".join(rows)

        if self.op_type == 'LinearRegressor':
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            targets = self._getat('targets', 1)
            if post_transform != "NONE":
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows = [
                "coefs = numpy.array(%s, dtype=numpy.float32)."
                "reshape((%d, -1)).T" % (coefficients, targets),
                "%sinter = numpy.array(%s, dtype=numpy.float32)."
                "reshape((-1, %d))" % (self.indent, intercepts, targets),
                f"{self.indent}{outs} = {self.inputs[0]} @ coefs + inter"]
            return "\n".join(rows)

        if self.op_type == 'Normalizer':
            self._make_sure_inputs(1)
            norm = self._getat('norm', 'MAX').strip('"\'b')
            if norm == 'L2':
                # keepdims=1 keeps the denominator broadcastable
                return "%s = %s / ((%s ** 2).sum(axis=1, keepdims=1) ** 0.5)" % (
                    outs, self.inputs[0], self.inputs[0])
            if norm == 'L1':
                norm = 'sum'
            return "%s = %s / %s.%s(axis=1, keepdims=1)" % (
                outs, self.inputs[0], self.inputs[0], norm.lower())

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r name=%r (onnxml)." % (
                self.op_type, self.name))

    def _make_numpy_code_others(self):
        outs = ", ".join(self.outputs)

        if self.op_type == 'CDist':
            self._make_sure_inputs(2)
            metric = self._getat('metric', 'euclidean').strip("'b")
            return "%s = scipy_distance.cdist(%s, %s, metric=%r)" % (
                outs, self.inputs[0], self.inputs[1], metric)

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r (domain=%r) "
            "name=%r." % (self.op_type, self.domain, self.name))


def make_numpy_code(opset, name=None, op_type=None, domain='',
                    inputs=None, outputs=None, attributes=None,
                    used=None, context=None, mark_inits=None,
                    indent="", **unused):
    """
    Converts an ONNX operator into :epkg:`numpy` code.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes
    :param used: dictionary `{k: v}`,
        *v* is the list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializers as replaced
    :param indent: indentation of the second line and following
    :return: code as str
    """
    cl = NumpyCode(
        opset=opset, name=name, op_type=op_type, domain=domain,
        inputs=inputs, outputs=outputs, attributes=attributes,
        used=used, context=context, mark_inits=mark_inits,
        indent=indent, **unused)
    return cl.make_numpy_code()
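
# A minimal usage sketch (argument values are illustrative, not from the
# original module):
#
#     code = make_numpy_code(
#         opset=15, name='n0', op_type='Transpose', domain='',
#         inputs=['X'], outputs=['Y'], attributes=[('perm', [1, 0])],
#         used={}, context={'initializers_dict': {}}, mark_inits={})
#     # code == "Y = numpy.transpose(X, axes=(1, 0))"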