Coverage for mlprodict/onnx_tools/exports/numpy_helper.py

1"""

2@file

3@brief Numpy helpers for the conversion from onnx to numpy.

4"""

5import numpy

def make_slice(data, starts, ends, axes=None, steps=None):
    """
    Applies the ONNX operator *Slice* to an array with numpy indexing.

    :param data: array to slice, must expose ``shape`` and support
        indexing with a tuple of slices
    :param starts: first index of every slice, one value per sliced axis
    :param ends: end index (exclusive) of every slice,
        one value per sliced axis
    :param axes: axes the slices apply to, the first ``len(starts)``
        axes if None
    :param steps: step of every slice, no step is given to the slices
        if None
    :return: sliced array
    :raises IndexError: if the indexing fails, the message reports
        the types involved
    """
    # Every axis starts with a slice covering its whole extent.
    selection = [slice(0, dim) for dim in data.shape]
    target_axes = range(len(starts)) if axes is None else axes
    for pos, axis in enumerate(target_axes):
        bounds = (starts[pos], ends[pos])
        if steps is not None:
            bounds += (steps[pos],)
        selection[axis] = slice(*bounds)
    index = tuple(selection)
    try:
        return data[index]
    except IndexError as e:
        raise IndexError(
            f"Unable to run `data[tslices]` with type(data)={type(data)} "
            f"and type(tslices)={type(index)}.") from e

def argmax_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Helper for operator `ArgMax`, it optionally returns the index
    of the last occurrence of the maximum instead of the first one.

    :param data: input array
    :param axis: axis along which the maximum is searched
    :param keepdims: keeps the reduced axis as a dimension of size one
    :param select_last_index: picks the last occurrence of the maximum
        when it appears more than once
    :return: indices as an array of type int64
    """
    if select_last_index:
        # The first maximum of the flipped array is the last one of the
        # original array, the index is then mapped back.
        flipped = numpy.flip(data, axis)
        found = numpy.argmax(flipped, axis=axis)
        positions = flipped.shape[axis] - found - 1
        if keepdims:
            positions = numpy.expand_dims(positions, axis)
        return positions.astype(numpy.int64)

    positions = numpy.argmax(data, axis=axis)
    if keepdims and len(positions.shape) < len(data.shape):
        positions = numpy.expand_dims(positions, axis)
    return positions.astype(numpy.int64)

def argmin_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Helper for operator `ArgMin`, it optionally returns the index
    of the last occurrence of the minimum instead of the first one.

    :param data: input array
    :param axis: axis along which the minimum is searched
    :param keepdims: keeps the reduced axis as a dimension of size one
    :param select_last_index: picks the last occurrence of the minimum
        when it appears more than once
    :return: indices as an array of type int64
    """
    if select_last_index:
        # The first minimum of the flipped array is the last one of the
        # original array, the index is then mapped back.
        flipped = numpy.flip(data, axis)
        found = numpy.argmin(flipped, axis=axis)
        positions = flipped.shape[axis] - found - 1
        if keepdims:
            positions = numpy.expand_dims(positions, axis)
        return positions.astype(numpy.int64)

    positions = numpy.argmin(data, axis=axis)
    if keepdims and len(positions.shape) < len(data.shape):
        positions = numpy.expand_dims(positions, axis)
    return positions.astype(numpy.int64)

def array_feature_extrator(data, indices):
    """
    Implementation of operator *ArrayFeatureExtractor*
    with :epkg:`numpy`.

    :param data: array the features are picked from, the selection
        applies to its last axis
    :param indices: array of indices, it is flattened whatever its shape
    :return: selected features, shape ``(1, n)`` if *data* has one
        dimension, ``data.shape[:-1] + (n,)`` otherwise, *n* being the
        number of indices
    """
    flat_index = indices.ravel().tolist()
    count = len(flat_index)
    if len(data.shape) == 1:
        target_shape = (1, count)
    else:
        target_shape = [*data.shape[:-1], count]
    return data[..., flat_index].reshape(target_shape)

class NumpyCode:
    """
    Converts an ONNX operators into :epkg:`numpy` code.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes
    :param used: dictionary `{k: v}`,
        list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializer as replaced
    :param indent: indentation of the second line and following
    :return: code as str
    """

    def __init__(self, opset, name=None, op_type=None, domain='',
                 inputs=None, outputs=None, attributes=None,
                 used=None, context=None, mark_inits=None,
                 indent="", **unused):
        self.opset = opset
        self.name = name
        self.op_type = op_type
        self.domain = domain
        self.inputs = inputs
        self.outputs = outputs
        self.attributes = attributes
        self.used = used
        self.context = context
        self.mark_inits = mark_inits
        self.unused = unused
        self.indent = indent

    def _make_sure_inputs(self, n, m=None):
        """
        Checks the number of inputs is between *n* and *m*
        (*m* is equal to *n* if None).

        :raises RuntimeError: if the number of inputs is out of range
        """
        if m is None:
            m = n
        if len(self.inputs) < n:
            raise RuntimeError(  # pragma: no cover
                "Expecting at least %d inputs for operator %r not %r." % (
                    n, self.op_type, self.inputs))
        if len(self.inputs) > m:
            raise RuntimeError(  # pragma: no cover
                "Expecting at most %d inputs for operator %r not %r." % (
                    m, self.op_type, self.inputs))

    def _make_sure_opsets(self, mi, ma=None):
        """
        Checks the opset is between *mi* and *ma*, a bound set
        to None is not checked.

        :raises RuntimeError: if the opset is out of range
        """
        # op_type is a string: it is formatted with %r (%d would fail
        # while building the message).
        if mi is not None and self.opset < mi:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d < %d." % (
                    self.op_type, self.opset, mi))
        if ma is not None and self.opset > ma:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d > %d." % (
                    self.op_type, self.opset, ma))

    @staticmethod
    def _is_true(value):
        """
        Tells if an attribute value enables a flag. Only ``'1'``, ``1``
        and ``True`` do, so that a flag stored as the string ``'0'``
        is not considered as enabled.
        """
        return value in ('1', 1, True)

    def _getat(self, name, defval=None, format=None):
        """
        Returns the value of attribute *name*.

        :param name: attribute name
        :param defval: value returned if the attribute is missing
        :param format: None to return the value as is, ``'listint'``
            or ``'listfloat'`` to convert it into a list of integers
            or floats
        :return: attribute value or *defval*
        :raises ValueError: if the value cannot be converted
        """
        converters = {'listint': int, 'listfloat': float}

        def f(v):
            if format is None:
                return v
            cast = converters.get(format)
            if cast is not None:
                if isinstance(v, str):
                    items = v.strip('[]').replace(' ', '')
                    if not items:
                        # empty list such as '[]'
                        return []
                    return [cast(item) for item in items.split(',')]
                if isinstance(v, numpy.ndarray):
                    return [cast(item) for item in v.ravel().tolist()]
                if isinstance(v, (list, tuple)):
                    return [cast(item) for item in v]
            raise ValueError(  # pragma: no cover
                f"Unable to convert {v!r} with format={format!r}.")

        # attributes is None when the node has no attribute
        for n, val in self.attributes or ():
            if name == n:
                return f(val)
        return defval

    def _simplify(self, name, kind):
        """
        Returns the code for input *name*. The name is replaced by its
        value if it is a small int64 initializer used by a single node,
        the initializer is then registered in *mark_inits*.

        :param name: input name
        :param kind: ``'tuple'`` or ``'list'``, expected type
            in the generated code
        :return: code as str
        """
        value = None
        if (self.used is not None and name in self.used and
                len(self.used[name]) == 1 and self.context is not None):
            inits = self.context['initializers_dict']
            if name in inits:
                v = inits[name]
                if v.dtype == numpy.int64 and v.size < 10:
                    value = v
                    if self.mark_inits is not None:
                        self.mark_inits.setdefault(name, []).append(v)

        # Values are converted into python integers with tolist():
        # the representation of numpy scalars depends on the numpy
        # version and may not be valid in the generated code.
        if kind == 'tuple':
            if value is None:
                return f"tuple({name})"
            if value.size == 1:
                return str(value.ravel().tolist()[0])
            return str(tuple(value.tolist()))
        elif kind == 'list':
            if value is None:
                return name
            return str(value.tolist())
        raise NotImplementedError(  # pragma: no cover
            f"Unknown scenario to simplify ({kind!r}).")

    @staticmethod
    def _make_tuple(val):
        """
        Converts *val* (tuple, list or string such as ``'[1, 0]'``)
        into a tuple of integers, an integer is returned as is.

        :raises NotImplementedError: if the type is not supported
        """
        if isinstance(val, tuple):
            return val
        if isinstance(val, list):
            return tuple(val)
        if isinstance(val, int):
            return val
        if isinstance(val, str):
            return tuple(map(int, val.strip('()[]').replace(" ", "").split(",")))
        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert type {type(val)!r} ({val!r}) into tuple.")

    def make_numpy_code(self):
        """
        Main method, returns the python code for a given
        operator.
        """
        if self.domain == '':
            return self._make_numpy_code_onnx()

        if self.domain == 'ai.onnx.ml':
            return self._make_numpy_code_onnxml()

        if self.domain == 'com.microsoft':
            return self._make_numpy_code_others()

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert any operator from domain {self.domain!r}.")

    def _make_arg_code(self, outs, fct):
        """
        Returns the code for operators `ArgMax` and `ArgMin`.

        :param outs: output names, already joined
        :param fct: numpy function, ``'argmax'`` or ``'argmin'``
        :return: code as str
        """
        self._make_sure_opsets(12)
        self._make_sure_inputs(1)
        axis = self._getat('axis', 0)
        keepdims = self._getat('keepdims', 1)
        select_last_index = self._getat('select_last_index', 0)
        if self._is_true(select_last_index):
            return (
                "%s = %s_use_numpy_select_last_index("
                "%s, axis=%s, keepdims=%s, select_last_index=%s)" % (
                    outs, fct, self.inputs[0], axis, keepdims,
                    select_last_index))
        if self._is_true(keepdims):
            # the reduced dimension is restored where it was
            return "%s = numpy.expand_dims(numpy.%s(%s, axis=%s), %s)" % (
                outs, fct, self.inputs[0], axis, axis)
        return f"{outs} = numpy.{fct}({self.inputs[0]}, axis={axis})"

    def _make_axes_code(self):
        """
        Returns the code for attribute *axes* of a reduce operator,
        ``None`` (every axis is reduced) if the attribute is missing.
        """
        axes = self._getat('axes', None)
        return "None" if axes is None else f"tuple({axes})"

    def _make_numpy_code_onnx(self):
        """
        Returns the code for an operator of the main ONNX domain.

        :raises NotImplementedError: if the operator is not supported
        """
        binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@',
                          Pow='**')
        unary_ops = dict(Neg='-')
        unary_ops_ = dict(Sqrt='** 0.5')

        outs = ", ".join(self.outputs)

        if self.op_type in binary_ops:
            self._make_sure_inputs(2)
            return "%s = %s %s %s" % (
                outs, self.inputs[0], binary_ops[self.op_type],
                self.inputs[1])

        if self.op_type in unary_ops:
            self._make_sure_inputs(1)
            return f"{outs} = {unary_ops[self.op_type]} {self.inputs[0]}"

        if self.op_type in unary_ops_:
            self._make_sure_inputs(1)
            return f"{outs} = {self.inputs[0]} {unary_ops_[self.op_type]}"

        if self.op_type in {'Abs', 'Ceil', 'Cos', 'Cosh',
                            'Exp', 'Log', 'Sin', 'Sinh',
                            'Tan', 'Tanh'}:
            return f"{outs} = numpy.{self.op_type.lower()}({self.inputs[0]})"

        if self.op_type == 'ArgMax':
            return self._make_arg_code(outs, 'argmax')

        if self.op_type == 'ArgMin':
            return self._make_arg_code(outs, 'argmin')

        if self.op_type == 'Cast':
            from ..onnx2py_helper import _elem_type_as_str
            self._make_sure_inputs(1)
            to = int(self._getat('to', 1))
            dtype = _elem_type_as_str(to)
            dtype = {'double': 'float64', 'float': 'float32'}.get(dtype, dtype)
            return f"{outs} = {self.inputs[0]}.astype(numpy.{dtype})"

        if self.op_type == 'Concat':
            axis = self._getat('axis', 0)
            return f"{outs} = numpy.concatenate([{', '.join(self.inputs)}], {axis})"

        if self.op_type == 'ConstantOfShape':
            self._make_sure_opsets(9)
            self._make_sure_inputs(1)
            value = self._getat('value', 0, format='listfloat')
            shape = self._simplify(self.inputs[0], kind='tuple')
            return f"{outs} = numpy.full({shape}, {value})"

        if self.op_type == 'Max':
            # numpy.maximum takes two arrays (a third positional argument
            # is the output buffer), calls are nested for more inputs.
            self._make_sure_inputs(1, len(self.inputs))
            expr = self.inputs[0]
            for other in self.inputs[1:]:
                expr = f"numpy.maximum({expr}, {other})"
            return f"{outs} = {expr}"

        if self.op_type == 'Gather':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            axis = self._getat('axis', 0)
            return "%s = numpy.take(%s, %s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'list'), axis)

        if self.op_type == 'Gemm':
            self._make_sure_inputs(2, 3)
            # alpha and beta are equal to 1 when not specified
            alpha = self._getat('alpha', 1.)
            transA = self._getat('transA', 0)
            transB = self._getat('transB', 0)
            ta = ".T" if self._is_true(transA) else ""
            tb = ".T" if self._is_true(transB) else ""
            if len(self.inputs) == 2:
                return f"{outs} = {self.inputs[0]}{ta} @ {self.inputs[1]}{tb} * {alpha}"
            beta = self._getat('beta', 1.)
            return "%s = %s%s @ %s%s * %s + %s * %s" % (
                outs, self.inputs[0], ta, self.inputs[1], tb, alpha,
                self.inputs[2], beta)

        if self.op_type == 'Identity':
            return f"{outs} = {self.inputs[0]}"

        if self.op_type == 'ReduceProd':
            self._make_sure_inputs(1)
            keepdims = self._getat('keepdims', 1)
            return "%s = %s.prod(axis=%s, keepdims=%s)" % (
                outs, self.inputs[0], self._make_axes_code(), keepdims)

        if self.op_type == 'ReduceSum':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            keepdims = self._getat('keepdims', 1)
            return "%s = %s.sum(axis=%s, keepdims=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'),
                keepdims)

        if self.op_type == 'ReduceSumSquare':
            self._make_sure_inputs(1)
            keepdims = self._getat('keepdims', 1)
            return "%s = (%s ** 2).sum(axis=%s, keepdims=%s)" % (
                outs, self.inputs[0], self._make_axes_code(), keepdims)

        if self.op_type == 'Reshape':
            self._make_sure_inputs(2)
            simp = self._simplify(self.inputs[1], 'tuple')
            return f"{outs} = {self.inputs[0]}.reshape({simp})"

        if self.op_type == 'Shape':
            self._make_sure_inputs(1)
            return f"{outs} = numpy.array({self.inputs[0]}.shape, dtype=numpy.int64)"

        if self.op_type == 'Slice':
            return f"{outs} = make_slice({', '.join(self.inputs)})"

        if self.op_type == 'Softmax':
            self._make_sure_inputs(1)
            axis = self._getat('axis', -1)
            return f"{outs} = scipy_special.softmax({self.inputs[0]}, axis={axis})"

        if self.op_type == 'Squeeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.squeeze(%s, axis=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'))

        if self.op_type == 'Transpose':
            self._make_sure_inputs(1)
            perm = self._getat('perm', None)
            # without perm, the dimensions are reversed which is what
            # numpy.transpose does with axes=None
            axes = None if perm is None else self._make_tuple(perm)
            return "%s = numpy.transpose(%s, axes=%s)" % (
                outs, self.inputs[0], axes)

        if self.op_type == 'Unsqueeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.expand_dims(%s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'tuple'))

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert operator type {self.op_type!r} name={self.name!r}.")

    def _make_linear_rows(self, coefficients, intercepts, targets):
        """
        Returns the two lines defining `coefs` and `inter` shared by
        the linear operators. The first line is not indented, the caller
        is assumed to place it.
        """
        return [
            "coefs = numpy.array(%s, dtype=numpy.float32)."
            "reshape((%d, -1)).T" % (coefficients, targets),
            "%sinter = numpy.array(%s, dtype=numpy.float32)."
            "reshape((-1, %d))" % (self.indent, intercepts, targets)]

    def _make_numpy_code_onnxml(self):
        """
        Returns the code for an operator of domain `ai.onnx.ml`.

        :raises NotImplementedError: if the operator or one of its
            options is not supported
        """
        outs = ", ".join(self.outputs)

        if self.op_type == 'ArrayFeatureExtractor':
            self._make_sure_inputs(2)
            return "%s = array_feature_extrator(%s, %s)" % (
                outs, self.inputs[0], self.inputs[1])

        if self.op_type == 'LinearClassifier':
            # NOTE(review): the ONNX attribute is named `multi_class`,
            # this reads `targets`. Confirm the intent before changing it,
            # attribute values may be strings and `'0' != 0`.
            multi_class = self._getat('targets', 0)
            if multi_class != 0:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with multi_class=%r "
                    "is not implemented." % (self.op_type, multi_class))
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            classlabels_strings = self._getat('classlabels_strings', None)
            if classlabels_strings is not None:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_strings=%r "
                    "is not implemented." % (self.op_type, classlabels_strings))
            classlabels_ints = self._getat(
                'classlabels_ints', None, format="listint")
            # labels must be 0, 1, 2, ... as argmax is used to find them
            if classlabels_ints != list(range(len(classlabels_ints))):
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_ints=%r!=%r "
                    "is not implemented." % (
                        self.op_type, classlabels_ints,
                        list(range(len(classlabels_ints)))))
            targets = len(classlabels_ints)
            rows = self._make_linear_rows(coefficients, intercepts, targets)

            if post_transform == "SOFTMAX":
                rows.append(
                    "%s%s = scipy_special.softmax"
                    "(%s @ coefs + inter, axis=1)" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            elif post_transform == 'NONE':
                rows.append(
                    "%s%s = %s @ coefs + inter" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            else:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows.append("%s%s = numpy.argmax(%s, axis=1)" % (
                self.indent, self.outputs[0], self.outputs[1]))
            return "\n".join(rows)

        if self.op_type == 'LinearRegressor':
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            # converted into an integer as it is formatted with %d
            targets = int(self._getat('targets', 1))
            if post_transform != "NONE":
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows = self._make_linear_rows(coefficients, intercepts, targets)
            rows.append(
                f"{self.indent}{outs} = {self.inputs[0]} @ coefs + inter")
            return "\n".join(rows)

        if self.op_type == 'Normalizer':
            self._make_sure_inputs(1)
            norm = self._getat('norm', 'MAX').strip('"\'b')
            if norm == 'L2':
                # keepdims is needed to divide every row by its own norm
                return "%s = %s / (%s ** 2).sum(axis=1, keepdims=1) ** 0.5" % (
                    outs, self.inputs[0], self.inputs[0])
            if norm == 'L1':
                # the L1 norm sums absolute values
                return "%s = %s / numpy.abs(%s).sum(axis=1, keepdims=1)" % (
                    outs, self.inputs[0], self.inputs[0])
            return "%s = %s / %s.%s(axis=1, keepdims=1)" % (
                outs, self.inputs[0], self.inputs[0], norm.lower())

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r name=%r (onnxml)." % (
                self.op_type, self.name))

    def _make_numpy_code_others(self):
        """
        Returns the code for an operator of domain `com.microsoft`.

        :raises NotImplementedError: if the operator is not supported
        """
        outs = ", ".join(self.outputs)

        if self.op_type == 'CDist':
            self._make_sure_inputs(2)
            # NOTE(review): strip removes every leading or trailing
            # `'` and `b`, a metric name starting with `b` would be
            # truncated. Confirm the supported metrics.
            metric = self._getat('metric', 'euclidean').strip("'b")
            return "%s = scipy_distance.cdist(%s, %s, metric=%r)" % (
                outs, self.inputs[0], self.inputs[1], metric)

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r (domain=%r) "
            "name=%r (onnxml)." % (
                self.op_type, self.domain, self.name))

505

506

def make_numpy_code(opset, name=None, op_type=None, domain='',
                    inputs=None, outputs=None, attributes=None,
                    used=None, context=None, mark_inits=None,
                    indent="", **unused):
    """
    Returns the :epkg:`numpy` code implementing one ONNX operator,
    the conversion is delegated to class `NumpyCode`.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes
    :param used: dictionary `{k: v}`,
        list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializer as replaced
    :param indent: indentation of the second line and following
    :param unused: additional parameters forwarded to `NumpyCode`
    :return: code as str
    """
    settings = dict(
        opset=opset, name=name, op_type=op_type, domain=domain,
        inputs=inputs, outputs=outputs, attributes=attributes,
        used=used, context=context, mark_inits=mark_inits,
        indent=indent)
    converter = NumpyCode(**settings, **unused)
    return converter.make_numpy_code()

Coverage for mlprodict/onnx_tools/exports/numpy_helper.py: 89%

294 statements