Coverage for mlprodict/onnx_tools/exports/numpy_helper.py: 89%

294 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1""" 

2@file 

3@brief Numpy helpers for the conversion from onnx to numpy. 

4""" 

5import numpy 

6 

7 

def make_slice(data, starts, ends, axes=None, steps=None):
    """
    Implements operator slice in numpy.

    Every axis is first selected entirely, then the axes listed in
    *axes* are restricted to ``starts[i]:ends[i]`` (or
    ``starts[i]:ends[i]:steps[i]`` when *steps* is given).

    :param data: input, must expose ``shape`` and support indexing
        with a tuple of slices
    :param starts: mandatory, first index for every sliced axis
    :param ends: mandatory, end index for every sliced axis
    :param axes: optional, sliced axes, the ``len(starts)`` first
        axes if None
    :param steps: optional, step for every sliced axis
    :return: results
    """
    selection = [slice(0, dim) for dim in data.shape]
    if axes is None:
        axes = range(len(starts))
    for pos, axis in enumerate(axes):
        bounds = (starts[pos], ends[pos])
        if steps is not None:
            bounds += (steps[pos],)
        selection[axis] = slice(*bounds)
    tslices = tuple(selection)
    try:
        return data[tslices]
    except IndexError as e:
        raise IndexError(
            f"Unable to run `data[tslices]` with type(data)={type(data)} "
            f"and type(tslices)={type(tslices)}.") from e

34 

35 

def argmax_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Needed for operator `ArgMax`.

    :param data: input array
    :param axis: axis along which the maximum is searched
    :param keepdims: keeps the reduced axis as a dimension of size 1
    :param select_last_index: returns the index of the last
        occurrence of the maximum instead of the first one
    :return: indices as an array of type *int64*
    """
    if select_last_index:
        # The first maximum of the flipped array is the last maximum
        # of the original one, the index is converted back afterwards.
        flipped = numpy.flip(data, axis)
        first = numpy.argmax(flipped, axis=axis)
        positions = flipped.shape[axis] - first - 1
        if keepdims:
            positions = numpy.expand_dims(positions, axis)
        return positions.astype(numpy.int64)

    positions = numpy.argmax(data, axis=axis)
    if keepdims and len(positions.shape) < len(data.shape):
        positions = numpy.expand_dims(positions, axis)
    return positions.astype(numpy.int64)

53 

54 

def argmin_use_numpy_select_last_index(
        data, axis=0, keepdims=True, select_last_index=False):
    """
    Needed for operator `ArgMin`.

    :param data: input array
    :param axis: axis along which the minimum is searched
    :param keepdims: keeps the reduced axis as a dimension of size 1
    :param select_last_index: returns the index of the last
        occurrence of the minimum instead of the first one
    :return: indices as an array of type *int64*
    """
    if select_last_index:
        # The first minimum of the flipped array is the last minimum
        # of the original one, the index is converted back afterwards.
        flipped = numpy.flip(data, axis)
        first = numpy.argmin(flipped, axis=axis)
        positions = flipped.shape[axis] - first - 1
        if keepdims:
            positions = numpy.expand_dims(positions, axis)
        return positions.astype(numpy.int64)

    positions = numpy.argmin(data, axis=axis)
    if keepdims and len(positions.shape) < len(data.shape):
        positions = numpy.expand_dims(positions, axis)
    return positions.astype(numpy.int64)

72 

73 

def array_feature_extrator(data, indices):
    """
    Implementation of operator *ArrayFeatureExtractor*
    with :epkg:`numpy`.

    :param data: array, the features are picked along its last axis
    :param indices: array of indices, it is flattened whatever
        its shape is
    :return: array whose last dimension is the number of indices,
        a one dimension input gives a result of shape ``(1, n)``
    """
    # Whatever the shape of *indices*, the selection uses the
    # flattened list and the number of selected features is its length.
    index = indices.ravel().tolist()
    add = len(index)
    if len(data.shape) == 1:
        new_shape = (1, add)
    else:
        new_shape = list(data.shape[:-1]) + [add]
    return data[..., index].reshape(new_shape)

97 

98 

class NumpyCode:
    """
    Converts an ONNX operators into :epkg:`numpy` code.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes, iterable of pairs `(name, value)`
    :param used: dictionary `{k: v}`,
        list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializer as replaced
    :param indent: indentation of the second line and following
    :return: code as str
    """

    def __init__(self, opset, name=None, op_type=None, domain='',
                 inputs=None, outputs=None, attributes=None,
                 used=None, context=None, mark_inits=None,
                 indent="", **unused):
        self.opset = opset
        self.name = name
        self.op_type = op_type
        self.domain = domain
        self.inputs = inputs
        self.outputs = outputs
        self.attributes = attributes
        self.used = used
        self.context = context
        self.mark_inits = mark_inits
        self.unused = unused
        self.indent = indent

    def _make_sure_inputs(self, n, m=None):
        """
        Checks the node has between *n* and *m* inputs
        (exactly *n* if *m* is None), raises *RuntimeError* otherwise.
        """
        if m is None:
            m = n
        if len(self.inputs) < n:
            raise RuntimeError(  # pragma: no cover
                "Expecting at least %d inputs for operator %r not %r." % (
                    n, self.op_type, self.inputs))
        if len(self.inputs) > m:
            raise RuntimeError(  # pragma: no cover
                "Expecting at most %d inputs for operator %r not %r." % (
                    m, self.op_type, self.inputs))

    def _make_sure_opsets(self, mi, ma=None):
        """
        Checks the target opset is in `[mi, ma]` (a bound set to None
        is ignored), raises *RuntimeError* otherwise.
        """
        # op_type is a string: it must be formatted with %r, %d would
        # raise a TypeError and hide the real error.
        if mi is not None and self.opset < mi:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d < %d." % (
                    self.op_type, self.opset, mi))
        if ma is not None and self.opset > ma:
            raise RuntimeError(  # pragma: no cover
                "Cannot convert operator type %r, opset %d > %d." % (
                    self.op_type, self.opset, ma))

    def _getat(self, name, defval=None, format=None):
        """
        Returns the value of attribute *name* or *defval* if the
        node does not define it.

        :param name: attribute name
        :param defval: value returned when the attribute is missing
        :param format: None to return the value unchanged,
            `'listint'` or `'listfloat'` to parse a string such as
            `'[1, 2]'` into a list of numbers
        :return: attribute value
        """

        def f(v):
            if format is None:
                return v
            if format == 'listint' and isinstance(v, str):
                return list(
                    map(int, v.strip('[]').replace(' ', '').split(',')))
            if format == 'listfloat' and isinstance(v, str):
                return list(
                    map(float, v.strip('[]').replace(' ', '').split(',')))
            raise ValueError(  # pragma: no cover
                f"Unable to convert {v!r} with format={format!r}.")

        # attributes defaults to None in the constructor,
        # it is then handled as a node without any attribute.
        for n, val in self.attributes or ():
            if name == n:
                return f(val)
        return defval

    @staticmethod
    def _is_true(val):
        """
        Tells if a flag attribute is enabled, the value may be
        an integer, a boolean or its string representation.
        """
        return val in ('1', 1, True)

    def _simplify(self, name, kind):
        """
        Returns the code for input *name*. If *name* is a small
        *int64* initializer used by a single node, its value is
        inlined and the initializer is registered in *mark_inits*.

        :param name: input name
        :param kind: `'tuple'` or `'list'`, expected python type
        :return: code as str
        """
        value = None
        if (self.used is not None and name in self.used and
                len(self.used[name]) == 1 and self.context is not None):
            inits = self.context['initializers_dict']
            if name in inits:
                v = inits[name]
                if v.dtype == numpy.int64 and v.size < 10:
                    value = v
                    if self.mark_inits is not None:
                        self.mark_inits.setdefault(name, []).append(v)

        # tolist() converts numpy integers into python integers.
        # Depending on numpy version, the representation of a numpy
        # integer is `np.int64(1)` which is not valid in the
        # generated code.
        if kind == 'tuple':
            if value is None:
                return f"tuple({name})"
            if value.size == 1:
                return str(value.ravel().tolist()[0])
            return str(tuple(value.tolist()))
        if kind == 'list':
            if value is None:
                return name
            return str(value.tolist())
        raise NotImplementedError(  # pragma: no cover
            f"Unknown scenario to simplify ({kind!r}).")

    @staticmethod
    def _make_tuple(val):
        """
        Converts *val* (tuple, list, int or a string such as
        `'[1, 0]'`) into a tuple, integers and None are returned
        unchanged.
        """
        if val is None:
            # attribute not specified, numpy accepts axes=None
            return None
        if isinstance(val, tuple):
            return val
        if isinstance(val, list):
            return tuple(val)
        if isinstance(val, int):
            return val
        if isinstance(val, str):
            return tuple(map(int, val.strip('()[]').replace(" ", "").split(",")))
        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert type {type(val)!r} ({val!r}) into tuple.")

    def make_numpy_code(self):
        """
        Main method, returns the python code for a given
        operator.
        """
        if self.domain == '':
            return self._make_numpy_code_onnx()

        if self.domain == 'ai.onnx.ml':
            return self._make_numpy_code_onnxml()

        if self.domain == 'com.microsoft':
            return self._make_numpy_code_others()

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert any operator from domain {self.domain!r}.")

    def _make_arg_code(self, outs, fct):
        """
        Returns the code for operators `ArgMax`, `ArgMin`.

        :param outs: left side of the assignment
        :param fct: numpy function, `'argmax'` or `'argmin'`
        :return: code as str
        """
        self._make_sure_opsets(12)
        self._make_sure_inputs(1)
        axis = self._getat('axis', 0)
        keepdims = self._getat('keepdims', 1)
        select_last_index = self._getat('select_last_index', 0)
        if self._is_true(select_last_index):
            return (
                "%s = %s_use_numpy_select_last_index("
                "%s, axis=%s, keepdims=%s, select_last_index=%s)" % (
                    outs, fct, self.inputs[0], axis, keepdims,
                    select_last_index))
        if self._is_true(keepdims):
            # the reduced dimension is restored where it was removed
            return "%s = numpy.expand_dims(numpy.%s(%s, axis=%s), %s)" % (
                outs, fct, self.inputs[0], axis, axis)
        return f"{outs} = numpy.{fct}({self.inputs[0]}, axis={axis})"

    def _make_numpy_code_onnx(self):
        """
        Returns the code for an operator of the main domain.
        """
        binary_ops = dict(Add='+', Sub='-', Div='/', Mul='*', MatMul='@',
                          Pow='**')
        unary_ops = dict(Neg='-')
        unary_ops_ = dict(Sqrt='** 0.5')

        outs = ", ".join(self.outputs)

        if self.op_type in binary_ops:
            self._make_sure_inputs(2)
            return "%s = %s %s %s" % (
                outs, self.inputs[0], binary_ops[self.op_type],
                self.inputs[1])

        if self.op_type in unary_ops:
            self._make_sure_inputs(1)
            return f"{outs} = {unary_ops[self.op_type]} {self.inputs[0]}"

        if self.op_type in unary_ops_:
            self._make_sure_inputs(1)
            return f"{outs} = {self.inputs[0]} {unary_ops_[self.op_type]}"

        if self.op_type in {'Abs', 'Ceil', 'Cos', 'Cosh',
                            'Exp', 'Log', 'Sin', 'Sinh',
                            'Tan', 'Tanh'}:
            return f"{outs} = numpy.{self.op_type.lower()}({self.inputs[0]})"

        if self.op_type == 'ArgMax':
            return self._make_arg_code(outs, 'argmax')

        if self.op_type == 'ArgMin':
            return self._make_arg_code(outs, 'argmin')

        if self.op_type == 'Cast':
            from ..onnx2py_helper import _elem_type_as_str
            self._make_sure_inputs(1)
            to = int(self._getat('to', 1))
            dtype = _elem_type_as_str(to)
            dtype = {'double': 'float64', 'float': 'float32'}.get(dtype, dtype)
            return f"{outs} = {self.inputs[0]}.astype(numpy.{dtype})"

        if self.op_type == 'Concat':
            axis = self._getat('axis', 0)
            return f"{outs} = numpy.concatenate([{', '.join(self.inputs)}], {axis})"

        if self.op_type == 'ConstantOfShape':
            self._make_sure_opsets(9)
            self._make_sure_inputs(1)
            value = self._getat('value', 0, format='listfloat')
            shape = self._simplify(self.inputs[0], kind='tuple')
            return f"{outs} = numpy.full({shape}, {value})"

        if self.op_type == 'Max':
            return f"{outs} = numpy.maximum({', '.join(self.inputs)})"

        if self.op_type == 'Gather':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            axis = self._getat('axis', 0)
            return "%s = numpy.take(%s, %s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'list'), axis)

        if self.op_type == 'Gemm':
            self._make_sure_inputs(2, 3)
            # alpha and beta are equal to 1 when they are not
            # specified, a null default would cancel the result
            alpha = self._getat('alpha', 1.)
            transA = self._getat('transA', 0)
            transB = self._getat('transB', 0)
            ta = ".T" if self._is_true(transA) else ""
            tb = ".T" if self._is_true(transB) else ""
            if len(self.inputs) == 2:
                return f"{outs} = {self.inputs[0]}{ta} @ {self.inputs[1]}{tb} * {alpha}"
            beta = self._getat('beta', 1.)
            return "%s = %s%s @ %s%s * %s + %s * %s" % (
                outs, self.inputs[0], ta, self.inputs[1], tb, alpha,
                self.inputs[2], beta)

        if self.op_type == 'Identity':
            return f"{outs} = {self.inputs[0]}"

        if self.op_type == 'ReduceProd':
            self._make_sure_inputs(1)
            # NOTE(review): defaults (first axis, keepdims=0) look
            # different from ONNX defaults (all axes, keepdims=1),
            # to be confirmed before changing them.
            axes = self._getat('axes', "[0]")
            keepdims = self._getat('keepdims', 0)
            return "%s = %s.prod(axis=tuple(%s), keepdims=%s)" % (
                outs, self.inputs[0], axes, keepdims)

        if self.op_type == 'ReduceSum':
            self._make_sure_opsets(11)
            self._make_sure_inputs(2)
            # NOTE(review): ONNX default for keepdims seems to be 1
            keepdims = self._getat('keepdims', 0)
            return "%s = %s.sum(axis=%s, keepdims=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'),
                keepdims)

        if self.op_type == 'ReduceSumSquare':
            self._make_sure_inputs(1)
            # NOTE(review): same remark as ReduceProd about defaults
            axes = self._getat('axes', "[0]")
            keepdims = self._getat('keepdims', 0)
            return "%s = (%s ** 2).sum(axis=tuple(%s), keepdims=%s)" % (
                outs, self.inputs[0], axes, keepdims)

        if self.op_type == 'Reshape':
            self._make_sure_inputs(2)
            simp = self._simplify(self.inputs[1], 'tuple')
            return f"{outs} = {self.inputs[0]}.reshape({simp})"

        if self.op_type == 'Shape':
            self._make_sure_inputs(1)
            return f"{outs} = numpy.array({self.inputs[0]}.shape, dtype=numpy.int64)"

        if self.op_type == 'Slice':
            return f"{outs} = make_slice({', '.join(self.inputs)})"

        if self.op_type == 'Softmax':
            self._make_sure_inputs(1)
            axis = self._getat('axis', -1)
            return f"{outs} = scipy_special.softmax({self.inputs[0]}, axis={axis})"

        if self.op_type == 'Squeeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.squeeze(%s, axis=%s)" % (
                outs, self.inputs[0], self._simplify(self.inputs[1], 'tuple'))

        if self.op_type == 'Transpose':
            self._make_sure_inputs(1)
            # without attribute perm, axes=None reverses the axes
            perm = self._getat('perm', None)
            return "%s = numpy.transpose(%s, axes=%s)" % (
                outs, self.inputs[0], self._make_tuple(perm))

        if self.op_type == 'Unsqueeze':
            self._make_sure_opsets(13)
            self._make_sure_inputs(2)
            return "%s = numpy.expand_dims(%s, axis=%s)" % (
                outs, self.inputs[0],
                self._simplify(self.inputs[1], 'tuple'))

        raise NotImplementedError(  # pragma: no cover
            f"Unable to convert operator type {self.op_type!r} name={self.name!r}.")

    def _make_numpy_code_onnxml(self):
        """
        Returns the code for an operator of domain `ai.onnx.ml`.
        """
        outs = ", ".join(self.outputs)

        if self.op_type == 'ArrayFeatureExtractor':
            self._make_sure_inputs(2)
            return "%s = array_feature_extrator(%s, %s)" % (
                outs, self.inputs[0], self.inputs[1])

        if self.op_type == 'LinearClassifier':
            # NOTE(review): the value is read from attribute 'targets'
            # but reported as multi_class, to be confirmed.
            multi_class = self._getat('targets', 0)
            if multi_class != 0:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with multi_class=%r "
                    "is not implemented." % (self.op_type, multi_class))
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            classlabels_strings = self._getat('classlabels_strings', None)
            if classlabels_strings is not None:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_strings=%r "
                    "is not implemented." % (self.op_type, classlabels_strings))
            classlabels_ints = self._getat(
                'classlabels_ints', None, format="listint")
            if classlabels_ints is None:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r without classlabels_ints "
                    "is not implemented." % (self.op_type, ))
            expected_labels = list(range(len(classlabels_ints)))
            if classlabels_ints != expected_labels:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with classlabels_ints=%r!=%r "
                    "is not implemented." % (
                        self.op_type, classlabels_ints, expected_labels))
            targets = len(classlabels_ints)
            # The first row is not indented, the caller inserts
            # the code after its own indentation.
            rows = [
                "coefs = numpy.array(%s, dtype=numpy.float32)."
                "reshape((%d, -1)).T" % (coefficients, targets),
                "%sinter = numpy.array(%s, dtype=numpy.float32)."
                "reshape((-1, %d))" % (self.indent, intercepts, targets)]

            if post_transform == "SOFTMAX":
                rows.append(
                    "%s%s = scipy_special.softmax"
                    "(%s @ coefs + inter, axis=1)" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            elif post_transform == 'NONE':
                rows.append(
                    "%s%s = %s @ coefs + inter" % (
                        self.indent, self.outputs[1], self.inputs[0]))
            else:
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows.append("%s%s = numpy.argmax(%s, axis=1)" % (
                self.indent, self.outputs[0], self.outputs[1]))
            return "\n".join(rows)

        if self.op_type == 'LinearRegressor':
            self._make_sure_inputs(1)
            coefficients = self._getat('coefficients', None)
            intercepts = self._getat('intercepts', None)
            post_transform = self._getat(
                'post_transform', 'NONE').strip('"\'b')
            targets = self._getat('targets', 1)
            if post_transform != "NONE":
                raise NotImplementedError(  # pragma: no cover
                    "Conversion of operator %r with post_transform=%r "
                    "is not implemented." % (self.op_type, post_transform))
            rows = [
                "coefs = numpy.array(%s, dtype=numpy.float32)."
                "reshape((%d, -1)).T" % (coefficients, targets),
                "%sinter = numpy.array(%s, dtype=numpy.float32)."
                "reshape((-1, %d))" % (self.indent, intercepts, targets),
                f"{self.indent}{outs} = {self.inputs[0]} @ coefs + inter"]
            return "\n".join(rows)

        if self.op_type == 'Normalizer':
            self._make_sure_inputs(1)
            post_transform = self._getat('norm', 'MAX').strip('"\'b')
            if post_transform == 'L2':
                # keepdims is needed to divide every row by its own norm
                return "%s = %s / (%s ** 2).sum(axis=1, keepdims=1) ** 0.5" % (
                    outs, self.inputs[0], self.inputs[0])
            # NOTE(review): L1 and MAX do not take the absolute value
            # of the coefficients, to be confirmed with ONNX specifications.
            if post_transform == 'L1':
                post_transform = 'sum'
            return "%s = %s / %s.%s(axis=1, keepdims=1)" % (
                outs, self.inputs[0], self.inputs[0], post_transform.lower())

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r name=%r (onnxml)." % (
                self.op_type, self.name))

    def _make_numpy_code_others(self):
        """
        Returns the code for an operator of domain `com.microsoft`.
        """
        outs = ", ".join(self.outputs)

        if self.op_type == 'CDist':
            self._make_sure_inputs(2)
            metric = self._getat('metric', 'euclidean').strip("'b")
            return "%s = scipy_distance.cdist(%s, %s, metric=%r)" % (
                outs, self.inputs[0], self.inputs[1], metric)

        raise NotImplementedError(  # pragma: no cover
            "Unable to convert operator type %r (domain=%r) "
            "name=%r (onnxml)." % (
                self.op_type, self.domain, self.name))

505 

506 

def make_numpy_code(opset, name=None, op_type=None, domain='',
                    inputs=None, outputs=None, attributes=None,
                    used=None, context=None, mark_inits=None,
                    indent="", **unused):
    """
    Converts an ONNX operators into :epkg:`numpy` code.
    The function builds an instance of class *NumpyCode* with
    the same arguments and returns the code it produces.

    :param opset: target opset for the conversion (usually unused)
    :param name: node name
    :param op_type: operator type
    :param domain: domain
    :param inputs: inputs
    :param outputs: outputs
    :param attributes: attributes
    :param used: dictionary `{k: v}`,
        list of nodes taking *k* as input
    :param context: whole context
    :param mark_inits: marks initializer as replaced
    :param indent: indentation of the second line and following
    :param unused: additional named parameters, forwarded to the class
    :return: code as str
    """
    settings = dict(
        name=name, op_type=op_type, domain=domain,
        inputs=inputs, outputs=outputs, attributes=attributes,
        used=used, context=context, mark_inits=mark_inits,
        indent=indent)
    converter = NumpyCode(opset, **settings, **unused)
    return converter.make_numpy_code()