Coverage for mlprodict/onnxrt/ops_cpu/op_dict_vectorizer.py

1# -*- encoding: utf-8 -*-

2# pylint: disable=E0203,E1101,C0111

3"""

4@file

5@brief Runtime operator.

6"""

7import numpy

8from scipy.sparse import coo_matrix

9from ._op import OpRun, RuntimeTypeError

class DictVectorizer(OpRun):
    """
    Runtime for operator *DictVectorizer*: turns a sequence of
    dictionaries into a sparse matrix, one row per dictionary and
    one column per vocabulary entry.

    The vocabulary comes from attribute *int64_vocabulary* when it is
    not empty, from *string_vocabulary* otherwise. The position of a
    key in the vocabulary is the column index of its value.
    """

    atts = {'int64_vocabulary': numpy.empty(0, dtype=numpy.int64),
            'string_vocabulary': numpy.empty(0, dtype=numpy.str_)}

    def __init__(self, onnx_node, desc=None, **options):
        """
        Builds the mapping ``key -> column index`` stored in
        ``self.dict_labels`` and sets ``self.is_int`` to tell which
        vocabulary was used.

        @param      onnx_node   forwarded to ``OpRun.__init__``
        @param      desc        forwarded to ``OpRun.__init__``
        @param      options     forwarded to ``OpRun.__init__``

        Raises ``RuntimeError`` if both vocabularies are empty.
        """
        OpRun.__init__(self, onnx_node, desc=desc,
                       expected_attributes=DictVectorizer.atts,
                       **options)
        # The vocabularies may be numpy arrays: their truth value is
        # ambiguous, so the emptiness test relies on len().
        if len(self.int64_vocabulary) > 0:
            self.dict_labels = {
                v: i for i, v in enumerate(self.int64_vocabulary)}
            self.is_int = True
        else:
            # Entries may be bytes or already text (the default declared
            # in *atts* has dtype numpy.str_, which has no ``decode``).
            # Only bytes need decoding.
            self.dict_labels = {
                (v.decode('utf-8') if isinstance(v, bytes) else str(v)): i
                for i, v in enumerate(self.string_vocabulary)}
            self.is_int = False
        if not self.dict_labels:
            raise RuntimeError(  # pragma: no cover
                "int64_vocabulary and string_vocabulary cannot be both empty.")

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        Converts *x* into a sparse matrix.

        @param      x           list or numpy array whose items expose
                                ``items()`` (dictionaries ``key -> value``)
        @param      attributes  unused here
        @param      verbose     unused here
        @param      fLOG        unused here
        @return                 tuple with a single ``coo_matrix`` of shape
                                ``(len(x), len(self.dict_labels))``

        Raises ``RuntimeTypeError`` if *x* is neither a numpy array nor
        a list, and ``KeyError`` if a key is missing from the vocabulary.
        """
        if not isinstance(x, (numpy.ndarray, list)):
            raise RuntimeTypeError(  # pragma: no cover
                f"x must be iterable not {type(x)}.")
        # Coordinate-format triplets: one (value, row, column) per
        # dictionary entry.
        values = []
        rows = []
        cols = []
        for i, row in enumerate(x):
            for k, v in row.items():
                values.append(v)
                rows.append(i)
                cols.append(self.dict_labels[k])
        values = numpy.array(values)
        rows = numpy.array(rows)
        cols = numpy.array(cols)
        return (coo_matrix((values, (rows, cols)),
                           shape=(len(x), len(self.dict_labels))), )

Coverage for mlprodict/onnxrt/ops_cpu/op_dict_vectorizer.py: 100%