Coverage for mlprodict/onnxrt/ops_cpu/op_dict_vectorizer.py: 100%
31 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1# -*- encoding: utf-8 -*-
2# pylint: disable=E0203,E1101,C0111
3"""
4@file
5@brief Runtime operator.
6"""
7import numpy
8from scipy.sparse import coo_matrix
9from ._op import OpRun, RuntimeTypeError
class DictVectorizer(OpRun):
    """
    Runtime for operator *DictVectorizer*.

    Turns a sequence of dictionaries into a sparse matrix. The column
    receiving a value is the position of its key in the vocabulary,
    taken from attribute *int64_vocabulary* when it is not empty and
    from *string_vocabulary* otherwise.
    """

    atts = {'int64_vocabulary': numpy.empty(0, dtype=numpy.int64),
            'string_vocabulary': numpy.empty(0, dtype=numpy.str_)}

    def __init__(self, onnx_node, desc=None, **options):
        """
        Builds the mapping *key -> column index* from the vocabulary.

        @param      onnx_node   forwarded to ``OpRun.__init__``
        @param      desc        forwarded to ``OpRun.__init__``
        @param      options     forwarded to ``OpRun.__init__``

        Raises *RuntimeError* if both vocabularies are empty.
        """
        OpRun.__init__(self, onnx_node, desc=desc,
                       expected_attributes=DictVectorizer.atts,
                       **options)
        # The integer vocabulary takes precedence when both are given.
        self.is_int = len(self.int64_vocabulary) > 0
        if self.is_int:
            self.dict_labels = {
                v: i for i, v in enumerate(self.int64_vocabulary)}
        else:
            # Entries may be bytes or already decoded strings (the default
            # value of the attribute is an array of numpy.str_),
            # only bytes need to be decoded.
            self.dict_labels = {
                (v.decode('utf-8') if isinstance(v, bytes) else v): i
                for i, v in enumerate(self.string_vocabulary)}
        if not self.dict_labels:
            raise RuntimeError(  # pragma: no cover
                "int64_vocabulary and string_vocabulary cannot be both empty.")

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        Converts a list or an array of dictionaries into a sparse matrix.

        @param      x           list or numpy array, every element
                                must expose method *items*
        @param      attributes  unused by this method
        @param      verbose     unused by this method
        @param      fLOG        unused by this method
        @return                 tuple with one ``coo_matrix`` of shape
                                *(len(x), size of the vocabulary)*

        Raises *RuntimeTypeError* if *x* is neither a list nor a numpy array
        and *KeyError* if a key does not belong to the vocabulary.
        """
        if not isinstance(x, (numpy.ndarray, list)):
            raise RuntimeTypeError(  # pragma: no cover
                f"x must be iterable not {type(x)}.")
        values = []
        rows = []
        cols = []
        for i, row in enumerate(x):
            for k, v in row.items():
                values.append(v)
                rows.append(i)
                cols.append(self.dict_labels[k])
        values = numpy.array(values)
        # Explicit integer type: an empty list would otherwise produce
        # float64 index arrays.
        rows = numpy.array(rows, dtype=numpy.int64)
        cols = numpy.array(cols, dtype=numpy.int64)
        return (coo_matrix((values, (rows, cols)),
                           shape=(len(x), len(self.dict_labels))), )