Coverage for mlprodict/onnxrt/ops_cpu/op_tfidfvectorizer.py: 100%
38 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1# -*- encoding: utf-8 -*-
2# pylint: disable=E0203,E1101,C0111
3"""
4@file
5@brief Runtime operator.
6"""
7import numpy
8from ._op import OpRunUnary
9from .op_tfidfvectorizer_ import RuntimeTfIdfVectorizer # pylint: disable=E0611,E0401
class TfIdfVectorizer(OpRunUnary):
    """
    Runtime for operator *TfIdfVectorizer*.

    The computation itself is delegated to ``RuntimeTfIdfVectorizer``.
    When the node declares ``pool_strings``, every string is replaced by
    an integer identifier before the runtime is called, both in the
    pool (at construction time) and in the input (at run time).
    """

    # Expected attributes and the default value used when the node
    # does not define them.
    atts = {'max_gram_length': 1,
            'max_skip_count': 1,
            'min_gram_length': 1,
            'mode': b'TF',
            'ngram_counts': [],
            'ngram_indexes': [],
            'pool_int64s': [],
            'pool_strings': [],
            'weights': []}

    def __init__(self, onnx_node, desc=None, **options):
        """
        :param onnx_node: node, forwarded to ``OpRunUnary.__init__``
        :param desc: forwarded to ``OpRunUnary.__init__``
        :param options: forwarded to ``OpRunUnary.__init__``
        """
        OpRunUnary.__init__(self, onnx_node, desc=desc,
                            expected_attributes=TfIdfVectorizer.atts,
                            **options)
        self.rt_ = RuntimeTfIdfVectorizer()
        # ``len`` is used on purpose: the attribute may be an array whose
        # truth value is ambiguous.
        if len(self.pool_strings) != 0:
            pool_strings_ = numpy.array(
                [_.decode('utf-8') for _ in self.pool_strings])
            mapping = {}
            pool_int64s = []
            for i, w in enumerate(pool_strings_):
                # A string keeps the index of its first occurrence
                # (1-gram are processed first), later duplicates reuse it.
                pool_int64s.append(mapping.setdefault(w, i))
        else:
            mapping = None
            pool_int64s = self.pool_int64s
            pool_strings_ = None

        self.mapping_ = mapping
        self.pool_strings_ = pool_strings_
        self.rt_.init(
            self.max_gram_length, self.max_skip_count, self.min_gram_length,
            self.mode, self.ngram_counts, self.ngram_indexes, pool_int64s,
            self.weights)

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        Runs the vectorizer on *x*.

        :param x: input array; its elements are looked up in the string
            mapping when the node defines ``pool_strings``, otherwise the
            array is given to the runtime unchanged
        :param attributes: unused
        :param verbose: unused
        :param fLOG: unused
        :return: tuple with a single array, reshaped to
            ``(x.shape[0], -1)`` when *x* has more than one dimension
        """
        if self.mapping_ is None:
            res = self.rt_.compute(x)
        else:
            # Strings are converted element-wise whatever the rank of
            # *x* is (the former double loop failed on 1-D inputs).
            # Unknown strings receive -1.
            lookup = self.mapping_
            xi = numpy.array(
                [lookup.get(w, -1) for w in x.ravel()],
                dtype=numpy.int64).reshape(x.shape)
            res = self.rt_.compute(xi)

        # Same output rule for both branches: one row per input row
        # for multi-dimensional inputs, the raw runtime output otherwise.
        if len(x.shape) > 1:
            return (res.reshape((x.shape[0], -1)), )
        return (res, )