Coverage for mlprodict/onnxrt/ops_cpu/op_tfidfvectorizer.py: 100%

38 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1# -*- encoding: utf-8 -*- 

2# pylint: disable=E0203,E1101,C0111 

3""" 

4@file 

5@brief Runtime operator. 

6""" 

7import numpy 

8from ._op import OpRunUnary 

9from .op_tfidfvectorizer_ import RuntimeTfIdfVectorizer # pylint: disable=E0611,E0401 

10 

11 

class TfIdfVectorizer(OpRunUnary):
    """
    Runtime wrapper around ``RuntimeTfIdfVectorizer``.

    The actual computation is delegated to ``RuntimeTfIdfVectorizer``.
    This class prepares the arguments of its ``init`` method and, when
    attribute ``pool_strings`` is not empty, converts every input token
    into an integer id before calling ``compute``.
    """

    # Expected node attributes and their default values.
    atts = {'max_gram_length': 1,
            'max_skip_count': 1,
            'min_gram_length': 1,
            'mode': b'TF',
            'ngram_counts': [],
            'ngram_indexes': [],
            'pool_int64s': [],
            'pool_strings': [],
            'weights': []}

    def __init__(self, onnx_node, desc=None, **options):
        """
        Initializes the underlying runtime from the node attributes.

        :param onnx_node: forwarded to ``OpRunUnary.__init__``
        :param desc: forwarded to ``OpRunUnary.__init__``
        :param options: forwarded to ``OpRunUnary.__init__``
        """
        OpRunUnary.__init__(self, onnx_node, desc=desc,
                            expected_attributes=TfIdfVectorizer.atts,
                            **options)
        self.rt_ = RuntimeTfIdfVectorizer()
        # ``len`` is used on purpose instead of a truthiness test:
        # the attribute may be a numpy array (TODO confirm), for which
        # ``bool(...)`` is ambiguous.
        if len(self.pool_strings) != 0:
            pool_strings_ = numpy.array(
                [_.decode('utf-8') for _ in self.pool_strings])
            mapping = {}
            pool_int64s = []
            for i, w in enumerate(pool_strings_):
                # A string receives the position of its first occurrence
                # in the pool (1-gram are processed first); later
                # duplicates reuse that id.
                pool_int64s.append(mapping.setdefault(w, i))
        else:
            mapping = None
            pool_int64s = self.pool_int64s
            pool_strings_ = None

        self.mapping_ = mapping
        self.pool_strings_ = pool_strings_
        self.rt_.init(
            self.max_gram_length, self.max_skip_count, self.min_gram_length,
            self.mode, self.ngram_counts, self.ngram_indexes, pool_int64s,
            self.weights)

    def _run(self, x, attributes=None, verbose=0, fLOG=None):  # pylint: disable=W0221
        """
        Runs the operator on *x*.

        :param x: numpy array; it is given as is to the runtime when no
            string pool was defined, otherwise every element is first
            replaced by its integer id (``-1`` for an element which is
            not in the pool)
        :param attributes: unused in this method
        :param verbose: unused in this method
        :param fLOG: unused in this method
        :return: tuple with one element, the output of the runtime,
            reshaped into ``(x.shape[0], -1)`` when *x* has more than
            one dimension
        """
        if self.mapping_ is None:
            ids = x
        else:
            # The conversion works on the flattened array so that 1-D
            # inputs are supported the same way as in the integer case.
            lookup = self.mapping_.get
            ids = numpy.fromiter(
                (lookup(w, -1) for w in x.ravel()),
                dtype=numpy.int64, count=x.size).reshape(x.shape)

        res = self.rt_.compute(ids)
        if len(x.shape) > 1:
            return (res.reshape((x.shape[0], -1)), )
        return (res, )