Coverage for mlprodict/onnxrt/validate/validate_benchmark.py: 100%

86 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1""" 

2@file 

3@brief Measures time processing for ONNX models. 

4""" 

5import numpy 

6from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version 

7from ... import __version__ as ort_version 

8from .validate_helper import default_time_kwargs, measure_time 

9 

10 

def _repeat_rows(a, n, period):
    """
    Builds an array of *n* rows by cycling through the first
    *period* rows of *a*.

    @param      a       numpy array with at least one dimension
    @param      n       number of rows of the result
    @param      period  length of the chunk which is repeated (> 0)
    @return             new array of shape ``(n, ) + a.shape[1:]``
                        with the same dtype as *a*
    """
    # Trailing dimensions are taken from *a* itself so that
    # vectors, matrices and higher-rank arrays share the same code.
    out = numpy.empty((n, ) + a.shape[1:], dtype=a.dtype)
    for start in range(0, n, period):
        stop = min(start + period, n)
        # The last chunk is truncated to end exactly at row *n*.
        out[start:stop] = a[:stop - start]
    return out


def make_n_rows(x, n, y=None):
    """
    Multiplies or reduces the rows of x to get
    exactly *n* rows.

    @param      x       matrix
    @param      n       number of rows
    @param      y       target (optional)
    @return             new matrix or two new matrices if y is not None

    If *n* is lower than the number of rows of *x*, the first *n* rows
    are copied. Otherwise the rows of *x* are repeated in cycle until
    *n* rows are produced. *y* is repeated with the same period as *x*
    (``x.shape[0]``) so that features and targets remain aligned.
    Trailing dimensions of *x* and *y* are handled independently:
    both can have one, two or more dimensions.
    A ``ValueError`` is raised if *x* has no row and *n* is positive.
    """
    n_rows = x.shape[0]
    if n < n_rows:
        if y is None:
            return x[:n].copy()
        return x[:n].copy(), y[:n].copy()

    if n_rows == 0 and n > 0:
        # Nothing to repeat: fail with an explicit message instead of
        # the obscure "range() arg 3 must not be zero".
        raise ValueError(
            f"Unable to build {n} rows from an empty array, "
            f"x.shape={x.shape}.")

    # The period is at least 1 so that range() stays valid when both
    # *x* is empty and n == 0 (the result is then an empty array).
    period = max(n_rows, 1)
    r = _repeat_rows(x, n, period)
    if y is None:
        return r
    return r, _repeat_rows(y, n, period)

57 

58 

def benchmark_fct(fct, X, time_limit=4, obs=None, node_time=False,
                  time_kwargs=None, skip_long_test=True):
    """
    Benchmarks a function which takes an array
    as an input and changes the number of rows.

    @param      fct             function to benchmark, signature
                                is `fct(xo)`, if *node_time* is True,
                                ``fct(xo)[1]`` must be a list of
                                dictionaries with a key ``'time'``
                                (one dictionary per node)
    @param      X               array
    @param      time_limit      above this time, measurement is stopped
    @param      obs             all information available in a dictionary
    @param      node_time       measure time execution for each node in the graph
    @param      time_kwargs     to define a more precise way to measure a model,
                                dictionary ``{N: {'number': ..., 'repeat': ...}}``
                                where every key *N* is an integer
    @param      skip_long_test  skips tests for high values of N if they seem too long
    @return                     dictionary with the results (one key per *N*),
                                or a list of rows (one per node and per *N*)
                                if *node_time* is True

    The function uses *obs* to reduce the number of tries it does.
    :epkg:`sklearn:gaussian_process:GaussianProcessRegressor`
    produces huge *NxN* if predict method is called
    with ``return_cov=True``.
    The default for *time_kwargs* is the following:

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs
        import pprint
        pprint.pprint(default_time_kwargs())

    See also notebook :ref:`onnxnodetimerst` to see how this function
    can be used to measure time spent in each node.

    If *node_time* is True, every returned row holds the average time
    (``'time'``) over ``repeat * number`` calls and the minimum and
    maximum (``'min_time'``, ``'max_time'``) of the per-repetition
    averages, computed over **all** repetitions, the first one included.
    The dictionaries returned by *fct* are modified in place.
    """
    if time_kwargs is None:
        time_kwargs = default_time_kwargs()  # pragma: no cover

    def make(x, n):
        return make_n_rows(x, n)

    def allow(N, obs):
        # Problems whose name contains "-cov" are skipped for N > 1000
        # (see the note about *NxN* outputs in the docstring).
        if obs is None:
            return True  # pragma: no cover
        prob = obs['problem']
        if "-cov" in prob and N > 1000:
            return False  # pragma: no cover
        return True

    Ns = list(sorted(time_kwargs))
    res = {}
    for N in Ns:
        if not isinstance(N, int):
            raise RuntimeError(  # pragma: no cover
                f"time_kwargs ({type(time_kwargs)}) is wrong:\n{time_kwargs}")
        if not allow(N, obs):
            continue  # pragma: no cover
        x = make(X, N)
        number = time_kwargs[N]['number']
        repeat = time_kwargs[N]['repeat']
        if node_time:
            # First call is not measured, its result is discarded
            # (presumably a warm-up).
            fct(x)
            main = None
            for __ in range(repeat):
                # agg: per-node time summed over *number* calls
                # for the current repetition.
                agg = None
                for _ in range(number):
                    ms = fct(x)[1]
                    if agg is None:
                        agg = ms
                        for row in agg:
                            row['N'] = N
                    else:
                        if len(agg) != len(ms):
                            raise RuntimeError(  # pragma: no cover
                                f"Not the same number of nodes {len(agg)} != {len(ms)}.")
                        for a, b in zip(agg, ms):
                            a['time'] += b['time']
                if main is None:
                    main = agg
                    # Seeds the extrema with the first repetition,
                    # otherwise it would never be taken into account
                    # by min_time and max_time.
                    for row in main:
                        row['max_time'] = row['time']
                        row['min_time'] = row['time']
                else:
                    if len(agg) != len(main):
                        raise RuntimeError(  # pragma: no cover
                            f"Not the same number of nodes {len(agg)} != {len(main)}.")
                    for a, b in zip(main, agg):
                        a['time'] += b['time']
                        a['max_time'] = max(a['max_time'], b['time'])
                        a['min_time'] = min(a['min_time'], b['time'])
            for row in main:
                row['repeat'] = repeat
                row['number'] = number
                # 'time' is a sum over repeat * number calls,
                # the extrema are sums over *number* calls.
                row['time'] /= repeat * number
                row['max_time'] /= number
                row['min_time'] /= number
            res[N] = main
        else:
            res[N] = measure_time(fct, x, repeat=repeat,
                                  number=number, div_by_number=True)
        # A result without key 'ttime' is considered as too long as well
        # since the default value is *time_limit* itself.
        if (skip_long_test and not node_time and
                res[N] is not None and
                res[N].get('ttime', time_limit) >= time_limit):
            # too long
            break  # pragma: no cover
    if node_time:
        # Flattens the per-N lists of node rows into a single list.
        rows = []
        for v in res.values():
            rows.extend(v)
        return rows
    return res