Coverage for mlprodict/onnxrt/validate/validate_benchmark.py

1"""

2@file

3@brief Measures time processing for ONNX models.

4"""

5import numpy

6from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version

7from ... import __version__ as ort_version

8from .validate_helper import default_time_kwargs, measure_time

def _tile_rows(a, n, period):
    """
    Builds an array of *n* rows by cycling over the first
    *period* rows of *a*. Trailing dimensions are preserved.

    @param      a       array repeated along its first axis
    @param      n       number of rows of the result
    @param      period  number of rows copied at every step (> 0)
    @return             new array of shape ``(n, ) + a.shape[1:]``
    """
    out = numpy.empty((n, ) + a.shape[1:], dtype=a.dtype)
    for start in range(0, n, period):
        stop = min(start + period, n)
        # The last chunk may be shorter than *period*.
        out[start:stop] = a[:stop - start]
    return out


def make_n_rows(x, n, y=None):
    """
    Multiplies or reduces the rows of x to get
    exactly *n* rows.

    @param      x       matrix (any number of dimensions >= 1)
    @param      n       number of rows
    @param      y       target (optional), its dimensions are
                        independent from the dimensions of *x*
    @return             new matrix or two new matrices if y is not None

    The function raises *ValueError* if *x* has no row and
    more rows than available are requested.
    """
    n_rows = x.shape[0]
    if n < n_rows:
        # Fewer rows than available: a copy of the first rows is enough.
        if y is None:
            return x[:n].copy()
        return x[:n].copy(), y[:n].copy()
    if n_rows == 0:
        # Nothing to repeat, an explicit message is better than
        # the error range() raises for a null step.
        raise ValueError(
            f"Unable to build {n} rows from an empty array, "
            f"x.shape={x.shape}.")
    r = _tile_rows(x, n, n_rows)
    if y is None:
        return r
    # y is cycled with the period of x so that rows of x and y stay aligned.
    return r, _tile_rows(y, n, n_rows)

def _node_time_rows(fct, x, N, repeat, number):
    """
    Aggregates the time spent in every node over *repeat*
    series of *number* calls to *fct*.

    @param      fct     function to benchmark, ``fct(x)[1]`` must be
                        a list of dictionaries with a key ``'time'``
    @param      x       input given to *fct*
    @param      N       value stored in every row under key ``'N'``
    @param      repeat  number of series
    @param      number  number of calls in one series
    @return             list of dictionaries, one per node, with the average
                        time (``'time'``) and the extrema of the series
                        averages (``'min_time'``, ``'max_time'``)
    """
    # First call, its result is discarded (presumably a warm-up).
    fct(x)
    main = None
    for __ in range(repeat):
        # agg accumulates the time of every node over one series.
        agg = None
        for _ in range(number):
            ms = fct(x)[1]
            if agg is None:
                agg = ms
                for row in agg:
                    row['N'] = N
                continue
            if len(agg) != len(ms):
                raise RuntimeError(  # pragma: no cover
                    f"Not the same number of nodes {len(agg)} != {len(ms)}.")
            for a, b in zip(agg, ms):
                a['time'] += b['time']
        if main is None:
            main = agg
            # The first series must be part of the extrema as well,
            # otherwise min_time / max_time only cover series 2..repeat.
            for row in main:
                row['max_time'] = row['time']
                row['min_time'] = row['time']
            continue
        if len(agg) != len(main):
            raise RuntimeError(  # pragma: no cover
                f"Not the same number of nodes {len(agg)} != {len(main)}.")
        for a, b in zip(main, agg):
            a['time'] += b['time']
            a['max_time'] = max(a['max_time'], b['time'])
            a['min_time'] = min(a['min_time'], b['time'])
    for row in main:
        row['repeat'] = repeat
        row['number'] = number
        # 'time' is a sum over all calls, the extrema are sums over one series.
        row['time'] /= repeat * number
        row['max_time'] /= number
        row['min_time'] /= number
    return main


def benchmark_fct(fct, X, time_limit=4, obs=None, node_time=False,
                  time_kwargs=None, skip_long_test=True):
    """
    Benchmarks a function which takes an array
    as an input and changes the number of rows.

    @param      fct             function to benchmark, signature
                                is `fct(xo)`
    @param      X               array
    @param      time_limit      above this time, measurement is stopped
    @param      obs             all information available in a dictionary
    @param      node_time       measure time execution for each node in the graph
    @param      time_kwargs     to define a more precise way to measure a model
    @param      skip_long_test  skips tests for high values of N if they seem too long
    @return                     dictionary with the results indexed by *N*,
                                or a list of rows (one per node and per *N*)
                                if *node_time* is True

    The function uses *obs* to reduce the number of tries it does.
    :epkg:`sklearn:gaussian_process:GaussianProcessRegressor`
    produces huge *NxN* if predict method is called
    with ``return_cov=True``.
    The default for *time_kwargs* is the following:

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_helper import default_time_kwargs
        import pprint
        pprint.pprint(default_time_kwargs())

    See also notebook :ref:`onnxnodetimerst` to see how this function
    can be used to measure time spent in each node.
    """
    if time_kwargs is None:
        time_kwargs = default_time_kwargs()  # pragma: no cover

    def make(x, n):
        return make_n_rows(x, n)

    def allow(N, obs):
        if obs is None:
            return True  # pragma: no cover
        prob = obs['problem']
        # Problems tagged with "-cov" are skipped for N > 1000.
        if "-cov" in prob and N > 1000:
            return False  # pragma: no cover
        return True

    res = {}
    for N in sorted(time_kwargs):
        if not isinstance(N, int):
            raise RuntimeError(  # pragma: no cover
                f"time_kwargs ({type(time_kwargs)}) is wrong:\n{time_kwargs}")
        if not allow(N, obs):
            continue  # pragma: no cover
        x = make(X, N)
        number = time_kwargs[N]['number']
        repeat = time_kwargs[N]['repeat']
        if node_time:
            # The time limit is never checked when measuring nodes.
            res[N] = _node_time_rows(fct, x, N, repeat, number)
            continue
        res[N] = measure_time(fct, x, repeat=repeat,
                              number=number, div_by_number=True)
        # A result without key 'ttime' is considered as too long.
        if (skip_long_test and res[N] is not None and
                res[N].get('ttime', time_limit) >= time_limit):
            # too long
            break  # pragma: no cover
    if node_time:
        rows = []
        for v in res.values():
            rows.extend(v)
        return rows
    return res

Coverage for mlprodict/onnxrt/validate/validate_benchmark.py: 100%

86 statements