Coverage for mlprodict/testing/model_verification.py

1"""

2@file

3@brief Complex but recurring testing functions.

4"""

5import random

6import pandas

7import numpy

8from numpy.testing import assert_allclose

9from ..grammar.cc import compile_c_function

10from ..grammar.cc.c_compilation import CompilationError

def iris_data():
    """
    Returns ``(X, y)`` for iris data.

    Only the first two feature columns of the dataset are kept and
    a gaussian noise (drawn with a fixed seed, divided by 3) is added
    to them in place. The targets are returned untouched.

    @return     tuple ``(X, y)``
    """
    from sklearn.datasets import load_iris

    dataset = load_iris()
    features = dataset.data[:, :2]  # pylint: disable=E1101
    # Fixed seed: the noise is the same at every call.
    rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
    features += rng.randn(*features.shape) / 3
    return features, dataset.target  # pylint: disable=E1101

def check_is_almost_equal(xv, exp, precision=1e-5, message=None):
    """
    Checks that two floats or two arrays are almost equal.

    @param      xv          float or vector
    @param      exp         expected value
    @param      precision   absolute tolerance, used for the scalar
                            comparison as well as for the array comparison
    @param      message     additional message, replaces the default
                            assertion message when arrays differ

    The function returns nothing. It raises ``TypeError`` when *xv*
    and *exp* do not have compatible kinds (scalar versus vector),
    ``ValueError`` when two scalars differ by more than *precision*,
    ``AssertionError`` when two arrays differ.
    """
    # numpy.size returns 1 for python and numpy scalars as well as
    # for single element arrays, whatever their type.
    if numpy.size(exp) == 1:
        if numpy.size(xv) != 1:
            raise TypeError(  # pragma: no cover
                f"Type mismatch between {type(xv)} and {type(exp)} (expected).")
        diff = abs(xv - exp)
        # The tolerance given by the caller is used, not a constant.
        if diff > precision:
            raise ValueError(  # pragma: no cover
                f"Predictions are different expected={exp}, computed={xv}")
        return

    if not isinstance(xv, numpy.ndarray):
        raise TypeError(
            f"Type mismatch between {type(xv)} and {type(exp)} (expected).")
    try:
        assert_allclose(xv.ravel(), numpy.asarray(exp).ravel(),
                        atol=precision)
    except AssertionError as e:
        if message is None:
            raise
        raise AssertionError(message) from e  # pragma: no cover

def check_model_representation(model, X, y=None, convs=None,
                               output_names=None, only_float=True,
                               verbose=False, suffix="", fLOG=None):
    """
    Checks that a trained model can be exported in a specific list
    of formats and produces the same outputs if the
    representation can be used to predict.

    @param      model           model (a class or an instance of a model but not trained)
    @param      X               features, a list of rows, a DataFrame or
                                a two-dimensional numpy array
    @param      y               targets
    @param      convs           list of format to check, all possible by default ``['json', 'c']``
    @param      output_names    list of output columns
                                (can be None, a default value is inferred based on
                                scikit-learn output then)
    @param      only_float      must be True, any other value raises
                                ``NotImplementedError``
    @param      verbose         print some information (only if *fLOG* is given)
    @param      suffix          add this to disambiguate module
    @param      fLOG            logging function

    The function returns nothing, it raises an exception when a
    verification fails. The model is fitted on *(X, y)*, then one row
    is picked at random and the prediction (or the transformation) of
    the model for that row is compared with the output of every
    checked representation.
    """
    from contextlib import redirect_stdout, redirect_stderr
    from io import StringIO

    if not only_float:
        raise NotImplementedError(  # pragma: no cover
            "Only float are allowed.")
    if isinstance(X, list):
        X = pandas.DataFrame(X)
    # The check applies to any kind of input, not only to lists.
    if len(X.shape) != 2:
        raise ValueError(  # pragma: no cover
            "X cannot be converted into a proper DataFrame. It has shape {0}."
            "".format(X.shape))
    # A numpy array has no attribute ``values``, it is used as is.
    if isinstance(X, pandas.DataFrame):
        X = X.values
    if isinstance(y, list):
        y = numpy.array(y)
    if convs is None:
        convs = ['json', 'c']
    log = fLOG if verbose and fLOG else None

    # sklearn
    if not hasattr(model.__class__, "fit"):
        # It is a class object and not an instance.
        # We use the default values.
        model = model()

    model.fit(X, y)
    # One random row is used for all the comparisons.
    h = random.randint(0, X.shape[0] - 1)
    if isinstance(X, pandas.DataFrame):
        oneX = X.iloc[h, :].astype(numpy.float32)
    else:
        oneX = X[h, :].ravel().astype(numpy.float32)

    # model or transform
    moneX = numpy.resize(oneX, (1, len(oneX)))
    if hasattr(model, "predict"):
        ske = model.predict(moneX)
    else:
        ske = model.transform(moneX)

    if log:
        log("---------------------")
        log(type(oneX), oneX.dtype)
        log(model)
        for k, v in sorted(model.__dict__.items()):
            # Only the attributes ending with an underscore are displayed.
            if k[-1] == '_':
                log(f"  {k}={v}")
        log("---------------------")

    # grammar
    from ..grammar.grammar_sklearn import sklearn2graph
    gr = sklearn2graph(model, output_names=output_names)
    lot = gr.execute(Features=oneX)
    if log:
        log(gr.graph_execution())

    # verification
    check_is_almost_equal(lot, ske)

    # default for output_names
    if output_names is None:
        if len(ske.shape) == 1:
            output_names = ["Prediction"]
        elif len(ske.shape) == 2:
            output_names = [f"p{i}" for i in range(ske.shape[1])]
        else:
            raise ValueError(  # pragma: no cover
                "Cannot guess default values for output_names.")

    for lang in convs:
        if lang != 'c':
            # Any other format is exported as json and only checked
            # for producing an output.
            ser = gr.export(lang="json", hook={'array': lambda v: v.tolist()})
            if ser is None:
                raise ValueError(  # pragma: no cover
                    f"No output for lang='{lang}'")
            continue

        code_c = gr.export(lang=lang)['code']
        if code_c is None:
            raise ValueError("cannot be None")  # pragma: no cover

        # Standard streams are captured during the compilation,
        # their content is added to the exception if it fails.
        fout = StringIO()
        ferr = StringIO()
        with redirect_stdout(fout), redirect_stderr(ferr):
            try:
                fct = compile_c_function(
                    code_c, len(output_names), suffix=suffix,
                    fLOG=lambda s: fout.write(s + "\n"))
            except Exception as e:  # pragma: no cover
                raise CompilationError(
                    "Unable to compile a code\n-OUT-\n{0}\n-ERR-\n{1}"
                    "\n-CODE-\n{2}\n-----------\n{3}".format(
                        fout.getvalue(), ferr.getvalue(),
                        code_c, e)) from e

        if log:
            log("-----------------")
            log(output_names)
            log("-----------------")
            log(code_c)
            log("-----------------")
            log("h=", h, "oneX=", oneX)
            log("-----------------")
        lotc = fct(oneX)
        check_is_almost_equal(
            lotc, ske, message=f"Issue with lang='{lang}'")
        # A second call receiving the first result as second argument
        # must return the same values.
        lotc_exp = lotc.copy()
        lotc2 = fct(oneX, lotc)
        if not numpy.array_equal(lotc_exp, lotc2):
            raise ValueError(  # pragma: no cover
                f"Second call returns different results.\n{lotc_exp}\n{lotc2}")

Coverage for mlprodict/testing/model_verification.py: 100%

98 statements