Coverage for mlprodict/testing/model_verification.py: 100%
98 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-04 02:28 +0100
1"""
2@file
3@brief Complex but recurring testing functions.
4"""
5import random
6import pandas
7import numpy
8from numpy.testing import assert_allclose
9from ..grammar.cc import compile_c_function
10from ..grammar.cc.c_compilation import CompilationError
def iris_data():
    """
    Returns ``(X, y)`` for iris data.
    """
    from sklearn.datasets import load_iris
    bunch = load_iris()
    # Keep only the first two features and blur them with a fixed-seed
    # gaussian noise so results are reproducible across runs.
    features = bunch.data[:, :2]  # pylint: disable=E1101
    rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
    features = features + rng.randn(*features.shape) / 3
    return features, bunch.target  # pylint: disable=E1101
def check_is_almost_equal(xv, exp, precision=1e-5, message=None):
    """
    Checks that two floats or two arrays are almost equal.

    @param xv float or vector
    @param exp expected value
    @param precision absolute tolerance used for the comparison
    @param message additional message appended to the error
    @raises TypeError if *xv* and *exp* do not have compatible shapes
    @raises ValueError if two scalar values differ beyond *precision*
    @raises AssertionError if two arrays differ beyond *precision*
    """
    if isinstance(exp, float) or len(exp.ravel()) == 1:
        # Scalar comparison (a float or a one-element array).
        if not (isinstance(xv, float) or len(xv.ravel()) == 1):
            raise TypeError(  # pragma: no cover
                f"Type mismatch between {type(xv)} and {type(exp)} (expected).")
        diff = abs(xv - exp)
        # Bug fix: compare against the `precision` parameter instead of a
        # hard-coded 1e-5, and honour `message` as the array branch does.
        if diff > precision:
            msg = f"Predictions are different expected={exp}, computed={xv}"
            if message is not None:
                msg += " " + message
            raise ValueError(msg)  # pragma: no cover
    else:
        # Array comparison: flatten both sides and delegate to numpy.
        if not isinstance(xv, numpy.ndarray):
            raise TypeError(
                f"Type mismatch between {type(xv)} and {type(exp)} (expected).")
        xv = xv.ravel()
        exp = exp.ravel()
        try:
            assert_allclose(xv, exp, atol=precision)
        except AssertionError as e:
            if message is None:
                raise e
            else:
                raise AssertionError(message) from e  # pragma: no cover
def check_model_representation(model, X, y=None, convs=None,
                               output_names=None, only_float=True,
                               verbose=False, suffix="", fLOG=None):
    """
    Checks that a trained model can be exported in a specific list
    of formats and produces the same outputs if the
    representation can be used to predict.

    @param model model (a class or an instance of a model but not trained)
    @param X features
    @param y targets
    @param convs list of format to check, all possible by default ``['json', 'c']``
    @param output_names list of output columns
        (can be None, a default value is infered based on scikit-learn output then)
    @param only_float must be True; anything else raises NotImplementedError
    @param verbose print some information
    @param suffix add this to disambiguate module
    @param fLOG logging function
    @return function to call to run the prediction
    """
    if not only_float:
        raise NotImplementedError(  # pragma: no cover
            "Only float are allowed.")
    # Normalize inputs: lists become DataFrame / ndarray.
    if isinstance(X, list):
        X = pandas.DataFrame(X)
        if len(X.shape) != 2:
            raise ValueError(  # pragma: no cover
                "X cannot be converted into a proper DataFrame. It has shape {0}."
                "".format(X.shape))
        if only_float:
            X = X.values
    if isinstance(y, list):
        y = numpy.array(y)
    if convs is None:
        convs = ['json', 'c']

    # sklearn
    if not hasattr(model.__class__, "fit"):
        # It is a class object and not an instance.
        # We use the default values.
        model = model()

    model.fit(X, y)
    # Pick one random row to compare predictions on.
    h = random.randint(0, X.shape[0] - 1)
    if isinstance(X, pandas.DataFrame):
        oneX = X.iloc[h, :].astype(numpy.float32)
    else:
        oneX = X[h, :].ravel().astype(numpy.float32)

    # model or transform
    # scikit-learn expects a 2D input, so reshape the single row.
    moneX = numpy.resize(oneX, (1, len(oneX)))
    if hasattr(model, "predict"):
        ske = model.predict(moneX)
    else:
        ske = model.transform(moneX)

    if verbose and fLOG:
        fLOG("---------------------")
        fLOG(type(oneX), oneX.dtype)
        fLOG(model)
        # Fitted attributes follow sklearn's trailing-underscore convention.
        for k, v in sorted(model.__dict__.items()):
            if k[-1] == '_':
                fLOG(f" {k}={v}")
        fLOG("---------------------")

    # grammar
    # Convert the fitted model into the internal graph representation
    # and execute it on the same row.
    from ..grammar.grammar_sklearn import sklearn2graph
    gr = sklearn2graph(model, output_names=output_names)
    lot = gr.execute(Features=oneX)
    if verbose and fLOG:
        fLOG(gr.graph_execution())

    # verification
    # The graph execution must match the scikit-learn prediction.
    check_is_almost_equal(lot, ske)

    # default for output_names
    if output_names is None:
        if len(ske.shape) == 1:
            output_names = ["Prediction"]
        elif len(ske.shape) == 2:
            output_names = ["p%d" % i for i in range(ske.shape[1])]
        else:
            raise ValueError(  # pragma: no cover
                "Cannot guess default values for output_names.")

    for lang in convs:
        if lang in ('c', ):
            code_c = gr.export(lang=lang)['code']
            if code_c is None:
                raise ValueError("cannot be None")  # pragma: no cover

            compile_fct = compile_c_function

            # Capture stdout/stderr so compiler noise can be reported
            # inside the CompilationError message on failure.
            from contextlib import redirect_stdout, redirect_stderr
            from io import StringIO
            fout = StringIO()
            ferr = StringIO()
            with redirect_stdout(fout):
                with redirect_stderr(ferr):
                    try:
                        fct = compile_fct(
                            code_c, len(output_names), suffix=suffix,
                            fLOG=lambda s: fout.write(s + "\n"))
                    except Exception as e:  # pragma: no cover
                        raise CompilationError(
                            "Unable to compile a code\n-OUT-\n{0}\n-ERR-\n{1}"
                            "\n-CODE-\n{2}\n-----------\n{3}".format(
                                fout.getvalue(), ferr.getvalue(),
                                code_c, e)) from e

            if verbose and fLOG:
                fLOG("-----------------")
                fLOG(output_names)
                fLOG("-----------------")
                fLOG(code_c)
                fLOG("-----------------")
                fLOG("h=", h, "oneX=", oneX)
                fLOG("-----------------")
            # The compiled C function must agree with scikit-learn.
            lotc = fct(oneX)
            check_is_almost_equal(
                lotc, ske, message=f"Issue with lang='{lang}'")
            # Calling a second time (with a preallocated output buffer)
            # must reproduce the first result exactly.
            lotc_exp = lotc.copy()
            lotc2 = fct(oneX, lotc)
            if not numpy.array_equal(lotc_exp, lotc2):
                raise ValueError(  # pragma: no cover
                    f"Second call returns different results.\n{lotc_exp}\n{lotc2}")
        else:
            # Non-'c' formats only check that serialization produces output.
            ser = gr.export(lang="json", hook={'array': lambda v: v.tolist()})
            if ser is None:
                raise ValueError(  # pragma: no cover
                    f"No output for long='{lang}'")