Coverage for mlprodict/onnxrt/validate/validate_scenarios.py

1"""

2@file

3@brief Scenarios for validation.

4"""

5from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version

6from sklearn.cluster import KMeans

7from sklearn.calibration import CalibratedClassifierCV

8from sklearn.decomposition import SparseCoder, LatentDirichletAllocation

9from sklearn.ensemble import (

10 VotingClassifier, AdaBoostRegressor, VotingRegressor,

11 ExtraTreesRegressor, ExtraTreesClassifier,

12 RandomForestRegressor, RandomForestClassifier,

13 HistGradientBoostingRegressor, HistGradientBoostingClassifier,

14 AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor,

15 IsolationForest)

16from sklearn.feature_extraction import DictVectorizer, FeatureHasher

17from sklearn.feature_selection import (

18 SelectFromModel, SelectPercentile, RFE, RFECV,

19 SelectKBest, SelectFwe)

20from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier

21from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF

22from sklearn.linear_model import (

23 LogisticRegression, LogisticRegressionCV, SGDClassifier,

24 LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV,

25 PassiveAggressiveClassifier)

26from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

27from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier

28from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain

29from sklearn.neighbors import (

30 LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier,

31 RadiusNeighborsRegressor, RadiusNeighborsClassifier)

32from sklearn.neural_network import MLPClassifier

33from sklearn.preprocessing import Normalizer, PowerTransformer

34from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection

35from sklearn.svm import SVC, NuSVC, SVR

36from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier

38try:

39 from sklearn.ensemble import StackingClassifier, StackingRegressor

40except ImportError: # pragma: no cover

41 # new in 0.22

42 StackingClassifier, StackingRegressor = None, None

def build_custom_scenarios():
    """
    Defines parameter values for some operators.

    The function returns a dictionary whose keys are *scikit-learn*
    classes. A value is either ``None`` (the class is listed under
    *skips*) or a list of scenarios. Every scenario is a tuple
    ``(name, parameters)`` or ``(name, parameters, extra)`` where
    *parameters* are the keyword arguments given to the class
    constructor. *extra* is either a dictionary using keys such as
    ``'conv_options'``, ``'optim'``, ``'subset_problems'`` or a list of
    strings (presumably a subset of problems, to be confirmed against
    the code consuming the scenarios).

    Entries for ``StackingClassifier`` and ``StackingRegressor`` are only
    added when both classes could be imported.

    @return     dictionary ``{class: None or list of scenarios}``

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(), 'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            # NOTE(review): this scenario reuses the name 'reg' although it
            # wraps KMeans and targets 'cluster' - confirm whether a distinct
            # name was intended before renaming it.
            ('reg', {
                'estimator': KMeans(), 'n_jobs': 1,
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        OneVsRestClassifier: [
            # options are keyed by the class being configured, like every
            # other entry of this dictionary
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsRestClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            # options are keyed by the class being configured, like every
            # other entry of this dictionary
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OutputCodeClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 0.5,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }
    # Both names are set to None when the import of the stacking
    # estimators failed (they were introduced in scikit-learn 0.22).
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })
    return options

386

387

def interpret_options_from_string(st):
    """
    Turns the textual form of options into a Python object.

    A dictionary is handed back untouched, anything else is evaluated
    as a Python expression with the names visible from this module.

    @param      st      string (or an already built dictionary)
    @return             evaluated object
    """
    if not isinstance(st, dict):
        # NOTE(review): eval runs arbitrary code, it must only
        # receive trusted input.
        return eval(st)  # pylint: disable=W0123
    return st  # pragma: no cover

399

400

# Scenario table returned by build_custom_scenarios, computed once
# when this module is imported.
_extra_parameters = build_custom_scenarios()

Coverage for mlprodict/onnxrt/validate/validate_scenarios.py: 100%

31 statements