Coverage for mlprodict/onnxrt/validate/validate_scenarios.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-04 02:28 +0100

1""" 

2@file 

3@brief Scenarios for validation. 

4""" 

5from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version 

6from sklearn.cluster import KMeans 

7from sklearn.calibration import CalibratedClassifierCV 

8from sklearn.decomposition import SparseCoder, LatentDirichletAllocation 

9from sklearn.ensemble import ( 

10 VotingClassifier, AdaBoostRegressor, VotingRegressor, 

11 ExtraTreesRegressor, ExtraTreesClassifier, 

12 RandomForestRegressor, RandomForestClassifier, 

13 HistGradientBoostingRegressor, HistGradientBoostingClassifier, 

14 AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor, 

15 IsolationForest) 

16from sklearn.feature_extraction import DictVectorizer, FeatureHasher 

17from sklearn.feature_selection import ( 

18 SelectFromModel, SelectPercentile, RFE, RFECV, 

19 SelectKBest, SelectFwe) 

20from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier 

21from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF 

22from sklearn.linear_model import ( 

23 LogisticRegression, LogisticRegressionCV, SGDClassifier, 

24 LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV, 

25 PassiveAggressiveClassifier) 

26from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 

27from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier 

28from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain 

29from sklearn.neighbors import ( 

30 LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier, 

31 RadiusNeighborsRegressor, RadiusNeighborsClassifier) 

32from sklearn.neural_network import MLPClassifier 

33from sklearn.preprocessing import Normalizer, PowerTransformer 

34from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection 

35from sklearn.svm import SVC, NuSVC, SVR 

36from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier 

37 

38try: 

39 from sklearn.ensemble import StackingClassifier, StackingRegressor 

40except ImportError: # pragma: no cover 

41 # new in 0.22 

42 StackingClassifier, StackingRegressor = None, None 

43 

44 

def build_custom_scenarios():
    """
    Defines parameters values for some operators.

    The returned dictionary is keyed by scikit-learn estimator classes.
    Each value is either ``None`` (listed under *skips* below) or a list
    of scenarios. A scenario is a tuple ``(name, parameters)`` or
    ``(name, parameters, extra)`` where *parameters* holds the keyword
    arguments for the estimator constructor and *extra* is either
    a dictionary (keys used in this function: ``'conv_options'``,
    ``'optim'``, ``'subset_problems'``) or a plain list of problem names.
    How these values are consumed is decided by the callers of this
    function, which are not part of this module.

    Scenarios for *StackingClassifier* and *StackingRegressor* are only
    added when both classes could be imported.

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())

    @return     dictionary ``{estimator class: None or list of scenarios}``
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            # NOTE(review): recent scikit-learn releases appear to have
            # renamed 'base_estimator' into 'estimator' for this class,
            # confirm against the supported versions.
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(), 'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            # NOTE(review): this scenario reuses the name 'reg' although
            # it wraps KMeans and lists ['cluster'], confirm whether
            # the duplicated name is intended.
            ('reg', {
                'estimator': KMeans(), 'n_jobs': 1,
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        # NOTE(review): the two entries below key their conversion options
        # by OneVsOneClassifier, unlike the other entries of this dictionary
        # which use their own class, confirm this is not a copy/paste slip.
        OneVsRestClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute'},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute', 'weights': 'distance'},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 0.5,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            # NOTE(review): recent scikit-learn releases appear to spell
            # this loss 'log_loss', confirm against the supported versions.
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }
    # Both names are set to None at import time when scikit-learn does not
    # provide the stacking estimators.
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })
    return options

386 

387 

def interpret_options_from_string(st):
    """
    Converts a string into a dictionary.

    A dictionary is returned unchanged; anything else is handed to
    :epkg:`*py:eval` and the result of the evaluation is returned as is
    (it is not checked to be a dictionary).

    @param      st      string holding a Python expression,
                        or an already built dictionary
    @return             evaluated object
    """
    if isinstance(st, dict):
        return st  # pragma: no cover
    # NOTE(review): eval runs arbitrary Python code, this function must
    # only receive strings coming from a trusted source.
    value = eval(st)  # pylint: disable=W0123
    return value

399 

400 

# Custom scenarios, built once when this module is imported.
_extra_parameters = build_custom_scenarios()