Coverage for mlinsights/search_rank/search_engine_predictions.py: 91%

33 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-08-09 08:45 +0200

1""" 

2@file 

3@brief Implements a way to get close examples based 

4on the output of a machine learned model. 

5""" 

6from ..mlmodel import model_featurizer 

7from ..helpers.parameters import format_function_call 

8from .search_engine_vectors import SearchEngineVectors 

9 

10 

class SearchEnginePredictions(SearchEngineVectors):
    """
    Extends class @see cl SearchEngineVectors: the neighbors of a
    vector *X* are searched among the values *f(X)* instead of
    the raw vectors *X*.
    *f* is either a function which converts a vector into another
    one, or a machine learned model. A model is wrapped into such
    a function by @see fn model_featurizer.
    """

    def __init__(self, fct, fct_params=None, **knn):
        """
        @param      fct         function *f* applied before looking for
                                neighbors, it can also be a machine
                                learned model
        @param      fct_params  parameters sent to function
                                @see fn model_featurizer, only used when
                                *fct* is not a plain function
        @param      knn         named parameters forwarded to the parent
                                constructor, see
                                :epkg:`sklearn:neighbors:NearestNeighbors`
        """
        super().__init__(**knn)
        # The raw arguments are kept untouched so that __repr__ can
        # display the call as the user wrote it.
        self._fct_init = fct
        self._fct_params = fct_params

        # A callable exposing neither ``predict`` nor ``forward`` is
        # used as is; anything else goes through model_featurizer.
        plain_function = callable(fct) and not (
            hasattr(fct, 'predict') or hasattr(fct, 'forward'))
        if plain_function:
            self.fct = fct
            return

        options = {} if fct_params is None else fct_params
        self.fct = model_featurizer(fct, **options)

    def __repr__(self):
        """
        Returns the constructor call as a string, built by
        ``format_function_call`` from the stored parameters.
        """
        call_args = self.pknn.copy() if self.pknn else {}
        call_args['fct'] = self._fct_init
        call_args['fct_params'] = self._fct_params
        return format_function_call(self.__class__.__name__, call_args)

    def fit(self, data=None, features=None, metadata=None):
        """
        Every vector comes with a list of metadata.

        @param      data        a :epkg:`dataframe` or None if the
                                features and the metadata are specified
                                with an array and a dictionary
        @param      features    features columns or an array
        @param      metadata    data
        @return                 what ``_fit_knn`` returns
        """
        if self._is_iterable(data):
            # Iterable input: the featurizer is handed over to
            # _prepare_fit as *transform*.
            self._prepare_fit(data=data, features=features,
                              metadata=metadata, transform=self.fct)
            return self._fit_knn()

        self._prepare_fit(data=data, features=features, metadata=metadata)
        if isinstance(self.features_, list):
            raise TypeError(  # pragma: no cover
                "features_ cannot be a list when training the model.")
        # NOTE(review): the second argument presumably flags the training
        # step (kneighbors passes False) -- confirm in model_featurizer.
        self.features_ = self.fct(self.features_, True)
        return self._fit_knn()

    def kneighbors(self, X, n_neighbors=None):
        """
        Searches for neighbors close to *X*.

        @param      X               features
        @param      n_neighbors     forwarded to the parent method
        @return                     score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points in the
        population matrix, *meta* is the metadata.
        """
        projected = self.fct(X, False)
        if len(projected.shape) == 1:
            # A single vector is turned into a one-row matrix before
            # it is given to the parent search.
            projected = projected.reshape((1, len(projected)))
        return super().kneighbors(projected, n_neighbors=n_neighbors)

88 if len(xp.shape) == 1: 

89 xp = xp.reshape((1, len(xp))) 

90 return super().kneighbors(xp, n_neighbors=n_neighbors)