Coverage for mlinsights/mlmodel/anmf_predictor.py: 100%

38 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

1""" 

2@file 

3@brief Featurizers for machine learned models. 

4""" 

5import numpy 

6from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin 

7from sklearn.decomposition import NMF, TruncatedSVD 

8 

9 

class ApproximateNMFPredictor(BaseEstimator, RegressorMixin, MultiOutputMixin):
    """
    Converts :epkg:`sklearn:decomposition:NMF` into
    a predictor so that predicting on new observations
    does not require any retraining. It fits a
    :epkg:`sklearn:decomposition:TruncatedSVD` on the components
    produced by the :epkg:`sklearn:decomposition:NMF`.
    Prediction projects the test data into the components
    vector space and maps them back into the original space.
    Unlike a true :epkg:`sklearn:decomposition:NMF`, the output
    is not guaranteed to be non-negative unless parameter
    *force_positive* is True.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.mlmodel.anmf_predictor import ApproximateNMFPredictor

        train = numpy.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
                             [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float64)
        train[:train.shape[1], :] += numpy.identity(train.shape[1])

        model = ApproximateNMFPredictor(n_components=2,
                                        force_positive=True)
        model.fit(train)

        test = numpy.array([[1, 1, 1, 0]], dtype=numpy.float64)
        pred = model.predict(test)
        print(pred)
    """

    def __init__(self, force_positive=False, **kwargs):
        """
        *kwargs* holds the parameters forwarded to
        :epkg:`sklearn:decomposition:NMF`.
        When *force_positive* is True, every negative
        prediction is clipped to zero.
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        MultiOutputMixin.__init__(self)
        # Store NMF keyword arguments as attributes so that
        # get_params can retrieve them later by name.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.force_positive = force_positive

    @classmethod
    def _get_param_names(cls):
        """
        Returns the list of parameter names
        of the estimator: NMF's plus *force_positive*.
        """
        return NMF._get_param_names() + ["force_positive"]

    def get_params(self, deep=True):
        """
        Returns the parameters of the estimator
        as a dictionary. Only parameters explicitly
        set on the instance are included.
        """
        return {name: getattr(self, name)
                for name in self._get_param_names()
                if hasattr(self, name)}

    def fit(self, X, y=None):
        """
        Trains a :epkg:`sklearn:decomposition:NMF`,
        then a :epkg:`sklearn:decomposition:TruncatedSVD`
        on its components.
        """
        nmf_params = self.get_params()
        # force_positive belongs to this wrapper, not to NMF.
        nmf_params.pop('force_positive', None)
        self.estimator_nmf_ = NMF(**nmf_params)
        self.estimator_nmf_.fit(X)
        self.estimator_svd_ = TruncatedSVD(
            n_components=self.estimator_nmf_.n_components_)
        self.estimator_svd_.fit(self.estimator_nmf_.components_)
        return self

    def predict(self, X):
        """
        Projects *X* into the components space and back.
        The output has the same dimension as *X*.
        """
        coordinates = self.estimator_svd_.transform(X)
        reconstruction = self.estimator_svd_.inverse_transform(coordinates)
        if self.force_positive:
            # Clip negative values to zero; broadcasting against the
            # scalar yields the same result as the original zeros array.
            reconstruction = numpy.maximum(reconstruction, 0)  # pylint: disable=E1111
        return reconstruction