Coverage for mlinsights/mlmodel/anmf_predictor.py: 100%

38 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

1""" 

2@file 

3@brief Featurizers for machine learned models. 

4""" 

5import numpy 

6from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin 

7from sklearn.decomposition import NMF, TruncatedSVD 

8 

9 

class ApproximateNMFPredictor(BaseEstimator, RegressorMixin, MultiOutputMixin):
    """
    Converts :epkg:`sklearn:decomposition:NMF` into
    a predictor so that predicting on new observations
    does not require any retraining. It fits a
    :epkg:`sklearn:decomposition:TruncatedSVD` on the components
    produced by the :epkg:`sklearn:decomposition:NMF`.
    Prediction projects the test data into the components
    vector space and maps them back into the original space.
    Unlike a true :epkg:`sklearn:decomposition:NMF`, the output
    is not guaranteed to be non-negative unless parameter
    *force_positive* is True.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.mlmodel.anmf_predictor import ApproximateNMFPredictor

        train = numpy.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
                             [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float64)
        train[:train.shape[1], :] += numpy.identity(train.shape[1])

        model = ApproximateNMFPredictor(n_components=2,
                                        force_positive=True)
        model.fit(train)

        test = numpy.array([[1, 1, 1, 0]], dtype=numpy.float64)
        pred = model.predict(test)
        print(pred)
    """

    def __init__(self, force_positive=False, **kwargs):
        """
        *kwargs* holds the parameters forwarded to
        :epkg:`sklearn:decomposition:NMF`.
        When *force_positive* is True, every negative
        prediction is clipped to zero.
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        MultiOutputMixin.__init__(self)
        # Store NMF keyword arguments as attributes so that
        # get_params can retrieve them later by name.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.force_positive = force_positive

    @classmethod
    def _get_param_names(cls):
        """
        Returns the list of parameter names
        of the estimator: NMF's plus *force_positive*.
        """
        return NMF._get_param_names() + ["force_positive"]

    def get_params(self, deep=True):
        """
        Returns the parameters of the estimator
        as a dictionary. Only parameters explicitly
        set on the instance are included.
        """
        return {name: getattr(self, name)
                for name in self._get_param_names()
                if hasattr(self, name)}

    def fit(self, X, y=None):
        """
        Trains a :epkg:`sklearn:decomposition:NMF`,
        then a :epkg:`sklearn:decomposition:TruncatedSVD`
        on its components.
        """
        nmf_params = self.get_params()
        # force_positive belongs to this wrapper, not to NMF.
        nmf_params.pop('force_positive', None)
        self.estimator_nmf_ = NMF(**nmf_params)
        self.estimator_nmf_.fit(X)
        self.estimator_svd_ = TruncatedSVD(
            n_components=self.estimator_nmf_.n_components_)
        self.estimator_svd_.fit(self.estimator_nmf_.components_)
        return self

    def predict(self, X):
        """
        Projects *X* into the components space and back.
        The output has the same dimension as *X*.
        """
        coordinates = self.estimator_svd_.transform(X)
        reconstruction = self.estimator_svd_.inverse_transform(coordinates)
        if self.force_positive:
            # Clip negative values to zero; broadcasting against the
            # scalar yields the same result as the original zeros array.
            reconstruction = numpy.maximum(reconstruction, 0)  # pylint: disable=E1111
        return reconstruction