Coverage for mlinsights/mlmodel/predictable_tsne.py: 98%

63 statements  

coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

1""" 

2@file 

3@brief Implements a predicatable *t-SNE*. 

4""" 

5import inspect 

6from sklearn.base import BaseEstimator, TransformerMixin, clone 

7from sklearn.manifold import TSNE 

8from sklearn.neural_network import MLPRegressor 

9from sklearn.metrics import mean_squared_error 

10 

11 

class PredictableTSNE(BaseEstimator, TransformerMixin):
    """
    :epkg:`t-SNE` is an interesting transform which can only be used
    to study data, as there is no way to reproduce its result on new
    observations once it has been fitted. That's why the class
    :epkg:`TSNE` does not have any method *transform*, only
    `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_.
    This class proposes a way to train a machine-learned model
    which approximates the outputs of a :epkg:`TSNE` transformer.
    The notebook :ref:`predictabletsnerst` gives an example of how to
    use this class.

    :param normalizer: None by default
    :param transformer: :epkg:`sklearn:manifold:TSNE` by default
    :param estimator: :epkg:`sklearn:neural_network:MLPRegressor` by default
    :param normalize: normalizes the outputs, centers and normalizes
        the output of the *t-SNE* and applies that same
        normalization to the prediction of the estimator
    :param keep_tsne_outputs: if True, the raw outputs of
        :epkg:`TSNE` are stored in the member `tsne_outputs_`
    """

    def __init__(self, normalizer=None, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False):
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = MLPRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.normalizer = normalizer
        self.keep_tsne_outputs = keep_tsne_outputs
        if normalizer is not None and not hasattr(normalizer, "transform"):
            raise AttributeError(  # pragma: no cover
                f"normalizer {type(normalizer)} does not have a 'transform' method.")
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(  # pragma: no cover
                f"transformer {type(transformer)} does not have a "
                f"'fit_transform' method.")
        if not hasattr(estimator, "predict"):
            raise AttributeError(  # pragma: no cover
                f"estimator {type(estimator)} does not have a 'predict' method.")
        self.normalize = normalize

    def fit(self, X, y, sample_weight=None):
        """
        Trains a :epkg:`TSNE` then trains an estimator
        to approximate its outputs.

        :param X: numpy array or sparse matrix of shape [n_samples, n_features]
            Training data
        :param y: numpy array of shape [n_samples, n_targets]
            Target values. Will be cast to X's dtype if necessary
        :param sample_weight: numpy array of shape [n_samples]
            Individual weights for each sample
        :return: self, returns an instance of self.

        Fitted attributes:

        * `normalizer_`: trained normalizer
        * `transformer_`: trained transformer
        * `estimator_`: trained regressor
        * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True
        * `mean_`: average of the *t-SNE* output on each dimension
        * `inv_std_`: inverse of the standard deviation of the *t-SNE*
          output on each dimension
        * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`)
          between the predictions and the outputs of t-SNE
        """

        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            # forward 'y' and 'sample_weight' only when the normalizer declares them
            sig = inspect.signature(self.normalizer.transform)
            pars = {}
            for p in ['sample_weight', 'y']:
                if p in sig.parameters and p in params:
                    pars[p] = params[p]
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)
        # t-SNE requires perplexity < n_samples, clamp it if needed
        if (hasattr(self.transformer_, 'perplexity') and
                self.transformer_.perplexity >= X.shape[0]):
            self.transformer_.perplexity = X.shape[0] - 1

        sig = inspect.signature(self.transformer.fit_transform)
        pars = {}
        for p in ['sample_weight', 'y']:
            if p in sig.parameters and p in params:
                pars[p] = params[p]
        target = self.transformer_.fit_transform(X, **pars)

        sig = inspect.signature(self.estimator.fit)
        if 'sample_weight' in sig.parameters:
            self.estimator_ = clone(self.estimator).fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = clone(self.estimator).fit(X, target)
        # center and scale the t-SNE outputs, then measure how well
        # the estimator reproduces them
        mean = target.mean(axis=0)
        var = target.std(axis=0)
        self.mean_ = mean
        self.inv_std_ = 1. / var
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples, n_features]
            Data to transform
        :return: transformed *X*
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            pred -= self.mean_
            pred *= self.inv_std_
        return pred
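
Usage sketch (not part of the covered module): the snippet below assumes the package is installed and that the class is importable from the module path shown in the header, `mlinsights.mlmodel.predictable_tsne`. The dataset, the `StandardScaler` normalizer and the `MLPRegressor(max_iter=500)` estimator are illustrative choices, not requirements.

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# assumed import path, matching the file covered above
from mlinsights.mlmodel.predictable_tsne import PredictableTSNE

data = load_digits(n_class=5)
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=0)

# defaults are TSNE() and MLPRegressor(); the normalizer is optional
model = PredictableTSNE(normalizer=StandardScaler(),
                        estimator=MLPRegressor(max_iter=500))
model.fit(X_train, y_train)

# mean squared error between the estimator's predictions and the
# (normalized) t-SNE outputs on the training data
print("approximation loss:", model.loss_)

# unlike a plain TSNE, the fitted model can project new observations
embedding = model.transform(X_test)
print(embedding.shape)  # (n_test_samples, 2) with the default TSNE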