Coverage for mlinsights/mlmodel/predictable_tsne.py: 98%
63 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Implements a predictable *t-SNE*.
4"""
5import inspect
6from sklearn.base import BaseEstimator, TransformerMixin, clone
7from sklearn.manifold import TSNE
8from sklearn.neural_network import MLPRegressor
9from sklearn.metrics import mean_squared_error
class PredictableTSNE(BaseEstimator, TransformerMixin):
    """
    :epkg:`t-SNE` is an interesting
    transform which can only be used to study data as there is no
    way to reproduce the result once it was fitted. That's why
    the class :epkg:`TSNE` does not have any method *transform*, only
    `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_.
    This example proposes a way to train a machine learned model
    which approximates the outputs of a :epkg:`TSNE` transformer.
    Notebook :ref:`predictabletsnerst` gives an example on how to
    use this class.

    :param normalizer: None by default
    :param transformer: :epkg:`sklearn:manifold:TSNE` by default
    :param estimator: :epkg:`sklearn:neural_network:MLPRegressor` by default
    :param normalize: normalizes the outputs, centers and normalizes
        the output of the *t-SNE* and applies that same
        normalization to the prediction of the estimator
    :param keep_tsne_outputs: if True, raw outputs of
        :epkg:`TSNE` are stored in member `tsne_outputs_`
    """

    def __init__(self, normalizer=None, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False):
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        # Defaults are instantiated here (not as mutable default arguments)
        # so that every instance gets its own estimator/transformer.
        if estimator is None:
            estimator = MLPRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.normalizer = normalizer
        self.keep_tsne_outputs = keep_tsne_outputs
        # Fail fast on objects which do not implement the required API.
        if normalizer is not None and not hasattr(normalizer, "transform"):
            raise AttributeError(  # pragma: no cover
                f"normalizer {type(normalizer)} does not have a 'transform' method.")
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(  # pragma: no cover
                f"transformer {type(transformer)} does not have a "
                f"'fit_transform' method.")
        if not hasattr(estimator, "predict"):
            raise AttributeError(  # pragma: no cover
                f"estimator {type(estimator)} does not have a 'predict' method.")
        self.normalize = normalize

    def fit(self, X, y, sample_weight=None):
        """
        Trains a :epkg:`TSNE` then trains an estimator
        to approximate its outputs.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        :param y: numpy array of shape [n_samples, n_targets]
            Target values. Will be cast to X's dtype if necessary
        :param sample_weight: numpy array of shape [n_samples]
            Individual weights for each sample
        :return: self, returns an instance of self.

        Fitted attributes:

        * `normalizer_`: trained normalizer
        * `transformer_`: trained transformer
        * `estimator_`: trained regressor
        * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True
        * `mean_`: average of the *t-SNE* output on each dimension
        * `inv_std_`: inverse of the standard deviation of the *t-SNE*
          output on each dimension
        * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`) between the predictions
          and the outputs of t-SNE
        """
        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            # Only forward the keyword arguments the normalizer accepts.
            sig = inspect.signature(self.normalizer.transform)
            pars = {p: params[p] for p in ('sample_weight', 'y')
                    if p in sig.parameters and p in params}
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)
        # scikit-learn's TSNE requires perplexity < n_samples; clamp it
        # so small datasets do not raise.
        if (hasattr(self.transformer_, 'perplexity') and
                self.transformer_.perplexity >= X.shape[0]):
            self.transformer_.perplexity = X.shape[0] - 1

        # Inspect the clone actually being fitted (same signature as the
        # template, but keeps the code consistent with the call below).
        sig = inspect.signature(self.transformer_.fit_transform)
        pars = {p: params[p] for p in ('sample_weight', 'y')
                if p in sig.parameters and p in params}
        target = self.transformer_.fit_transform(X, **pars)

        self.estimator_ = clone(self.estimator)
        sig = inspect.signature(self.estimator_.fit)
        if 'sample_weight' in sig.parameters:
            self.estimator_ = self.estimator_.fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = self.estimator_.fit(X, target)
        mean = target.mean(axis=0)
        var = target.std(axis=0)
        # NOTE(review): a constant t-SNE output on one dimension makes
        # var == 0 and this division produce inf — kept as-is to preserve
        # behavior, but worth confirming upstream.
        self.mean_ = mean
        self.inv_std_ = 1. / var
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            # Store the normalized outputs when normalize is requested,
            # otherwise the raw t-SNE coordinates.
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        :return: transformed *X*
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            # Apply the same centering/scaling learned from the t-SNE output.
            pred -= self.mean_
            pred *= self.inv_std_
        return pred