1""" 

2Implements simple stochastic gradient optimisation. 

3It is inspired from `_stochastic_optimizers.py 

4<https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/ 

5neural_network/_stochastic_optimizers.py>`_. 

6""" 

7import numpy 

8from numpy.core._exceptions import UFuncTypeError 

9 

10 

11class BaseOptimizer: 

12 """ 

13 Base stochastic gradient descent optimizer. 

14 

15 :param coef: array, initial coefficient 

16 :param learning_rate_init: float 

17 The initial learning rate used. It controls the step-size 

18 in updating the weights. 

19 :param min_threshold: coefficients must be higher than *min_thresold* 

20 :param max_threshold: coefficients must be below than *max_thresold* 

21 

22 The class holds the following attributes: 

23 

24 * *learning_rate*: float, the current learning rate 
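
    A subclass only needs to override ``_get_updates``. Below is a minimal
    sketch, not part of the module: ``PlainSGD`` is a hypothetical subclass
    performing a plain gradient step, and the import assumes
    ``BaseOptimizer`` is exposed next to :class:`SGDOptimizer`::

        import numpy
        from aftercovid.optim import BaseOptimizer  # assumed export

        class PlainSGD(BaseOptimizer):
            "Hypothetical subclass: plain gradient step, no momentum."

            def _get_updates(self, grad):
                # moves against the gradient, scaled by the learning rate
                return -self.learning_rate * grad

        opt = PlainSGD(numpy.array([0.5, -0.5]), learning_rate_init=0.1,
                       min_threshold=0., max_threshold=1.)
        opt.update_coef(numpy.array([0.2, -0.3]))
        # the second coefficient would become -0.47 and is clipped to
        # min_threshold, the result is [0.48, 0.]
        print(opt.coef)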

25 """ 

26 

27 def __init__(self, coef, learning_rate_init=0.1, 

28 min_threshold=None, max_threshold=None): 

29 if not isinstance(coef, numpy.ndarray): 

30 raise TypeError("coef must be an array.") 

31 self.coef = coef 

32 self.learning_rate_init = learning_rate_init 

33 self.learning_rate = float(learning_rate_init) 

34 self.min_threshold = min_threshold 

35 self.max_threshold = max_threshold 

36 

37 def _get_updates(self, grad): 

38 raise NotImplementedError("Must be overwritten.") # pragma no cover 

39 

40 def update_coef(self, grad): 

41 """ 

42 Updates coefficients with given gradient. 

43 

44 :param grad: array, gradient 

45 """ 

46 if self.coef.shape != grad.shape: 

47 raise ValueError("coef and grad must have the same shape.") 

48 update = self._get_updates(grad) 

49 self.coef += update 

50 if self.min_threshold is not None: 

51 try: 

52 self.coef = numpy.maximum(self.coef, self.min_threshold) 

53 except UFuncTypeError: # pragma: no cover 

54 raise RuntimeError( 

55 "Unable to compute an upper bound with coef={} " 

56 "max_threshold={}".format(self.coef, self.min_threshold)) 

57 if self.max_threshold is not None: 

58 try: 

59 self.coef = numpy.minimum(self.coef, self.max_threshold) 

60 except UFuncTypeError: # pragma: no cover 

61 raise RuntimeError( 

62 "Unable to compute a lower bound with coef={} " 

63 "max_threshold={}".format(self.coef, self.max_threshold)) 

64 

65 def iteration_ends(self, time_step): 

66 """ 

67 Performs update to learning rate and potentially other states at the 

68 end of an iteration. 

69 """ 

70 pass # pragma: no cover 

71 

72 def train(self, X, y, fct_loss, fct_grad, max_iter=100, 

73 early_th=None, verbose=False): 

74 """ 

75 Optimizes the coefficients. 

76 

77 :param X: datasets (array) 

78 :param y: expected target 

79 :param fct_loss: loss function, signature: `f(coef, X, y) -> float` 

80 :param fct_grad: gradient function, 

81 signature: `g(coef, x, y, i) -> array` 

82 :param max_iter: number maximum of iteration 

83 :param early_th: stops the training if the error goes below 

84 this threshold 

85 :param verbose: display information 

86 :return: loss 
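
        A hedged sketch of the callbacks for a linear model (illustrative
        only, a full example is given in :class:`SGDOptimizer`): *fct_loss*
        receives the whole dataset, *fct_grad* a single row *x*, its target
        *y* and its index *i*::

            import numpy

            def fct_loss(coef, X, y):
                # loss over the full dataset
                return numpy.linalg.norm(X @ coef - y) ** 2

            def fct_grad(coef, x, y, i):
                # gradient of the squared error for one row only
                return x * (x @ coef - y) * 2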

87 """ 

88 if not isinstance(X, numpy.ndarray): 

89 raise TypeError("X must be an array.") 

90 if not isinstance(y, numpy.ndarray): 

91 raise TypeError("y must be an array.") 

92 if X.shape[0] != y.shape[0]: 

93 raise ValueError("X and y must have the same number of rows.") 

94 if any(numpy.isnan(X.ravel())): 

95 raise ValueError("X contains nan value.") 

96 if any(numpy.isnan(y.ravel())): 

97 raise ValueError("y contains nan value.") 

98 

99 loss = fct_loss(self.coef, X, y) 

100 losses = [loss] 

101 if verbose: 

102 self._display_progress(0, max_iter, loss) 

103 n_samples = 0 

104 for it in range(max_iter): 

105 irows = numpy.random.choice(X.shape[0], X.shape[0]) 

106 for irow in irows: 

107 grad = fct_grad(self.coef, X[irow, :], y[irow], irow) 

108 if isinstance(verbose, int) and verbose >= 10: 

109 self._display_progress( # pragma: no cover 

110 0, max_iter, loss, grad, 'grad') 

111 if numpy.isnan(grad).sum() > 0: 

112 raise RuntimeError( # pragma: no cover 

113 "The gradient has nan values.") 

114 self.update_coef(grad) 

115 n_samples += 1 

116 

117 self.iteration_ends(n_samples) 

118 loss = fct_loss(self.coef, X, y) 

119 if verbose: 

120 self._display_progress(it + 1, max_iter, loss) 

121 self.iter_ = it + 1 

122 losses.append(loss) 

123 if self._evaluate_early_stopping( 

124 it, max_iter, losses, early_th, verbose=verbose): 

125 break 

126 return loss 

127 

128 def _evaluate_early_stopping( 

129 self, 

130 it, 

131 max_iter, 

132 losses, 

133 early_th, 

134 verbose=False): 

135 if len(losses) < 5 or early_th is None: 

136 return False 

137 if numpy.isnan(losses[-5]): 

138 if numpy.isnan(losses[-1]): # pragma: no cover 

139 if verbose: 

140 self._display_progress(it + 1, max_iter, losses[-1], 

141 losses=losses[-5:]) 

142 return True 

143 return False # pragma: no cover 

144 if numpy.isnan(losses[-1]): 

145 if verbose: # pragma: no cover 

146 self._display_progress(it + 1, max_iter, losses[-1], 

147 losses=losses[-5:]) 

148 return True # pragma: no cover 

149 if abs(losses[-1] - losses[-5]) <= early_th: 

150 if verbose: # pragma: no cover 

151 self._display_progress(it + 1, max_iter, losses[-1], 

152 losses=losses[-5:]) 

153 return True 

154 return False 

155 

156 def _display_progress(self, it, max_iter, loss, losses=None): 

157 'Displays training progress.' 

158 if losses is None: # pragma: no cover 

159 print('{}/{}: loss: {:1.4g}'.format(it, max_iter, loss)) 

160 else: 

161 print( # pragma: no cover 

162 '{}/{}: loss: {:1.4g} losses: {}'.format( 

163 it, max_iter, loss, losses)) 

164 

165 

166class SGDOptimizer(BaseOptimizer): 

167 """ 

168 Stochastic gradient descent optimizer with momentum. 

169 

170 :param coef: array, initial coefficient 

171 :param learning_rate_init: float 

172 The initial learning rate used. It controls the step-size 

173 in updating the weights, 

174 :param lr_schedule: `{'constant', 'adaptive', 'invscaling'}`, 

175 learning rate schedule for weight updates, 

176 `'constant'` for a constant learning rate given by 

177 *learning_rate_init*. `'invscaling'` gradually decreases 

178 the learning rate *learning_rate_* at each time step *t* 

179 using an inverse scaling exponent of *power_t*. 

180 `learning_rate_ = learning_rate_init / pow(t, power_t)`, 

181 `'adaptive'`, keeps the learning rate constant to 

182 *learning_rate_init* as long as the training keeps decreasing. 

183 Each time 2 consecutive epochs fail to decrease the training loss by 

184 tol, or fail to increase validation score by tol if 'early_stopping' 

185 is on, the current learning rate is divided by 5. 

186 :param momentum: float 

187 Value of momentum used, must be larger than or equal to 0 

188 :param power_t: double 

189 The exponent for inverse scaling learning rate. 

190 :param early_th: stops if the error goes below that threshold 

191 :param min_threshold: lower bound for parameters (can be None) 

192 :param max_threshold: upper bound for parameters (can be None) 

193 

194 The class holds the following attributes: 

195 

196 * *learning_rate*: float, the current learning rate 

197 * velocity*: array, velocity that are used to update params 

198 

199 .. exref:: 

200 :title: Stochastic Gradient Descent applied to linear regression 

201 

202 The following example how to optimize a simple linear regression. 

203 

204 .. runpython:: 

205 :showcode: 

206 

207 import numpy 

208 from aftercovid.optim import SGDOptimizer 

209 

210 

211 def fct_loss(c, X, y): 

212 return numpy.linalg.norm(X @ c - y) ** 2 

213 

214 

215 def fct_grad(c, x, y, i=0): 

216 return x * (x @ c - y) * 0.1 

217 

218 

219 coef = numpy.array([0.5, 0.6, -0.7]) 

220 X = numpy.random.randn(10, 3) 

221 y = X @ coef 

222 

223 sgd = SGDOptimizer(numpy.random.randn(3)) 

224 sgd.train(X, y, fct_loss, fct_grad, max_iter=15, verbose=True) 

225 print('optimized coefficients:', sgd.coef) 

226 """ 

227 

228 def __init__(self, coef, learning_rate_init=0.1, lr_schedule='constant', 

229 momentum=0.9, power_t=0.5, early_th=None, 

230 min_threshold=None, max_threshold=None): 

231 super().__init__(coef, learning_rate_init, 

232 min_threshold=min_threshold, 

233 max_threshold=max_threshold) 

234 self.lr_schedule = lr_schedule 

235 self.momentum = momentum 

236 self.power_t = power_t 

237 self.early_th = early_th 

238 self.velocity = numpy.zeros_like(coef) 

239 

240 def iteration_ends(self, time_step): 

241 """ 

242 Performs updates to learning rate and potential other states at the 

243 end of an iteration. 

244 

245 :param time_step: int 

246 number of training samples trained on so far, used to update 

247 learning rate for 'invscaling' 
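
        A rough sketch of the decay obtained with the default
        *power_t=0.5* (the printed values are approximate)::

            import numpy
            from aftercovid.optim import SGDOptimizer

            sgd = SGDOptimizer(numpy.zeros(2), learning_rate_init=0.1,
                               lr_schedule='invscaling', power_t=0.5)
            for t in [0, 3, 99]:
                sgd.iteration_ends(t)
                # learning_rate_init / (t + 1) ** 0.5
                # -> roughly 0.1, 0.05, 0.01
                print(t, sgd.learning_rate)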

248 """ 

249 if self.lr_schedule == 'invscaling': 

250 self.learning_rate = (float(self.learning_rate_init) / 

251 (time_step + 1) ** self.power_t) 

252 

253 def _get_updates(self, grad): 

254 """ 

255 Gets the values used to update params with given gradients. 

256 

257 :param grad: array, gradient 

258 :return: updates, array, the values to add to params 
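
        The update follows the classical momentum rule,
        ``update = momentum * velocity - learning_rate * grad``. A small
        sketch of two successive steps (illustrative values only)::

            import numpy

            momentum, learning_rate = 0.9, 0.1
            velocity = numpy.zeros(2)
            grad = numpy.array([1.0, -2.0])
            update = momentum * velocity - learning_rate * grad
            velocity = update                      # [-0.1, 0.2]
            update = momentum * velocity - learning_rate * grad
            velocity = update                      # about [-0.19, 0.38]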

259 """ 

260 update = self.momentum * self.velocity - self.learning_rate * grad 

261 self.velocity = update 

262 return update 

263 

264 def _display_progress(self, it, max_iter, loss, losses=None, msg='loss'): 

265 'Displays training progress.' 

266 if losses is None: 

267 print('{}/{}: {}: {:1.4g} lr={:1.3g}'.format( 

268 it, max_iter, msg, loss, self.learning_rate)) 

269 else: 

270 print( # pragma: no cover 

271 '{}/{}: {}: {:1.4g} lr={:1.3g} {}es: {}'.format( 

272 it, max_iter, msg, loss, self.learning_rate, msg, losses))