Coverage for src/mlstatpy/ml/matrices.py: 100%
124 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Algorithms about matrices.
5"""
6import warnings
7import numpy
8import numpy.linalg
9from scipy.linalg.lapack import dtrtri # pylint: disable=E0611
12def gram_schmidt(mat, change=False):
13 """
14 Applies the `Gram–Schmidt process
15 <https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process>`_.
16 Due to performance, every row is considered as a vector.
18 @param mat matrix
19 @param change returns the matrix to change the basis
20 @return new matrix or (new matrix, change matrix)
22 The function assumes the matrix *mat* is
23 horizontal: it has more columns than rows.
25 .. note::
26 The implementation could be improved
27 by directly using :epkg:`BLAS` function.
29 .. runpython::
30 :showcode:
32 import numpy
33 from mlstatpy.ml.matrices import gram_schmidt
35 X = numpy.array([[1., 2., 3., 4.],
36 [5., 6., 6., 6.],
37 [5., 6., 7., 8.]])
38 T, P = gram_schmidt(X, change=True)
39 print(T)
40 print(P)
41 """
42 if len(mat.shape) != 2:
43 raise ValueError("mat must be a matrix.") # pragma: no cover
44 if mat.shape[1] < mat.shape[0]:
45 raise RuntimeError( # pragma: no cover
46 "The function only works if the number of rows is less "
47 "than the number of columns.")
48 if change:
49 base = numpy.identity(mat.shape[0])
50 # The following code is equivalent to:
51 # res = numpy.empty(mat.shape)
52 # for i in range(0, mat.shape[0]):
53 # res[i, :] = mat[i, :]
54 # for j in range(0, i):
55 # d = numpy.dot(res[j, :], mat[i, :])
56 # res[i, :] -= res[j, :] * d
57 # if change:
58 # base[i, :] -= base[j, :] * d
59 # d = numpy.dot(res[i, :], res[i, :])
60 # if d > 0:
61 # d **= 0.5
62 # res[i, :] /= d
63 # if change:
64 # base[i, :] /= d
65 # But it is faster to write it this way:
66 res = numpy.empty(mat.shape)
67 for i in range(0, mat.shape[0]):
68 res[i, :] = mat[i, :]
69 if i > 0:
70 d = numpy.dot(res[:i, :], mat[i, :])
71 m = numpy.multiply(res[:i, :], d.reshape((len(d), 1)))
72 m = numpy.sum(m, axis=0)
73 res[i, :] -= m
74 if change:
75 m = numpy.multiply(base[:i, :], d.reshape((len(d), 1)))
76 m = numpy.sum(m, axis=0)
77 base[i, :] -= m
78 d = numpy.dot(res[i, :], res[i, :])
79 if d > 0:
80 d **= 0.5
81 res[i, :] /= d
82 if change:
83 base[i, :] /= d
84 return (res, base) if change else res
87def linear_regression(X, y, algo=None):
88 """
89 Solves the linear regression problem,
90 find :math:`\\beta` which minimizes
91 :math:`\\norme{y - X\\beta}`, based on the algorithm
92 :ref:`Arbre de décision optimisé pour les régressions linéaires
93 <algo_decision_tree_mselin>`.
95 @param X features
96 @param y targets
97 @param algo None to use the standard algorithm
98 :math:`\\beta = (X'X)^{-1} X'y`,
99 `'gram'`, `'qr'`
100 @return beta
102 .. runpython::
103 :showcode:
105 import numpy
106 from mlstatpy.ml.matrices import linear_regression
108 X = numpy.array([[1., 2., 3., 4.],
109 [5., 6., 6., 6.],
110 [5., 6., 7., 8.]]).T
111 y = numpy.array([0.1, 0.2, 0.19, 0.29])
112 beta = linear_regression(X, y, algo="gram")
113 print(beta)
115 ``algo=None`` computes :math:`\\beta = (X'X)^{-1} X'y`.
116 ``algo='qr'`` uses a `QR <https://docs.scipy.org/doc/numpy/reference
117 /generated/numpy.linalg.qr.html>`_ decomposition and calls function
118 `dtrtri <https://docs.scipy.org/doc/scipy/reference/generated/scipy.
119 linalg.lapack.dtrtri.html>`_ to invert an upper triangular matrix.
120 ``algo='gram'`` uses :func:`gram_schmidt
121 <mlstatpy.ml.matrices.gram_schmidt>` and then computes
122 the solution of the linear regression (see above for a link
123 to the algorithm).
124 """
125 if len(y.shape) != 1:
126 warnings.warn( # pragma: no cover
127 "This function is not tested for a multidimensional linear regression.")
128 if algo is None:
129 inv = numpy.linalg.inv(X.T @ X)
130 return inv @ (X.T @ y)
131 if algo == "gram":
132 T, P = gram_schmidt(X.T, change=True)
133 # T = P X
134 return (y.T @ T.T @ P).ravel()
135 if algo == "qr":
136 Q, R = numpy.linalg.qr(X, "full")
137 Ri = dtrtri(R)[0]
138 gamma = (y.T @ Q).ravel()
139 return (gamma @ Ri.T).ravel()
140 raise ValueError( # pragma: no cover
141 f"Unknwown algo='{algo}'.")
144def norm2(X):
145 """
146 Computes the square norm for all rows of a
147 matrix.
148 """
149 res = numpy.empty(X.shape[1])
150 for i in range(X.shape[1]):
151 res[i] = numpy.dot(X[i], X[i])
152 return res
155def streaming_gram_schmidt_update(Xk, Pk):
156 """
157 Updates matrix :math:`P_k` to produce :math:`P_{k+1}`
158 which is the matrix *P* in algorithm
159 :ref:`Streaming Linear Regression
160 <algo_reg_lin_gram_schmidt_streaming>`.
161 The function modifies the matrix *Pk*
162 given as an input.
164 @param Xk kth row
165 @param Pk matrix *P* at iteration *k-1*
166 """
167 tki = Pk @ Xk
168 idi = numpy.identity(Pk.shape[0])
170 for i in range(0, Pk.shape[0]):
171 val = tki[i]
173 if i > 0:
174 # for j in range(0, i):
175 # d = tki[j] * val
176 # tki[i] -= tki[j] * d
177 # Pk[i, :] -= Pk[j, :] * d
178 # idi[i, :] -= idi[j, :] * d
180 dv = tki[:i] * val
181 tki[i] -= numpy.dot(dv, tki[:i])
182 dv = dv.reshape((i, 1))
183 Pk[i, :] -= numpy.multiply(Pk[:i, :], dv).sum(axis=0)
184 idi[i, :] -= numpy.multiply(idi[:i, :], dv).sum(axis=0)
186 d = numpy.square(idi[i, :]).sum() # pylint: disable=E1101
187 d = tki[i] ** 2 + d
188 if d > 0:
189 d **= 0.5
190 d = 1. / d
191 tki[i] *= d
192 Pk[i, :] *= d
193 idi[i, :] *= d
196def streaming_gram_schmidt(mat, start=None):
197 """
198 Solves the linear regression problem,
199 find :math:`\\beta` which minimizes
200 :math:`\\norme{y - X\\beta}`, based on
201 algorithm :ref:`Streaming Gram-Schmidt
202 <algo_reg_lin_gram_schmidt_streaming>`.
204 @param mat matrix
205 @param start first row to start iteration, ``X.shape[1]`` by default
206 @return iterator on
208 The function assumes the matrix *mat* is
209 horizontal: it has more columns than rows.
211 .. runpython::
212 :showcode:
214 import numpy
215 from mlstatpy.ml.matrices import streaming_gram_schmidt
217 X = numpy.array([[1, 0.5, 10., 5., -2.],
218 [0, 0.4, 20, 4., 2.],
219 [0, 0.7, 20, 4., 2.]], dtype=float).T
221 for i, p in enumerate(streaming_gram_schmidt(X.T)):
222 print("iteration", i, "\\n", p)
223 t = X[:i+3] @ p.T
224 print(t.T @ t)
225 """
226 if len(mat.shape) != 2:
227 raise ValueError("mat must be a matrix.") # pragma: no cover
228 if mat.shape[1] < mat.shape[0]:
229 raise RuntimeError("The function only works if the number of rows is less "
230 "than the number of columns.")
231 if start is None:
232 start = mat.shape[0]
233 mats = mat[:, :start]
234 _, Pk = gram_schmidt(mats, change=True)
235 yield Pk
237 k = start
238 while k < mat.shape[1]:
239 streaming_gram_schmidt_update(mat[:, k], Pk)
240 yield Pk
241 k += 1
244def streaming_linear_regression_update(Xk, yk, XkXk, bk):
245 """
246 Updates coefficients :math:`\\beta_k` to produce :math:`\\beta_{k+1}`
247 in :ref:`l-piecewise-linear-regression`.
248 The function modifies the matrix *Pk*
249 given as an input.
251 @param Xk kth row
252 @param yk kth target
253 @param XkXk matrix :math:`X_{1..k}'X_{1..k}', updated by the function
254 @param bk current coefficient (updated by the function)
255 """
256 Xk = Xk.reshape((1, XkXk.shape[0]))
257 xxk = Xk.T @ Xk
258 XkXk += xxk
259 err = Xk.T * (yk - Xk @ bk)
260 bk[:] += (numpy.linalg.inv(XkXk) @ err).flatten()
263def streaming_linear_regression(mat, y, start=None):
264 """
265 Streaming algorithm to solve a linear regression.
266 See :ref:`l-piecewise-linear-regression`.
268 @param mat features
269 @param y expected target
270 @return iterator on coefficients
272 .. runpython::
273 :showcode:
275 import numpy
276 from mlstatpy.ml.matrices import streaming_linear_regression, linear_regression
278 X = numpy.array([[1, 0.5, 10., 5., -2.],
279 [0, 0.4, 20, 4., 2.],
280 [0, 0.7, 20, 4., 3.]], dtype=float).T
281 y = numpy.array([1., 0.3, 10, 5.1, -3.])
283 for i, bk in enumerate(streaming_linear_regression(X, y)):
284 bk0 = linear_regression(X[:i+3], y[:i+3])
285 print("iteration", i, bk, bk0)
286 """
287 if len(mat.shape) != 2:
288 raise ValueError("mat must be a matrix.") # pragma: no cover
289 if mat.shape[0] < mat.shape[1]:
290 raise RuntimeError("The function only works if the number of rows is more "
291 "than the number of columns.")
292 if len(y.shape) != 1:
293 warnings.warn( # pragma: no cover
294 "This function is not tested for a multidimensional linear regression.")
295 if start is None:
296 start = mat.shape[1]
298 Xk = mat[:start]
299 XkXk = Xk.T @ Xk
300 bk = numpy.linalg.inv(XkXk) @ (Xk.T @ y[:start])
301 yield bk
303 k = start
304 while k < mat.shape[0]:
305 streaming_linear_regression_update(mat[k], y[k], XkXk, bk)
306 yield bk
307 k += 1
310def streaming_linear_regression_gram_schmidt_update(Xk, yk, Xkyk, Pk, bk):
311 """
312 Updates coefficients :math:`\\beta_k` to produce :math:`\\beta_{k+1}`
313 in :ref:`Streaming Linear Regression
314 <l-piecewise-linear-regression-gram_schmidt>`.
315 The function modifies the matrix *Pk*
316 given as an input.
318 @param Xk kth row
319 @param yk kth target
320 @param Xkyk matrix :math:`X_{1..k}' y_{1..k}' (updated by the function)
321 @param Pk Gram-Schmidt matrix produced by the streaming algorithm
322 (updated by the function)
323 @param bk current coefficient (updated by the function)
324 """
325 Xk = Xk.T
326 streaming_gram_schmidt_update(Xk, Pk)
327 Xkyk += (Xk * yk).reshape(Xkyk.shape)
328 bk[:] = Pk @ Xkyk @ Pk
331def streaming_linear_regression_gram_schmidt(mat, y, start=None):
332 """
333 Streaming algorithm to solve a linear regression with
334 Gram-Schmidt algorithm.
335 See :ref:`l-piecewise-linear-regression-gram_schmidt`.
337 @param mat features
338 @param y expected target
339 @return iterator on coefficients
341 .. runpython::
342 :showcode:
344 import numpy
345 from mlstatpy.ml.matrices import streaming_linear_regression, linear_regression
347 X = numpy.array([[1, 0.5, 10., 5., -2.],
348 [0, 0.4, 20, 4., 2.],
349 [0, 0.7, 20, 4., 3.]], dtype=float).T
350 y = numpy.array([1., 0.3, 10, 5.1, -3.])
352 for i, bk in enumerate(streaming_linear_regression(X, y)):
353 bk0 = linear_regression(X[:i+3], y[:i+3])
354 print("iteration", i, bk, bk0)
355 """
356 if len(mat.shape) != 2:
357 raise ValueError("mat must be a matrix.") # pragma: no cover
358 if mat.shape[0] < mat.shape[1]:
359 raise RuntimeError("The function only works if the number of rows is more "
360 "than the number of columns.")
361 if len(y.shape) != 1:
362 warnings.warn( # pragma: no cover
363 "This function is not tested for a multidimensional linear regression.")
364 if start is None:
365 start = mat.shape[1]
367 Xk = mat[:start]
368 xyk = Xk.T @ y[:start]
369 _, Pk = gram_schmidt(Xk.T, change=True)
370 bk = Pk @ xyk @ Pk
371 yield bk
373 k = start
374 while k < mat.shape[0]:
375 streaming_linear_regression_gram_schmidt_update(
376 mat[k], y[k], xyk, Pk, bk)
377 yield bk
378 k += 1