Coverage for mlinsights/timeseries/utils.py: 94%
80 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Timeseries data manipulations.
4"""
5import numpy
6from sklearn import get_config
def build_ts_X_y(model, X, y, weights=None, same_rows=False):
    """
    Builds standard *X, y* based on the given ones.

    @param      model       a timeseries model (@see cl BaseTimeSeries),
                            attributes *past*, *delay1*, *delay2*
                            and *use_all_past* are read
    @param      X           times series, used as features, [n_obs, n_features],
                            X may be empty (None)
    @param      y           timeseries (one single vector), [n_obs]
    @param      weights     weights None or array [n_obs]
    @param      same_rows   keep the same number of rows
                            as the original datasets, use nan when no value is
                            available
    @return                 *(X, y, weights)*:
                            X is array of features [nrows, n_features + past]
                            (or [nrows, n_features * past + past] if
                            ``model.use_all_past`` is true) where
                            `nrows = n_obs - model.delay2 - model.past + 2`,
                            y is an array of targets
                            [nrows, model.delay2 - model.delay1],
                            weights is None or array [nrows]

    If *same_rows* is true, the returned *X* and *y* have *n_obs* rows,
    the rows without any value are filled with nan (which is only
    meaningful if ``y.dtype`` is a float type as the outputs are allocated
    with ``y.dtype``) and *weights* is returned unchanged.

    Without *same_rows*, row *r* holds the *past* observations
    ``y[r], ..., y[r + past - 1]``, the features ``X[r + past - 1]``
    (or ``X[r], ..., X[r + past - 1]`` if ``model.use_all_past`` is true)
    and the targets ``y[r + past - 1 + i]`` for *i* in
    ``[model.delay1, model.delay2)``.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.timeseries import build_ts_X_y
        from mlinsights.timeseries.base import BaseTimeSeries

        X = numpy.arange(10).reshape(5, 2)
        y = numpy.arange(5) * 100
        weights = numpy.arange(5) * 1000
        bs = BaseTimeSeries(past=2)
        nx, ny, nw = build_ts_X_y(bs, X, y, weights)
        print('X=', X)
        print('y=', y)
        print('nx=', nx)
        print('ny=', ny)

    With ``use_all_past=True``:

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.timeseries.base import BaseTimeSeries
        from mlinsights.timeseries import build_ts_X_y

        X = numpy.arange(10).reshape(5, 2)
        y = numpy.arange(5) * 100
        weights = numpy.arange(5) * 1000
        bs = BaseTimeSeries(past=2, use_all_past=True)
        nx, ny, nw = build_ts_X_y(bs, X, y, weights)
        print('X=', X)
        print('y=', y)
        print('nx=', nx)
        print('ny=', ny)
    """
    if not hasattr(model, "use_all_past") or not hasattr(model, "past"):
        raise TypeError(  # pragma: no cover
            f"model must be of type BaseTimeSeries not {type(model)}")

    past = model.past
    delay1 = model.delay1
    delay2 = model.delay2
    n_obs = y.shape[0]
    ncol = X.shape[1] if X is not None else 0
    # number of rows for which both the past window and the targets exist
    nrow = n_obs - delay2 - past + 2
    # offset between a row index and its most recent past observation
    shift = past - 1
    # same_rows keeps n_obs rows, the first ones being padding
    rows = n_obs if same_rows else nrow
    first = rows - nrow

    def _new_array(n_columns):
        # padded outputs start as nan, trimmed outputs are fully overwritten
        shape = (rows, n_columns)
        if same_rows:
            return numpy.full(shape, numpy.nan, dtype=y.dtype)
        return numpy.empty(shape, dtype=y.dtype)

    new_y = _new_array(delay2 - delay1)

    if model.use_all_past:
        new_X = _new_array(ncol * past + past)
        if same_rows:
            # NOTE(review): this layout is kept exactly as it was. It is
            # not the trimmed layout shifted by `first` rows (features and
            # past values also fill some of the leading rows) and the
            # slices only have matching lengths for some settings
            # (e.g. past=2, delay1=1) -- confirm the intended alignment.
            if X is not None:
                for i in range(past):
                    begin = i * ncol
                    new_X[i:, begin:begin + ncol] = X[i:]
            for i in range(past):
                end = n_obs + i + delay1 - 1 - delay2
                new_X[first - i:first - i + end - i,
                      i + ncol * past] = y[i:end]
            for i in range(delay1, delay2):
                new_y[first:, i - delay1] = y[i + 1:i + nrow + 1]
        else:
            if X is not None:
                for i in range(past):
                    begin = i * ncol
                    new_X[:, begin:begin + ncol] = X[i:i + nrow]
            for i in range(past):
                # every window has nrow values whatever past is, the
                # previous bound (n_obs + i + delay1 - 1 - delay2) only
                # matched nrow when past + delay1 == 3
                new_X[:, i + ncol * past] = y[i:i + nrow]
            for i in range(delay1, delay2):
                # targets come after the last past value, same offset
                # as in the branch which does not use all the past
                new_y[:, i - delay1] = y[i + shift:i + shift + nrow]
    else:
        # trimmed and padded outputs only differ by the `first` leading rows
        new_X = _new_array(ncol + past)
        if X is not None:
            new_X[first:, :X.shape[1]] = X[shift:X.shape[0] - delay2 + 1]
        for i in range(past):
            # nrow values per window, the previous bound added
            # delay1 - 1 and failed as soon as delay1 != 1
            new_X[first:, i + ncol] = y[i:i + nrow]
        for i in range(delay1, delay2):
            new_y[first:, i - delay1] = y[i + shift:i + shift + nrow]

    if same_rows or weights is None:
        new_weights = weights
    else:
        new_weights = weights[shift:shift + nrow]
    return new_X, new_y, new_weights
def check_ts_X_y(model, X, y):
    """
    Checks that datasets *(X, y)* was built with function
    @see fn build_ts_X_y.

    @param      model   timeseries model, attribute *past* is read and
                        ``preprocessing_.context_length`` is added to it
                        if the model has a not null attribute *preprocessing_*
    @param      X       features, attributes *dtype* and *shape* are read
    @param      y       targets or None

    Nothing is checked if :epkg:`scikit-learn` configuration
    *assume_finite* is true (or missing). Otherwise, the function raises
    *TypeError* if *X* or *y* is not float32 or float64,
    and *AssertionError* if *y* is missing while past observations are
    needed, if *X* and *y* do not have the same number of rows,
    if *y* has more than one column or if *y* has fewer rows than the
    number of past observations the model requires.
    """
    cfg = get_config()
    if cfg.get('assume_finite', True):
        return  # pragma: no cover
    float_types = (numpy.float32, numpy.float64)
    if X.dtype not in float_types:
        raise TypeError(
            f"Features must be of type float32 and float64 not {X.dtype}.")
    if y is not None and y.dtype not in float_types:
        # the message used to mention the features although y is checked
        raise TypeError(  # pragma: no cover
            f"Targets must be of type float32 and float64 not {y.dtype}.")
    # number of past observations required before any prediction
    cst = model.past
    if (hasattr(model, 'preprocessing_') and model.preprocessing_ is not None):
        cst += model.preprocessing_.context_length
    if y is None:
        if cst > 0:
            raise AssertionError(  # pragma: no cover
                f"y must be specified to give the model past data to predict, "
                f"it requires at least {cst} observations.")
        return  # pragma: no cover
    if y.shape[0] != X.shape[0]:
        raise AssertionError(  # pragma: no cover
            f"X and y must have the same number of rows {X.shape[0]} != {y.shape[0]}.")
    if len(y.shape) > 1 and y.shape[1] != 1:
        raise AssertionError(  # pragma: no cover
            f"y must be 1-dimensional not has shape {y.shape}.")
    if y.shape[0] < cst:
        raise AssertionError(  # pragma: no cover
            f"y is not enough past data to predict, "
            f"it requires at least {cst} observations.")