Coverage for aftercovid/preprocess/ts.py: 98%
101 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-03-28 03:09 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2024-03-28 03:09 +0100
1"""
2Preprocesses timeseries about COVID.
3"""
4import numpy
def ts_remove_decreasing_values(series):
    """
    Builds a copy of an integer series in which no value is lower
    than the previous one. Every index where the input drops is
    processed, starting from the last one: the increments located
    before the drop are lowered and the series is rebuilt from its
    first value as a cumulative sum of the corrected increments.
    The input itself is left untouched.

    :param series: series of type int32 or int64 (numpy array or any
        object exposing the underlying array through ``values``)
    :return: new series (numpy array)
    :raises NotImplementedError: if the series has another type
    """
    if series.dtype not in (numpy.int64, numpy.int32):
        raise NotImplementedError(
            "Not implemented for real types.")

    def _absorb_drop(data, where):
        # Works on the increments of *data*, *where* is the index
        # of the value lower than its predecessor.
        first = data[0]
        steps = data[1:] - data[:-1]
        gap = data[where - 1] - data[where]
        # *unit* is the increment assigned to the faulty position and
        # the biggest amount removed from one past increment per sweep.
        unit = int(float(gap) / where) + 1
        gap += unit
        previous_gap = gap + 1
        # Sweeps over the past increments as long as a sweep
        # still reduces what remains to be absorbed.
        while 0 < gap < previous_gap:
            previous_gap = gap
            # From the increment right before the drop down to the
            # second one, the very first increment is never modified.
            for k in range(where - 2, 0, -1):
                if gap <= 0:
                    break
                if steps[k] > unit:
                    cut = min(unit, gap)
                    steps[k] -= cut
                    gap -= cut
                elif steps[k] > 1:
                    steps[k] -= 1
                    gap -= 1
        steps[where - 1] = unit
        data[1:] = first + steps.cumsum()

    source = series.values if hasattr(series, 'values') else series
    values = source.copy()
    drops = [i for i in range(1, len(values))
             if values[i] < values[i - 1]]
    for position in reversed(drops):
        _absorb_drop(values, position)
    return values
def ts_moving_average(series, n=7, center=True):
    """
    Computes the moving average of a differential series.
    The function handles nan as well: a nan is left out of both
    the sum and the count of its window. The output
    does not contain any nan unless a window only contains nan
    (too many consecutive nans).

    :param series: timeseries, a numpy array or a pandas object
        (anything exposing ``values`` and ``index``)
    :param n: window
    :param center: centered average
    :return: moving average (of same size), a pandas object of the
        same class as the input if it was a DataFrame or a named
        Series, a numpy array otherwise
    :raises ValueError: if *center* is True and *n* is even
    """
    if hasattr(series, 'values'):
        cls = series.__class__
        columns = getattr(series, 'columns', None)
        name = getattr(series, 'name', None)
        index = series.index
        series = series.values
        as_df = True
    else:
        as_df = False

    if center and n % 2 != 1:
        raise ValueError("If center is True, n should be odd.")

    dtype = numpy.float64 if series.dtype != numpy.float32 else numpy.float32

    # astype returns a copy, the caller's data is never modified.
    series = series.astype(dtype)
    weights = numpy.ones(series.shape, dtype=dtype)
    isna = numpy.isnan(series)
    weights[isna] = 0
    series[isna] = 0

    # Cumulated sums of values and of valid counts: the difference of two
    # cumulated sums gives the sum (or the count) over a window.
    ret = numpy.cumsum(series, axis=0)
    wet = numpy.cumsum(weights, axis=0)
    res = numpy.zeros(ret.shape, dtype)
    if center:
        d = n // 2
        res[d + 1:-d] = (ret[n:] - ret[:-n]) / (wet[n:] - wet[:-n])
        # Borders: res[i] averages the first i + d values,
        # res[-i - 1] averages the last i + d values.
        # NOTE(review): these border windows hold one value less than
        # the part of the centered window which fits in the series, and
        # n=1 is not supported by this branch -- confirm this is intended.
        for i in range(0, d + 1):
            res[i] = numpy.divide(ret[i + d - 1], wet[i + d - 1])
            res[-i - 1] = numpy.divide(ret[-1] - ret[-(i + d) - 1],
                                       wet[-1] - wet[-(i + d) - 1])
    else:
        res[n:] = (ret[n:] - ret[:-n]) / (wet[n:] - wet[:-n])
        # Beginning: average of all the values seen so far.
        for i in range(0, n):
            res[i] = numpy.divide(ret[i], wet[i])

    if as_df:
        # The pandas object is built from the average *res*, not from
        # *series* which only holds the input with nan replaced by 0.
        if columns is not None:
            return cls(res, columns=columns, index=index)
        if name is not None:
            return cls(res, name=name, index=index)
    return res
def ts_normalise_negative_values(series, n=7, extreme=4):
    """
    *series* is a differential series which should not
    have any negative values. The function removes
    unexpected high values and negative values. These extremes
    are replaced by a local average (centered moving average
    computed by :func:`ts_moving_average`). The result is then
    clipped to zero and rescaled so that its total
    is the total of the original series.
    The function handles nan as well. The output
    does not contain any nan unless there are too many
    consecutive nans.

    :param series: differential values, a numpy array or a pandas
        object (anything exposing ``values`` and ``index``)
    :param n: window of the moving average (must be odd)
    :param extreme: removes extreme values, a value is replaced
        if it is lower than ``moving average / extreme`` or
        higher than ``moving average * extreme``
    :return: corrected series, a pandas object of the
        same class as the input if it was a DataFrame or a named
        Series, a numpy array otherwise
    """
    if hasattr(series, 'values'):
        cls = series.__class__
        columns = getattr(series, 'columns', None)
        name = getattr(series, 'name', None)
        index = series.index
        series = series.values
        as_df = True
    else:
        as_df = False

    mov = ts_moving_average(series, n=n, center=True)
    # astype returns a copy, the caller's data is never modified.
    series = series.astype(mov.dtype)
    isna = numpy.isnan(series)
    series[isna] = 0
    # Total to preserve, nan counted as 0.
    total = numpy.sum(series, axis=0)
    rep = (isna | (series < 0) |
           (mov / extreme > series) | (mov * extreme < series))
    series[rep] = mov[rep]
    # The local average itself may be negative (or nan).
    series = numpy.maximum(series, 0)
    # The new total is measured after clipping so that the rescaling
    # really restores the original total.
    new_total = numpy.nansum(series, axis=0)
    # No rescaling when the new total is null: dividing would turn
    # a series of zeros into a series of nan.
    nonzero = new_total != 0
    scale = numpy.where(
        nonzero, total / numpy.where(nonzero, new_total, 1), 1)
    series *= scale
    if as_df:
        if columns is not None:
            return cls(series, columns=columns, index=index)
        if name is not None:
            return cls(series, name=name, index=index)
    return series