 r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""

3"""

4import numpy

def ts_remove_decreasing_values(series):
    """
    Returns a series with no decreasing values
    (only growing). Data are sometimes normalized and
    show negative values but the past remains unchanged.
    This function decreases past increments until the series
    is growing.

    Only ``int32`` and ``int64`` series are supported. The input is
    never modified: the function works on a copy and returns it as
    a numpy array (even if *series* exposes ``.values``).

    :param series: series (numpy array or any object with ``.values``)
    :return: new series
    :raises NotImplementedError: if the dtype is not int32 or int64
    """
    def normalize(values, index):
        # Rewrites ``values`` in place so that ``values[index]`` is no
        # longer below ``values[index - 1]``.
        # The series is rebuilt from its first value and its increments,
        # so the anchor must be the scalar ``values[0]`` (adding the whole
        # array to the cumulated increments cannot broadcast).
        origin = values[0]
        diff = values[1:] - values[:-1]
        # Size of the drop to absorb.
        delta = values[index - 1] - values[index]
        # Positive step which replaces the negative increment.
        h = int(float(delta) / index) + 1
        # Replacing the negative increment by ``h`` raises the cumulated
        # sum by ``drop + h``: that amount is taken from earlier increments.
        delta += h
        previous = delta + 1
        # Repeats passes over the past as long as a pass makes progress.
        while 0 < delta < previous:
            previous = delta
            pos = index - 2
            # Walks backwards; the very first increment is never touched.
            while pos > 0 and delta > 0:
                if diff[pos] > h:
                    d = min(h, delta)
                    diff[pos] -= d
                    delta -= d
                elif diff[pos] > 1:
                    diff[pos] -= 1
                    delta -= 1
                pos -= 1
        diff[index - 1] = h
        values[1:] = origin + diff.cumsum()

    if series.dtype in (numpy.int64, numpy.int32):
        if hasattr(series, 'values'):
            values = series.values.copy()
        else:
            values = series.copy()
        # Positions where the series goes down.
        points = [i for i in range(1, len(values))
                  if values[i] < values[i - 1]]
        # The latest drops are fixed first.
        for p in reversed(points):
            normalize(values, p)
        return values

    raise NotImplementedError(
        "Not implemented for real types.")

def ts_moving_average(series, n=7, center=True):
    """
    Computes the moving average of a differential series.
    The function handles nan as well. The output
    does not contain any nan unless there are too many
    consecutive nans.

    NaN values are excluded from both the sum and the count of each
    window. At both ends of the series the average is computed on
    truncated windows. When *series* is a pandas DataFrame or a named
    pandas Series, the result is wrapped in the same class with the
    same index (an unnamed Series yields a plain numpy array).

    :param series: timeseries (numpy array, or object with ``.values``
        and ``.index``)
    :param n: window
    :param center: centered average
    :return: moving average (of same size)
    :raises ValueError: if *center* is True and *n* is even
    """
    if hasattr(series, 'values'):
        cls = series.__class__
        columns = getattr(series, 'columns', None)
        name = getattr(series, 'name', None)
        index = series.index
        series = series.values
        as_df = True
    else:
        as_df = False
        cls = None

    if center and n % 2 != 1:
        raise ValueError("If center is True, n should be odd.")

    dtype = numpy.float64 if series.dtype != numpy.float32 else numpy.float32

    # astype copies, the caller's data is never modified.
    series = series.astype(dtype)
    # weights is 1 for an observed value, 0 for nan, so that cumulated
    # weights count the valid observations of a window.
    weights = numpy.ones(series.shape, dtype=dtype)
    isna = numpy.isnan(series)
    weights[isna] = 0
    series[isna] = 0

    # Window sums and counts are differences of cumulated sums.
    ret = numpy.cumsum(series, axis=0)
    wet = numpy.cumsum(weights, axis=0)
    res = numpy.zeros(ret.shape, dtype)
    if center:
        # NOTE(review): the slice below is empty when n == 1 (d == 0),
        # centered averages seem to expect n >= 3 -- confirm.
        d = n // 2
        # Full windows of n values centered on positions d + 1 .. len - d - 1.
        res[d + 1:-d] = (ret[n:] - ret[:-n]) / (wet[n:] - wet[:-n])
        for i in range(0, d + 1):
            # Truncated windows: average of the first i + d values...
            res[i] = numpy.divide(ret[i + d - 1], wet[i + d - 1])
            # ...and of the last i + d values.
            res[-i - 1] = numpy.divide(ret[-1] - ret[-(i + d) - 1],
                                       wet[-1] - wet[-(i + d) - 1])
    else:
        # Trailing window: average of the n values ending at each position.
        res[n:] = (ret[n:] - ret[:-n]) / (wet[n:] - wet[:-n])
        for i in range(0, n):
            # Not enough history yet: average of everything seen so far.
            res[i] = numpy.divide(ret[i], wet[i])

    if as_df:
        # Wraps the computed average (not the nan-filled input).
        if columns is not None:
            return cls(res, columns=columns, index=index)
        if name is not None:
            return cls(res, name=name, index=index)
    return res

def ts_normalise_negative_values(series, n=7, extreme=4):
    """
    *series* is a differential series which should not
    have any negative values. The function replaces
    missing values, negative values and values far from the
    centered moving average by that moving average, clips
    what remains negative to zero and rescales the result
    with the ratio of the totals computed before and after
    the replacement.

    :param series: differential values
    :param n: window of the centered moving average
    :param extreme: a value is replaced if it is lower than
        the moving average divided by *extreme* or higher than
        the moving average multiplied by *extreme*
    :return: corrected series
    """
    wrap = hasattr(series, 'values')
    if wrap:
        cls = series.__class__
        columns = getattr(series, 'columns', None)
        name = getattr(series, 'name', None)
        index = series.index
        series = series.values

    mov = ts_moving_average(series, n=n, center=True)

    # Working copy in the dtype of the moving average, nan replaced by 0.
    raw = series.astype(mov.dtype)
    missing = numpy.isnan(raw)
    fixed = raw.copy()
    fixed[missing] = 0
    total = fixed.sum(axis=0)

    # Every suspicious value is replaced by the local average.
    too_low = fixed < mov / extreme
    too_high = fixed > mov * extreme
    rep = missing | (fixed < 0) | too_low | too_high
    fixed[rep] = mov[rep]

    # Total after replacement, ignoring nan coming from the average.
    nonan = fixed.copy()
    nonan[numpy.isnan(nonan)] = 0
    new_total = nonan.sum(axis=0)

    # Clips negative values then rescales with the ratio of the totals.
    result = numpy.maximum(fixed, 0)
    result *= total / new_total

    if wrap and columns is not None:
        return cls(result, columns=columns, index=index)
    if wrap and name is not None:
        return cls(result, name=name, index=index)
    return result