Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Addition for :epkg:`pandas`. 

4""" 

5from itertools import chain 

6from typing import Sequence, Type 

7import numpy 

8from pandas import Series 

9from pandas.api.extensions import ( 

10 register_series_accessor, ExtensionDtype, register_extension_dtype) 

11from pandas.core.arrays.base import ExtensionArrayT 

12from pandas.arrays import PandasArray 

13from pandas.core.arrays.numpy_ import PandasDtype 

14from .weighted_number import WeightedDouble # pylint: disable=E0611 

15 

16 

class WeightedSeriesDtype(ExtensionDtype):
    """
    Defines a custom type for a @see cl WeightedSeries.
    """

    # numpy dtype built from WeightedDouble, property *kind* reads it.
    dtype = numpy.dtype(WeightedDouble)

    def __str__(self):
        """
        Returns the dtype name (same string as property *name*).
        """
        return self.name

    @property
    def type(self):
        # type: () -> type
        """The scalar type for the array, e.g. ``int``
        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``.
        """
        # NOTE(review): this returns the dtype class itself and not
        # WeightedDouble, confirm this is what callers expect.
        return WeightedSeriesDtype

    def __repr__(self):
        "Returns the constant string ``WeightedSeriesDtype()``."
        return "WeightedSeriesDtype()"

    @property
    def kind(self):
        # type: () -> str
        """
        A character code (one of 'biufcmMOSUV'), default 'O'
        This should match the NumPy dtype used when the array is
        converted to an ndarray. The value is read from the class
        attribute *dtype* (presumably 'O' as the numpy dtype is built
        from a Python class).

        See Also
        --------
        numpy.dtype.kind
        """
        return WeightedSeriesDtype.dtype.kind

    @property
    def name(self):
        """
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        """
        return "WeightedDouble"

    @classmethod
    def construct_from_string(cls, string):
        """
        Attempt to construct this type from a string.

        Parameters
        ----------
        string : str
            Text starting with ``WD``, optionally followed by one or two
            comma separated numbers, parentheses and spaces around them
            are ignored: ``WD``, ``WD(1.5)``, ``WD(1.5, 2)``.

        Returns
        -------
        self : instance of 'WeightedDouble'
            Built from the parsed numbers, or from ``numpy.nan``
            if the string does not contain any number.

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string':
            not a string, wrong prefix, more than two numbers or
            a number which cannot be converted into a float.
        """
        if not isinstance(string, str):
            raise TypeError(
                "'construct_from_string' expects a string, got {0}".format(
                    type(string)))
        if not string.startswith("WD"):  # pragma no cover
            raise TypeError("Unable to parse '{0}'".format(string))
        pieces = string[2:].strip('() ').split(",")
        if len(pieces) > 2:  # pragma no cover
            raise TypeError("Unable to parse '{0}'".format(string))
        if pieces == ['']:
            # Nothing after the prefix: missing value.
            return WeightedDouble(numpy.nan)
        try:
            numbers = [float(p) for p in pieces]
        except ValueError as e:
            # Callers probing dtypes by name only expect TypeError,
            # a ValueError raised by float() must not leak.
            raise TypeError("Unable to parse '{0}'".format(string)) from e
        return WeightedDouble(*numbers)

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return WeightedArray


register_extension_dtype(WeightedSeriesDtype)

113 

114 

@register_series_accessor("wdouble")
class WeightedDoubleAccessor:
    """
    Extends :epkg:`pandas` with new accessor for
    series based on @see cl WeightedDouble.
    The accessor is registered under name ``wdouble``.
    """

    def __init__(self, obj):
        """
        @param      obj     container the accessor is attached to
        """
        self.obj = obj

    def __len__(self):
        "Returns the number of elements of the wrapped container."
        return len(self.obj)

    @property
    def value(self):
        "Returns the values."
        return self._new_series(lambda s: s.value)

    @property
    def weight(self):
        "Returns the weights."
        return self._new_series(lambda s: s.weight)

    def isnan(self):
        "Tells if values are missing."
        return self._new_series(lambda s: numpy.isnan(s.value))

    def _new_series(self, fct):
        """
        Applies *fct* to every element and stores the results
        into a new @see cl WeightedArray.

        @param      fct     function applied to each element
        @return             @see cl WeightedArray

        Raises *ValueError* if the container is empty and
        *TypeError* if the container is not a @see cl WeightedArray
        and its first element is not a @see cl WeightedDouble.
        """
        if len(self) == 0:  # pragma no cover
            raise ValueError("Series cannot be empty.")
        # Positional access to the first element: ``self.obj[0]`` is a
        # label lookup on a Series and fails when the index has no label 0.
        first = next(iter(self.obj))
        if isinstance(self.obj, WeightedArray) or isinstance(first, WeightedDouble):
            return WeightedArray([fct(s) for s in self.obj],
                                 index=self.obj.index, dtype=float)
        raise TypeError(  # pragma no cover
            "Unexpected type, array is '{0}', first element is '{1}'".format(
                type(self.obj), type(first)))

151 

152 

class WeightedSeries(Series):
    """
    Implements a series holding @see WeightedDouble numbers.
    Does not add anything to *Series* except a default *dtype*
    and a fallback on @see cl WeightedDoubleAccessor for
    unknown attributes.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor to use
        @see cl WeightedSeriesDtype as *dtype*
        unless the caller gives one explicitly.
        Other arguments are sent to *Series* unchanged.
        """
        dt = kwargs.pop('dtype', WeightedSeriesDtype())
        Series.__init__(self, *args, dtype=dt, **kwargs)

    def __getattr__(self, attr):
        """
        Tries first to see if class *Series* has this attribute
        and then tries @see cl WeightedDoubleAccessor.
        Attribute ``_ndarray`` returns the series converted
        into a numpy array. Any other name raises *AttributeError*.
        """
        if hasattr(Series, attr):
            # This method only runs after the regular lookup failed.
            # ``getattr(self, attr)`` would fail the same way and come
            # back here until RecursionError; the plain lookup raises
            # the genuine AttributeError instead.
            return object.__getattribute__(self, attr)
        if hasattr(WeightedDoubleAccessor, attr):
            obj = WeightedDoubleAccessor(self)
            return getattr(obj, attr)
        if attr == '_ndarray':
            return numpy.array(self)
        raise AttributeError("Unkown attribute '{0}'".format(attr))

180 

181 

class WeightedArray(PandasArray):
    """
    Implements an array holding @see WeightedDouble numbers.
    This leverages a new concept introduced in :epkg:`pandas` 0.24
    implemented in class :epkg:`PandasArray`. It can be used
    to define a new column type in a dataframe.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor, three cases are supported:

        * ``data=<WeightedSeries>``: the series is converted into an array,
        * a single positional :epkg:`numpy` array: used as is,
        * anything else: the arguments are sent to @see cl WeightedSeries
          (which defaults *dtype* to @see cl WeightedSeriesDtype)
          and the resulting series is converted into an array.
        """
        if "data" in kwargs and isinstance(kwargs["data"], WeightedSeries):
            values = kwargs["data"]._ndarray
        elif len(args) == 1 and isinstance(args[0], numpy.ndarray):
            values = args[0]
        else:
            values = WeightedSeries(*args, **kwargs)._ndarray
        # Single initialisation point: every branch above must end up
        # with an initialised array.
        PandasArray.__init__(self, values)

    @property
    def dtype(self):
        """
        Returns the dtype stored in attribute ``_dtype``
        (presumably set by the *PandasArray* constructor,
        to be confirmed against the installed :epkg:`pandas`).
        """
        return self._dtype

    @property
    def name(self):
        """
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        """
        return "WeightedArray"

    def __add__(self, other):
        "Addition, element by element, stops at the shortest operand."
        return WeightedArray([a + b for a, b in zip(self, other)])

    def __sub__(self, other):
        "Subtraction, element by element, stops at the shortest operand."
        return WeightedArray([a - b for a, b in zip(self, other)])

    def __mul__(self, other):
        "Multiplication, element by element, stops at the shortest operand."
        return WeightedArray([a * b for a, b in zip(self, other)])

    def __truediv__(self, other):
        "Division, element by element, stops at the shortest operand."
        return WeightedArray([a / b for a, b in zip(self, other)])

    def isna(self):
        "Returns a boolean numpy array, True where ``value`` is nan."
        return numpy.array([numpy.isnan(s.value) for s in self])

    @classmethod
    def _concat_same_type(cls: Type[ExtensionArrayT],  # pylint: disable=W0221
                          to_concat: Sequence[ExtensionArrayT]) -> ExtensionArrayT:
        """Concatenate multiple array

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        @see cl WeightedArray
        """
        for s in to_concat:
            # NOTE(review): every value is an instance of ``object``,
            # this check never fails, kept to preserve behaviour.
            if not isinstance(s.dtype, (WeightedSeriesDtype, object)):
                raise TypeError(  # pragma no cover
                    "All arrays must be of type WeightedSeriesDtype not {}-{}".format(
                        type(s), type(s.dtype)))
        return WeightedArray(list(chain(*to_concat)))

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """
        Builds an array from a sequence of scalars.

        Parameters
        ----------
        scalars : sequence
            converted with ``numpy.asarray``
        dtype : dtype, optional
            a *PandasDtype* is replaced by its ``_dtype`` attribute
            before the conversion
        copy : bool
            copies the data if the conversion returned
            *scalars* itself

        Returns
        -------
        instance of *cls*
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = numpy.asarray(scalars, dtype=dtype)
        if copy and result is scalars:
            result = result.copy()
        return cls(result)