Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Addition for :epkg:`pandas`.
4"""
5from itertools import chain
6from typing import Sequence, Type
7import numpy
8from pandas import Series
9from pandas.api.extensions import (
10 register_series_accessor, ExtensionDtype, register_extension_dtype)
11from pandas.core.arrays.base import ExtensionArrayT
12from pandas.arrays import PandasArray
13from pandas.core.arrays.numpy_ import PandasDtype
14from .weighted_number import WeightedDouble # pylint: disable=E0611
@register_extension_dtype
class WeightedSeriesDtype(ExtensionDtype):
    """
    Defines a custom type for a @see cl WeightedSeries.
    The class is registered in :epkg:`pandas` with
    ``register_extension_dtype``.
    """

    # numpy dtype built from WeightedDouble, property *kind* is read from it.
    dtype = numpy.dtype(WeightedDouble)

    def __str__(self):
        """
        Returns the name of the type (see property *name*).
        """
        return self.name

    @property
    def type(self):
        # type: () -> type
        """The scalar type for the array, e.g. ``int``
        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``.
        """
        # NOTE(review): this returns the dtype class itself and not the
        # scalar class WeightedDouble the docstring suggests; left unchanged
        # because existing callers may rely on it -- confirm.
        return WeightedSeriesDtype

    def __repr__(self):
        "Returns the constant string ``'WeightedSeriesDtype()'``."
        return "WeightedSeriesDtype()"

    @property
    def kind(self):
        # type: () -> str
        """
        A character code (one of 'biufcmMOSUV'), default 'O'
        This should match the NumPy dtype used when the array is
        converted to an ndarray. The value is the kind of the class
        attribute *dtype*.

        See Also
        --------
        numpy.dtype.kind
        """
        return WeightedSeriesDtype.dtype.kind

    @property
    def name(self):
        """
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        """
        return "WeightedDouble"

    @classmethod
    def construct_from_string(cls, string):
        """
        Attempts to build a @see cl WeightedDouble from a string.
        The string must start with ``'WD'``, what follows is stripped
        from parentheses and spaces and split on commas:
        ``'WD'`` or ``'WD()'`` (missing value), ``'WD(1.5)'`` (value),
        ``'WD(1.5, 2)'`` (value, weight).

        Parameters
        ----------
        string : str

        Returns
        -------
        instance of 'WeightedDouble'

        Raises
        ------
        TypeError
            If *string* is not a string, does not start with ``'WD'``,
            holds more than two fields or a field which
            cannot be converted into a float.
        """
        # NOTE(review): pandas documents this method as returning a dtype
        # instance, it returns a WeightedDouble here; kept as is -- confirm.
        if not isinstance(string, str):
            raise TypeError(
                "'construct_from_string' expects a string, got {0}".format(
                    type(string)))
        if not string.startswith("WD"):  # pragma no cover
            raise TypeError("Unable to parse '{0}'".format(string))
        fields = string[2:].strip('() ').split(",")
        if len(fields) > 2:  # pragma no cover
            raise TypeError("Unable to parse '{0}'".format(string))
        if len(fields) == 1 and fields[0] == '':
            # Nothing between the parentheses: missing value.
            return WeightedDouble(numpy.nan)
        try:
            numbers = [float(field) for field in fields]
        except ValueError as exc:
            # The pandas registry only ignores TypeError when it probes
            # registered dtypes, a ValueError would leak to the user.
            raise TypeError(
                "Unable to parse '{0}'".format(string)) from exc
        if len(numbers) == 2:
            return WeightedDouble(numbers[0], numbers[1])
        return WeightedDouble(numbers[0])

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return WeightedArray
@register_series_accessor("wdouble")
class WeightedDoubleAccessor:
    """
    Extends :epkg:`pandas` with new accessor for
    series based on @see cl WeightedDouble.
    The accessor is registered under name ``wdouble``.
    """

    def __init__(self, obj):
        # obj: the container the accessor is attached to.
        self.obj = obj

    def __len__(self):
        "Returns the number of elements of the wrapped object."
        return len(self.obj)

    @property
    def value(self):
        "Returns the values."
        return self._new_series(lambda s: s.value)

    @property
    def weight(self):
        "Returns the weights."
        return self._new_series(lambda s: s.weight)

    def isnan(self):
        "Tells if values are missing."
        return self._new_series(lambda s: numpy.isnan(s.value))

    def _new_series(self, fct):
        """
        Applies *fct* to every element and returns the results
        in a @see cl WeightedArray built with ``dtype=float``.

        :param fct: function applied to each element
        :return: @see cl WeightedArray
        :raises ValueError: if the container is empty
        :raises TypeError: if the container is not a
            @see cl WeightedArray and its first element is not
            a @see cl WeightedDouble
        """
        if len(self) == 0:  # pragma no cover
            raise ValueError("Series cannot be empty.")
        # The first element must be fetched by position: ``obj[0]`` on a
        # Series is a label lookup and fails when the index has no label 0.
        # Objects without ``iloc`` fall back on plain indexing.
        locator = getattr(self.obj, "iloc", None)
        first = self.obj[0] if locator is None else locator[0]
        if isinstance(self.obj, WeightedArray) or isinstance(first, WeightedDouble):
            # An array has no index, None lets the constructor use its default.
            return WeightedArray([fct(s) for s in self.obj],
                                 index=getattr(self.obj, "index", None),
                                 dtype=float)
        raise TypeError(  # pragma no cover
            "Unexpected type, array is '{0}', first element is '{1}'".format(
                type(self.obj), type(first)))
class WeightedSeries(Series):
    """
    Implements a series holding @see WeightedDouble numbers.
    It only changes the default *dtype* and the way
    missing attributes are resolved.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor to use
        @see cl WeightedSeriesDtype as *dtype* unless
        the caller gives another one with parameter *dtype*.
        """
        dt = kwargs.pop('dtype', WeightedSeriesDtype())
        Series.__init__(self, *args, dtype=dt, **kwargs)

    def __getattr__(self, attr):
        """
        Called when the regular attribute lookup failed.
        An attribute defined by class *Series* is looked up again
        without this hook, then the method tries
        @see cl WeightedDoubleAccessor, finally ``_ndarray``
        is computed as ``numpy.array(self)``.

        :param attr: attribute name
        :return: attribute value
        :raises AttributeError: if the attribute cannot be resolved
        """
        if hasattr(Series, attr):
            # This hook only runs after the normal lookup raised
            # AttributeError (a property or an accessor of Series failed).
            # ``getattr(self, attr)`` would enter this method again and end
            # with a RecursionError, ``object.__getattribute__`` raises the
            # original AttributeError instead.
            return object.__getattribute__(self, attr)
        if hasattr(WeightedDoubleAccessor, attr):
            obj = WeightedDoubleAccessor(self)
            return getattr(obj, attr)
        if attr == '_ndarray':
            return numpy.array(self)
        raise AttributeError("Unkown attribute '{0}'".format(attr))
class WeightedArray(PandasArray):
    """
    Implements an array holding @see WeightedDouble numbers.
    This leverages a new concept introduced in :epkg:`pandas` 0.24
    implemented in class :epkg:`PandasArray`. It can be used
    to define a new column type in a dataframe.
    """

    def __init__(self, *args, **kwargs):
        """
        Overwrites the constructor. Three cases are handled:

        * keyword *data* is a @see cl WeightedSeries: its ``_ndarray``
          is used,
        * a single positional :epkg:`numpy` array: it is used as is,
        * anything else is given to the constructor of
          @see cl WeightedSeries and ``_ndarray`` of the result is used.
        """
        if "data" in kwargs and isinstance(kwargs["data"], WeightedSeries):
            values = kwargs["data"]._ndarray
        elif len(args) == 1 and isinstance(args[0], numpy.ndarray):
            values = args[0]
        else:
            values = WeightedSeries(*args, **kwargs)._ndarray
        # Single exit point: every branch initialises the parent class,
        # the first one used to leave the array uninitialised.
        PandasArray.__init__(self, values)

    @property
    def dtype(self):
        """
        Returns the attribute ``_dtype`` of the array.
        """
        # NOTE(review): ``_dtype`` is not assigned in this class, presumably
        # by the constructor of PandasArray; it may not be an instance of
        # WeightedSeriesDtype -- confirm.
        return self._dtype

    @property
    def name(self):
        """
        A string identifying the data type.
        Will be used for display in, e.g. ``Series.dtype``
        """
        return "WeightedArray"

    # The four operators pair elements with ``zip``: *other* must be
    # iterable and the result stops at the shortest operand.

    def __add__(self, other):
        "Addition, element by element."
        return WeightedArray([a + b for a, b in zip(self, other)])

    def __sub__(self, other):
        "Subtraction, element by element."
        return WeightedArray([a - b for a, b in zip(self, other)])

    def __mul__(self, other):
        "Multiplication, element by element."
        return WeightedArray([a * b for a, b in zip(self, other)])

    def __truediv__(self, other):
        "Division, element by element."
        return WeightedArray([a / b for a, b in zip(self, other)])

    def isna(self):
        """
        Tells which elements are missing, an element is
        missing if its attribute *value* is nan.

        :return: :epkg:`numpy` array
        """
        return numpy.array([numpy.isnan(s.value) for s in self])

    @classmethod
    def _concat_same_type(cls: Type[ExtensionArrayT],  # pylint: disable=W0221
                          to_concat: Sequence[ExtensionArrayT]) -> ExtensionArrayT:
        """Concatenate multiple array

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        @see cl WeightedArray
        """
        for s in to_concat:
            # NOTE(review): ``object`` in the tuple makes this test always
            # true, the exception below is never raised; kept as is because
            # a strict check may reject arrays accepted today -- confirm.
            if not isinstance(s.dtype, (WeightedSeriesDtype, object)):
                raise TypeError(  # pragma no cover
                    "All arrays must be of type WeightedSeriesDtype not {}-{}".format(
                        type(s), type(s.dtype)))
        return WeightedArray(list(chain.from_iterable(to_concat)))

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
        """
        Builds an array from a sequence of scalars.

        :param scalars: sequence converted with ``numpy.asarray``
        :param dtype: dtype given to ``numpy.asarray``, a *PandasDtype*
            is replaced by its attribute ``_dtype``
        :param copy: copies the data if the conversion returned
            *scalars* itself
        :return: instance of *cls*
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = numpy.asarray(scalars, dtype=dtype)
        if copy and result is scalars:
            result = result.copy()
        return cls(result)