Coverage for mlinsights/timeseries/patterns.py: 100%
49 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Find patterns in timeseries.
4"""
5import numpy
6import pandas
7from sklearn.cluster import KMeans
8from .agg import aggregate_timeseries
def find_ts_group_pattern(ttime, values, names, name_subset=None,
                          per='week', unit='half-hour', agg='sum',
                          estimator=None, fLOG=None):
    """
    Clusters times series to find similar patterns.

    The rows of every studied group are aggregated with
    ``aggregate_timeseries``, the aggregated series of each group is
    flattened into one feature vector (missing values are replaced by 0),
    the groups are clustered and every input row finally receives
    the cluster and the distances computed for its group.

    @param      ttime       time column (:epkg:`numpy` array)
    @param      values      features to use to cluster (:epkg:`numpy` array)
    @param      names       column which holds group name (:epkg:`numpy` array)
    @param      name_subset subset of groups to study, None for all
    @param      per         aggregation period,
                            forwarded to ``aggregate_timeseries``
    @param      unit        forwarded to ``aggregate_timeseries``
                            (presumably the time resolution of the
                            aggregated series, to be confirmed there)
    @param      agg         forwarded to ``aggregate_timeseries``
                            (presumably the aggregation function,
                            to be confirmed there)
    @param      estimator   estimator used to find pattern, it must expose
                            *fit*, *predict* and *transform*, it is fitted
                            by this function; None means a
                            :epkg:`sklearn:cluster:KMeans` built with
                            its default parameters
    @param      fLOG        logging function
    @return                 found clusters, distances: *clusters* has one
                            value per row of *ttime* (-1 if the group of the
                            row was not studied), *distances* has one row per
                            row of *ttime* and as many columns as the output
                            of ``estimator.transform`` (NaN if the group
                            of the row was not studied)

    Raises ``TypeError`` if *ttime*, *values* or *names* is not
    a :epkg:`numpy` array, ``ValueError`` if there is no group to study.
    """
    for var, na in zip([ttime, values, names], ['ttime', 'values', 'names']):
        if not isinstance(var, numpy.ndarray):
            raise TypeError(f"'{na}' must an array not {type(var)}")

    # builds features
    set_names = set(names)
    if name_subset is not None:
        set_names &= set(name_subset)
    if not set_names:
        # Without this check the failure only shows up later, inside
        # pandas.concat, with a message unrelated to the real cause.
        raise ValueError(
            "No group to cluster: 'names' is empty or shares no value "
            "with 'name_subset'.")
    if fLOG:
        fLOG(  # pragma: no cover
            f'[find_ts_group_pattern] build features, {len(set_names)} groups')
    gr_rows = []
    to_merge = []
    for name in set_names:
        # Row positions are kept so that the predictions can be written
        # back at the end without comparing the names a second time.
        rows = numpy.flatnonzero(names == name)
        gr = aggregate_timeseries(None, ttime[rows], values[rows],
                                  unit=unit, agg=agg, per=per)
        gr.set_index(gr.columns[0], inplace=True)
        gr_rows.append(rows)
        to_merge.append(gr)

    if fLOG:
        fLOG(  # pragma: no cover
            '[find_ts_group_pattern] merge features')
    # Aligns all groups on the same index, a group with no value
    # for an index entry gets 0.
    all_merged = pandas.concat(to_merge, axis=1)
    all_merged.fillna(0, inplace=True)
    n_groups = len(to_merge)
    ncol = all_merged.shape[1] // n_groups
    # One row per group: its block of columns flattened into one vector.
    gr_feats = numpy.vstack([
        all_merged.iloc[:, i * ncol: (i + 1) * ncol].values.ravel()
        for i in range(n_groups)])

    # cluster
    if fLOG:
        fLOG(  # pragma: no cover
            f'[find_ts_group_pattern] clustering, shape={gr_feats.shape}')
    if estimator is None:
        estimator = KMeans()
    estimator.fit(gr_feats)

    # predicted clusters
    pred = estimator.predict(gr_feats)
    dist = estimator.transform(gr_feats)
    if fLOG:
        fLOG(  # pragma: no cover
            f'[find_ts_group_pattern] number of clusters: {len(set(pred))}')

    # Rows of groups which were not studied keep -1 / NaN.
    n_rows = ttime.shape[0]
    clusters = numpy.full(n_rows, -1, dtype=pred.dtype)
    dists = numpy.full((n_rows, dist.shape[1]), numpy.nan, dtype=dist.dtype)
    for i, rows in enumerate(gr_rows):
        clusters[rows] = pred[i]
        dists[rows, :] = dist[i, :]

    return clusters, dists