Coverage for mlinsights/timeseries/datasets.py: 100%
27 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Datasets for timeseries.
4"""
5import datetime
6import numpy
7import pandas
def artificial_data(dt1, dt2, minutes=1):
    """
    Generates artificial data, one observation every *minutes* minutes.

    Timestamps start at *dt1* (included) and stop before *dt2* (excluded).
    Sundays are always skipped. When *minutes* is lower than or equal
    to 120, timestamps whose hour is outside ``[8, 18]`` are skipped as well.
    The signal follows one curve on Saturdays and another one on the other
    days, and gaussian noise (standard deviation 0.1) is added to it.

    @param      dt1         first date
    @param      dt2         second date
    @param      minutes     interval between two observations,
                            must be strictly positive
    @return                 dataframe with columns ``time`` and ``y``,
                            empty if no timestamp is kept
    @raises     ValueError  if *minutes* is not strictly positive

    .. runpython::
        :showcode:

        import datetime
        from mlinsights.timeseries.datasets import artificial_data

        now = datetime.datetime.now()
        data = artificial_data(now - datetime.timedelta(40), now)
        print(data.head())
    """
    # A null or negative step would never reach dt2: the loop below
    # would run forever.
    if minutes <= 0:
        raise ValueError(
            f"minutes must be strictly positive, not {minutes!r}.")

    def fxweek(x):
        # Signal used from Monday to Friday.
        return 2 - x * (1 - x)

    def sat(x):
        # Signal used on Saturdays.
        return 2 * x + 2

    data = []
    step = datetime.timedelta(minutes=minutes)
    current = dt1
    while current < dt2:
        # weekday() == 6 is Sunday: no observation that day.
        if current.weekday() == 6:
            current += step
            continue
        # The hour filter only applies to steps of at most two hours.
        if minutes <= 120 and not 8 <= current.hour <= 18:
            current += step
            continue
        x = (current.hour - 8) / 10
        # weekday() == 5 is Saturday.
        if current.weekday() == 5:
            y = sat(x)
        else:
            y = fxweek(x)
        data.append({'time': current, 'y': y})
        current += step

    if not data:
        # Without any row, DataFrame(data) has no column at all and
        # df['y'] would raise KeyError: build the empty frame explicitly.
        return pandas.DataFrame({
            'time': pandas.DatetimeIndex([]),
            'y': numpy.array([], dtype=float),
        })

    df = pandas.DataFrame(data)
    df['y'] += numpy.random.randn(df.shape[0]) * 0.1
    df['time'] = pandas.DatetimeIndex(df['time'])
    return df