Coverage for aftercovid/data/temperatures.py: 100%

35 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-04-19 03:10 +0200

1""" 

2Loads data about temperatures. 

3""" 

4import os 

5import numpy 

6import pandas 

7 

8 

def load_temperatures(country='France'):
    """
    Loads a dataframe containing daily temperatures for year 2020.

    :param country: country name; it is lowercased to build the name of
        the file ``temperature_2020_<country>.xlsx`` stored next to
        this module
    :return: dataframe with columns *day*, *tmax*, *tmin*, *rain*,
        *sun*, *month*, *year*; rows with a missing *tmin* or *day*
        are removed
    :raises ValueError: if no file exists for *country* or if a sheet
        does not contain exactly five named columns

    Source:

    * `temperature_2020_france.xlsx`:
      `meteociel <https://www.meteociel.fr/climatologie/obs_villes.php?
      code2=75107005&mois=11&annee=2020>`_
    """
    # The year appears both in the file name and in the output column,
    # keep a single definition.
    year = 2020
    this = os.path.abspath(os.path.dirname(__file__))
    filename = os.path.join(
        this, f"temperature_{year}_{country.lower()}.xlsx")
    if not os.path.exists(filename):
        raise ValueError(
            f"Unable to load data for country {country!r}.")

    def to_float(val, c, cls=float):
        # '---' marks a missing measure in the spreadsheet.
        if val == '---':
            return numpy.nan
        # Text cells hold several whitespace-separated tokens,
        # only token *c* is converted with *cls*.
        if isinstance(val, (str, numpy.str_)):
            return cls(val.split()[c])
        # Anything else (numbers, NaN) is returned unchanged.
        return val

    def _process(df, month):
        # Columns without a header are read as 'Unnamed: N', skip them.
        columns = [_ for _ in df.columns if 'Unnamed' not in _]
        if len(columns) != 5:
            raise ValueError(  # pragma: no cover
                f"Unexpected number of columns {df.columns!r} "
                f"for month {month!r}.")

        # Explicit copy: the columns assigned below must not be written
        # through a slice of the dataframe returned by read_excel
        # (avoids pandas' SettingWithCopyWarning).
        df = df[columns].copy()
        df.columns = ["day", "tmax", "tmin", "rain", "sun"]
        # The day number is the last token of the cell.
        df['day'] = df['day'].apply(lambda c: to_float(c, -1, int))
        # The temperature is the first token of the cell.
        df['tmax'] = df['tmax'].apply(lambda c: to_float(c, 0))
        df['tmin'] = df['tmin'].apply(lambda c: to_float(c, 0))
        return df

    dfs = []
    for month in range(1, 13):
        # One sheet per month, named '01' to '12'.
        sheet = f"{month:02d}"
        df = pandas.read_excel(
            filename, sheet_name=sheet, header=1, engine="openpyxl")
        if df.shape[0] == 0:
            continue  # pragma: no cover
        df = _process(df, month)
        df['month'] = month
        df['year'] = year
        dfs.append(df)
    res = pandas.concat(dfs)
    # Keep only the rows with both a day and a minimum temperature.
    res = res[res['tmin'].notna() & res['day'].notna()].copy()
    return res