Coverage for aftercovid/data/pandas_cache.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2024-04-18 03:09 +0200

1""" 

2Caches a file updated every day. 

3""" 

4import os 

5from datetime import datetime 

6from urllib.error import HTTPError 

7import pandas 

8 

9 

def read_csv_cache(cache, url, **kwargs):
    """
    Loads a CSV file through a cache renewed every day.

    The cache file is named ``<cache>-YYYY-MM-DD.csv`` after the
    current local date. If it already exists, it is read instead of
    *url*. Otherwise the data is read from *url* and saved into
    the cache file before being returned.

    :param cache: filename prefix of the cache file
    :param url: data url
    :param kwargs: see :epkg:`pandas:read_csv`, the same arguments are
        used to read *url* and to read the cache file
    :return: see :epkg:`pandas:read_csv`
    """
    now = datetime.now()
    ext = "%s-%04d-%02d-%02d.csv" % (cache, now.year, now.month, now.day)
    if os.path.exists(ext):
        return pandas.read_csv(ext, **kwargs)
    df = pandas.read_csv(url, **kwargs)  # pragma: no cover
    # The cache is read back with the very same kwargs: it must be
    # written with the separator (``delimiter`` is an alias of ``sep``
    # in read_csv) and the encoding the next call is going to use,
    # otherwise the cached copy is parsed differently from the source.
    sep = kwargs.get('sep') or kwargs.get('delimiter') or ','
    # NOTE: the index is not saved (index=False), options such as
    # ``index_col`` may not round-trip through the cache.
    df.to_csv(ext, sep=sep, encoding=kwargs.get('encoding'),
              index=False)  # pragma: no cover
    return df  # pragma: no cover

27 

28 

def geo_read_csv_cache(cache, url, backup=None, **kwargs):
    """
    Loads geographic data through a cache renewed every day.

    The cache file is named ``<cache>-YYYY-MM-DD.geojson`` after the
    current local date. If it already exists, it is read instead of
    *url*. Otherwise the data is read from *url*, or from *backup*
    if the download raises ``HTTPError``, and saved as GeoJSON into
    the cache file before being returned.

    :param cache: filename prefix of the cache file
    :param url: data url
    :param backup: backup file (geojson),
        used when the connection has failed
    :param kwargs: parameters sent to ``geopandas.read_file``
    :return: what ``geopandas.read_file`` returns
    :raises HTTPError: if the download fails and *backup* is None
    """
    import geopandas
    now = datetime.now()
    ext = "%s-%04d-%02d-%02d.geojson" % (cache, now.year, now.month, now.day)
    if os.path.exists(ext):
        return geopandas.read_file(ext, **kwargs)
    try:  # pragma: no cover
        df = geopandas.read_file(url, **kwargs)  # pragma: no cover
    except HTTPError:  # pragma: no cover
        if backup is None:
            raise
        # use a backup in case the connection failed.
        df = geopandas.read_file(backup, **kwargs)
    # The data is serialized before the cache file is created so that
    # a failure does not leave an empty cache file behind.
    content = df.to_json()  # pragma: no cover
    with open(ext, 'w', encoding='utf-8') as f:  # pragma: no cover
        # kwargs are meant for read_file, file.write does not accept them.
        f.write(content)
    return df  # pragma: no cover