Coverage for src/ensae_teaching_cs/data/data_helper.py: 72%

25 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-01-27 05:44 +0100

1""" 

2@file 

3@brief Helpers to get data included in the module itself. 

4""" 

5import os 

6from pyquickhelper.filehelper import unzip_files 

7 

8 

def any_local_file(name, subfolder, local=True, cache_folder=".",
                   filename=True, unzip=False, encoding=None):
    """
    Returns the location of a data file or reads its content.

    @param      name            name of the file to retrieve
    @param      subfolder       sub folder, relative to the folder of this
                                module, searched when *local* is True
    @param      local           True to look for the file next to this module,
                                False to fetch it with
                                *pyensae.datasource.download_data*
    @param      cache_folder    folder given to *download_data* and
                                *unzip_files* as their destination
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the local file as well; must be True
                                when *local* is False and *name* ends with
                                ``.zip``
    @param      encoding        encoding used to read the content
    @return                     if *filename* is True, whatever location was
                                obtained (a filename, or the value returned by
                                *unzip_files* / *download_data*, possibly a
                                list), otherwise the text content (str)

    Raises *FileNotFoundError* if the local file does not exist and
    *ValueError* if a remote ``.zip`` file is requested with *unzip* False,
    if the content is requested but zero or several files were obtained,
    or if the content of an archive (``.zip``, ``.gz``, ``.tar``, ``.7z``,
    any letter case) is requested.
    """
    if local:
        path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), subfolder, name)
        if not os.path.exists(path):
            raise FileNotFoundError(path)
    else:
        # Imported here, so pyensae is only needed when local is False.
        import pyensae.datasource
        if not unzip and name.endswith(".zip"):
            raise ValueError(
                f"The file will be unzipped anyway: {name}")
        path = pyensae.datasource.download_data(name, whereTo=cache_folder)
        # No explicit unzip step after a download; the error message above
        # suggests download_data already unzips archives -- TODO confirm.
        unzip = False

    if unzip:
        path = unzip_files(path, where_to=cache_folder)

    if filename:
        return path

    # From here on the content is requested: exactly one readable text
    # file is needed.
    if isinstance(path, list):
        if not path:
            # Explicit error instead of an IndexError on path[0].
            raise ValueError(f"no file found for: {name}")
        if len(path) > 1:
            raise ValueError(
                f"more than one file for: {name}\n{path}")
        path = path[0]

    # The extension is compared in lower case so that "DATA.ZIP" is
    # rejected like "data.zip".
    if os.path.splitext(path)[-1].lower() in (".zip", ".gz", ".tar", ".7z"):
        raise ValueError(f"Cannot read file as text: {path}")
    with open(path, "r", encoding=encoding) as f:
        return f.read()