Coverage for src/ensae_teaching_cs/helpers/code_helper.py: 70%

27 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-01-27 05:44 +0100

1""" 

2@file 

3@brief Helpers about code. 

4""" 

5import os 

6import re 

7from pyquickhelper.filehelper import explore_folder_iterfile 

8 

9 

def enumerate_inspect_source_code(
        folder,
        file_pattern=".*[.]((py)|(ipynb))$",
        neg_pattern=".*(([-]checkpoint)|(_todo)|(_temp)).*",
        line_patterns="from sklearn[_0-9a-zA-Z.]* import ([_a-zA-Z0-9]+);;import sklearn[.]([_a-z]+)",
        fullname=False):
    """
    Walks through the files of a folder and yields every group captured
    by a set of regular expressions applied line by line. Every selected
    file is read as text with the :epkg:`utf-8` encoding, bytes which
    cannot be decoded are ignored.

    @param      folder          folder to explore
    @param      file_pattern    regular expression selecting the files
    @param      neg_pattern     regular expression excluding files
    @param      line_patterns   regular expressions searched in every line,
                                separated by ``;;``
    @param      fullname        if True, include the subfolder while
                                checking the regex
    @return                     iterator on dictionaries with keys
                                *group* (captured text), *name* (file),
                                *line* (zero-based line index) and
                                *patid* (index of the pattern in
                                *line_patterns*)

    The function is a generator: nothing happens before the first
    iteration. It raises *FileNotFoundError* at the end of the iteration
    if no file was selected.
    """
    compiled = [re.compile(expr) for expr in line_patterns.split(';;')]
    n_files = 0
    selected = explore_folder_iterfile(
        folder, pattern=file_pattern, neg_pattern=neg_pattern,
        fullname=fullname)

    for name in selected:
        n_files += 1
        try:
            with open(name, "r", encoding="utf-8", errors='ignore') as stream:
                for lineno, text in enumerate(stream):
                    for pattern_id, regex in enumerate(compiled):
                        match = regex.search(text)
                        if match is None:
                            continue
                        # One observation per capturing group, a group
                        # which did not participate is reported as None.
                        for captured in match.groups():
                            yield {
                                'group': captured,
                                'name': name,
                                'line': lineno,
                                'patid': pattern_id,
                            }
        except UnicodeDecodeError as e:
            raise FileNotFoundError(
                f"Unable to process '{name}' due to '{e}'.") from e

    if n_files != 0:
        return

    # Nothing was selected: the message lists the raw content of the
    # folder, then the files matching the positive pattern only.
    content = os.listdir(folder)
    if content:
        content_text = "\n".join(content)
    else:
        content_text = "EMPTY"

    candidates = list(explore_folder_iterfile(
        folder, pattern=file_pattern, fullname=fullname))
    if candidates:
        candidates_text = "\n".join(candidates)
    else:
        candidates_text = "EMPTY"

    mes = "No file found in folder '{0}' with pattern '{1}' (neg='{2}')\n--IN--\n{3}\n--IN--\n{4}"
    raise FileNotFoundError(mes.format(
        folder, file_pattern, neg_pattern, content_text, candidates_text))