Coverage for src/ensae_teaching_cs/homeblog/filename_helper.py: 74%

84 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-04-28 06:23 +0200

1""" 

2@file 

3@brief Helpers around file names. 

4""" 

5import os 

6import re 

7from pyquickhelper.loghelper import noLOG 

8from pyquickhelper.filehelper import explore_folder 

9 

10 

def get_file_per_folder(folder, deep=1):
    """
    Groups the files found below *folder* by the folder(s) containing them.

    @param      folder      folder to explore
    @param      deep        number of folders to consider before the filename (1 or 2)
    @return                 dictionary ``{ key: [file names] }``, the key is the name of
                            the parent folder if *deep* is 1, the names of the two closest
                            parent folders joined with ``/`` if *deep* is 2

    Raises RuntimeError while processing a file if *deep* is neither 1 nor 2.
    """
    # the second item returned by explore_folder is used as the list of paths
    # (presumably the files, the first one being the folders -- to confirm)
    files = explore_folder(folder)[1]
    res = {}
    for f in files:
        # Windows separators are normalised so that a single split is enough
        spl = f.replace("\\", "/").split("/")
        if deep == 1:
            te = spl[-2]
        elif deep == 2:
            # a list slice is unhashable and cannot be used as a dictionary key,
            # the two folder names are joined into one string instead
            te = "/".join(spl[-3:-1])
        else:
            raise RuntimeError("deep should be 1 or 2")
        res.setdefault(te, []).append(spl[-1])
    return res

38 

39 

def normalize_name_and_numbers(files):
    """
    Looks for a name and a number in every file name.

    @param      files       list of file names
    @return                 sorted list of tuple
                            (number, normalized name, extension, suggested name, original name),
                            a file name matching none of the patterns is left out

    When several patterns match the same file name, the one giving
    the longest normalized name is kept.
    """
    dash_pattern = re.compile(
        "([0-9a-z;() ]+([-][a-z ]+)?) ?[-] ?([0-9]{2,3})[ .v_CF[]")
    episode_pattern = re.compile(
        "([0-9a-z;() ]+) episode ([0-9]{2,3})[ .v_CF[]")
    dotted_pattern = re.compile(
        "([a-z0-9 ]+[.][0-9]+) ?[-] ?([0-9]{2,3})[ .v_CF[]")
    # each pattern comes with the index of the group holding the number
    patterns = ((dash_pattern, 2), (episode_pattern, 1), (dotted_pattern, 1))

    found = []
    for original in files:
        # patterns are applied to a lower case version of the name,
        # the extension keeps the case of the original file name
        lowered = original.lower().replace("_", " ").replace("!", " ")
        extension = os.path.splitext(original)[-1]

        best = None
        for pattern, number_group in patterns:
            match = pattern.search(lowered)
            if match is None:
                continue
            groups = match.groups()
            number = groups[number_group]
            # every word of the name gets an upper case first letter
            title = " ".join(word[0].upper() + word[1:]
                             for word in groups[0].strip().split())
            if best is not None and len(title) <= len(best[1]):
                continue
            suggestion = f"{title} - {number}{extension}"
            best = (number, title, extension, suggestion, original)

        if best is not None:
            found.append(best)

    return sorted(found)

75 

76 

def normalize_folder(folder, fLOG=noLOG):
    """
    Renames the files of a folder and its subfolders with their suggested names.

    @param      folder      folder
    @param      fLOG        logging function, called before every renaming
    @return                 list of tuple (number, normalized name, extension, suggested name, original name)

    Raises RuntimeError if a suggested name does not start with
    a letter between ``A`` and ``Z``, files renamed before that stay renamed.
    """
    everything = []
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        normalized = normalize_name_and_numbers(per_folder[sub])
        for item in normalized:
            suggested, original = item[-2], item[-1]
            if suggested == original:
                # the file already has the expected name
                continue
            source = os.path.join(folder, sub, original)
            target = os.path.join(folder, sub, suggested)
            fLOG("rename", source, " in ", target)
            last = os.path.split(target)[-1]
            if not 'A' <= last[0] <= 'Z':
                raise RuntimeError(f"Bad name for {last} ({target}).")
            os.rename(source, target)
        everything.extend(normalized)
    return everything

99 

100 

def music_statistics(folder):
    """
    Summarizes, for every subfolder, the numbers extracted from its file names.

    @param      folder      folder
    @return                 dictionary ``{ "folder": { "min": ..., "max": ..., "missing": [...] } }``,
                            *missing* lists the numbers between *min* and *max*
                            no file name was found for
    """
    numbers = {}
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        for item in normalize_name_and_numbers(per_folder[sub]):
            # the first item of the tuple is the number as a string
            numbers.setdefault(sub, []).append(int(item[0]))

    stats = {}
    for sub, values in numbers.items():
        lowest, highest = min(values), max(values)
        seen = set(values)
        absent = [i for i in range(lowest, highest + 1) if i not in seen]
        stats[sub] = {"min": lowest, "max": highest, "missing": absent}
    return stats