Coverage for src/ensae_teaching_cs/homeblog/filename_helper.py: 74%
84 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Helpers around file names.
4"""
5import os
6import re
7from pyquickhelper.loghelper import noLOG
8from pyquickhelper.filehelper import explore_folder
def get_file_per_folder(folder, deep=1):
    """
    Groups the files found below *folder* by the name of the folder(s)
    which directly contain them.

    @param      folder      folder to explore
    @param      deep        number of folders to consider before the filename
                            (1 or 2)
    @return                 dictionary ``{ key: [file names] }``; with
                            *deep=1* the key is the name of the parent folder,
                            with *deep=2* it is ``"grandparent/parent"``

    Raises *RuntimeError* if *deep* is neither 1 nor 2 (only detected once
    a file is processed, an empty exploration returns an empty dictionary).
    """
    # The second item returned by explore_folder is used as the list of
    # file paths.
    files = explore_folder(folder)[1]
    res = {}
    for f in files:
        # Normalizes Windows separators so that the split works everywhere.
        spl = f.replace("\\", "/").split("/")
        if deep == 1:
            te = spl[-2]
        elif deep == 2:
            # A list slice is not hashable and cannot be a dictionary key,
            # the two folder names are joined into a single string instead.
            te = "/".join(spl[-3:-1])
        else:
            raise RuntimeError("deep should be 1 or 2")
        res.setdefault(te, []).append(spl[-1])
    return res
def normalize_name_and_numbers(files):
    """
    Tries to extract a name and a number from every file name.

    Every file name is lowered, ``_`` and ``!`` are replaced by spaces, then
    three patterns are tried. When several patterns match, the one producing
    the longest normalized name wins. Files which match no pattern are
    skipped.

    @param      files       list of files
    @return                 list of tuple (number, normalized name, extension,
                            suggested name, original name), the number is
                            kept as a string, the list is sorted by the
                            numeric value of the number
    """
    # Each pattern comes with the index of the group holding the number,
    # the name is always the first group.
    patterns = [
        # "<name>[-<subtitle>] - NN"
        (re.compile(
            r"([0-9a-z;() ]+([-][a-z ]+)?) ?[-] ?([0-9]{2,3})[ .v_CF[]"), 2),
        # "<name> episode NN"
        (re.compile(r"([0-9a-z;() ]+) episode ([0-9]{2,3})[ .v_CF[]"), 1),
        # "<name>.<digits> - NN"
        (re.compile(
            r"([a-z0-9 ]+[.][0-9]+) ?[-] ?([0-9]{2,3})[ .v_CF[]"), 1),
    ]
    res = []
    for fi in files:
        name = fi.lower().replace("_", " ").replace("!", " ")
        # The extension is taken from the original name, its case is kept.
        ext = os.path.splitext(fi)[-1]

        solution = None
        for pattern, ind in patterns:
            match = pattern.search(name)
            if match is None:
                continue
            grs = match.groups()
            num = grs[ind]
            # Capitalizes the first letter of every word, the other
            # letters are left untouched.
            nam = " ".join(w[0].upper() + w[1:] for w in grs[0].split())
            if solution is None or len(nam) > len(solution[1]):
                solution = (num, nam, ext, f"{nam} - {num}{ext}", fi)
        if solution is not None:
            res.append(solution)

    # The number is a string of 2 or 3 digits: a plain sort would put "100"
    # before "99". Sorting on the integer value first fixes that, the tuple
    # itself breaks ties exactly as the plain sort did.
    res.sort(key=lambda item: (int(item[0]),) + item)
    return res
def normalize_folder(folder, fLOG=noLOG):
    """
    Renames the files of a folder and its subfolders with their
    suggested normalized names.

    Files are grouped by the name of their parent folder and every path is
    rebuilt as ``folder/<parent>/<file name>``. A file is renamed only when
    its suggested name differs from its current name.

    @param      folder      folder
    @param      fLOG        logging function, called before every renaming
    @return                 list of tuple (number, normalized name, extension,
                            suggested name, original name)

    Raises *RuntimeError* if a new name does not start with an upper case
    letter between ``A`` and ``Z``, the file is not renamed in that case.
    """
    collected = []
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        entries = normalize_name_and_numbers(per_folder[sub])
        for entry in entries:
            suggested, original = entry[-2], entry[-1]
            if suggested == original:
                # Already normalized, nothing to do.
                continue
            pat = os.path.join(folder, sub, original)
            nee = os.path.join(folder, sub, suggested)
            fLOG("rename", pat, " in ", nee)
            neelast = os.path.split(nee)[-1]
            # Safety net: stops before renaming into an unexpected name.
            if not "A" <= neelast[0] <= "Z":
                raise RuntimeError(f"Bad name for {neelast} ({nee}).")
            os.rename(pat, nee)
        collected.extend(entries)
    return collected
def music_statistics(folder):
    """
    Computes, for every subfolder, the range of numbers found in the file
    names and the numbers missing within that range.

    @param      folder      folder
    @return                 dictionary
                            ``{ "folder": { "min": ..., "max": ..., "missing": [...] } }``,
                            folders without any recognized file are left out
    """
    numbers = {}
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        for entry in normalize_name_and_numbers(per_folder[sub]):
            # The first field is the number, stored as a string.
            numbers.setdefault(sub, []).append(int(entry[0]))

    stats = {}
    for sub, values in numbers.items():
        lowest, highest = min(values), max(values)
        present = set(values)
        # Every number between the lowest and the highest one which was
        # not found in the folder.
        missing = [i for i in range(lowest, highest + 1) if i not in present]
        stats[sub] = {"min": lowest, "max": highest, "missing": missing}
    return stats