Coverage for pyquickhelper/texthelper/code_helper.py: 92%

186 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Helper functions about code: naming style conversion, RST links, documentation measures

5""" 

6import importlib 

7import inspect 

8import keyword 

9import os 

10import re 

11from textwrap import dedent 

12import warnings 

13 

14 

def change_style(name):
    """
    Converts a camel case name (*AaBb*) into a snake case one (*aa_bb*).

    @param      name    name to convert
    @return             converted name in lower case, an underscore is
                        appended if the result is a Python keyword

    Example:

    .. runpython::
        :showcode:

        from pyquickhelper.texthelper import change_style

        print("changeStyle --> {0}".format(change_style('changeStyle')))
    """
    # First pass: insert "_" before a capital followed by lowercase letters.
    split_words = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Second pass: insert "_" between a lowercase letter or digit and a capital.
    snake = re.sub('([a-z0-9])([A-Z])', r'\1_\2', split_words).lower()
    if keyword.iskeyword(snake):
        # Avoids returning a reserved word such as "class".
        return snake + "_"
    return snake

34 

35 

def add_rst_links(text, values, tag="epkg", n=4):
    """
    Replaces words by something like ``:epkg:'word'``.

    @param      text        text to process
    @param      values      container of the expressions to replace,
                            only membership (``in``) is used
    @param      tag         tag to use
    @param      n           maximum number of consecutive tokens joined
                            to form a candidate expression, separators
                            (spaces, punctuation) count as tokens, so
                            ``'machine learning'`` is made of 3 tokens
    @return                 new text

    The text is split into tokens (words and separators). At every
    position, the longest sequence of at most *n* tokens found in *values*
    is replaced, the search then resumes after it. With ``n <= 0``,
    the text is returned unchanged.

    .. runpython::
        :showcode:

        from pyquickhelper.texthelper import add_rst_links
        text = "Maybe... Python is winning the competition for machine learning language."
        values = {'Python': 'https://www.python.org/',
                  'machine learning': 'https://en.wikipedia.org/wiki/Machine_learning'}
        print(add_rst_links(text, values))
    """
    def replace(words, i, n):
        # The window stops at the end of the text and never exceeds n tokens
        # (the previous ``max`` made it always reach the end, ignoring n).
        mx = min(len(words), i + n)
        # Longest candidate first.
        for last in range(mx, i, -1):
            w = ''.join(words[i:last])
            if w in values:
                return last, f":{tag}:`{w}`"
        return i + 1, words[i]

    # A token is either a run of word-like characters or a run of separators.
    reg = re.compile("(([\\\"_*`\\w']+)|([\\W]+)|([ \\n]+))")
    # findall returns one tuple per match, the first group is the whole token.
    words = [_[0] for _ in reg.findall(text)]
    res = []
    i = 0
    while i < len(words):
        i, w = replace(words, i, n)
        res.append(w)
    return ''.join(res)

72 

73 

74def _measure_documentation_append(counts, kind, doc, code): 

75 if kind not in counts: 

76 counts[kind] = { 

77 ("raw_length", "doc"): 0, 

78 ("raw_length", "code"): 0, 

79 ("length", "doc"): 0, 

80 ("length", "code"): 0, 

81 ("line", "doc"): 0, 

82 ("line", "code"): 0, 

83 } 

84 c = counts[kind] 

85 doc = "" if doc is None else dedent(doc) 

86 code = "" if code is None else dedent(code) 

87 

88 c["raw_length", "doc"] += len(doc) 

89 c["raw_length", "code"] += len(code) 

90 

91 c["length", "doc"] += len(doc.replace(" ", "").replace("\n", "")) 

92 c["length", "code"] += len(code.replace(" ", "").replace("\n", "")) 

93 

94 c["line", "doc"] += 0 if len(doc) == 0 else len(doc.split("\n")) 

95 c["line", "code"] += 0 if len(code) == 0 else len(code.split("\n")) 

96 

97 

98def _measure_documentation_update(counts, c): 

99 for key in c: 

100 if key not in counts: 

101 counts[key] = c[key] 

102 else: 

103 for k, v in c[key].items(): 

104 counts[key][k] += v 

105 

106 

107def _measure_documentation_ratio(counts): 

108 for _, d in counts.items(): 

109 up = {} 

110 for k in d: 

111 if k[1] == "code": 

112 up[k[0], "ratio"] = ( 

113 d[k[0], "doc"] / max(d[k] + d[k[0], "doc"], 1)) 

114 d.update(up) 

115 

116 

117def _dictionary_to_dataframe(doc, cols=None): 

118 data = [] 

119 for k, v in doc.items(): 

120 if isinstance(k, str): 

121 ks = [k] 

122 else: 

123 ks = list(k) 

124 if isinstance(v, (float, int)): 

125 obs = (cols or []) + ks + [v] 

126 data.append(obs) 

127 else: 

128 lines = _dictionary_to_dataframe(v, (cols or []) + ks) 

129 data.extend(lines) 

130 if cols is None: 

131 import pandas 

132 df = pandas.DataFrame(data) 

133 if df.shape[1] == 4: 

134 df.columns = ['kind', 'stat', 'doc_code', 'value'] 

135 return df 

136 return data 

137 

138 

def _measure_documentation_source(obj):
    """
    Returns the source of *obj* or None if :mod:`inspect` cannot retrieve it.
    """
    try:
        return inspect.getsource(obj)
    except (TypeError, OSError):
        # TypeError: built-in object, OSError: the source code is not available.
        return None


def measure_documentation(mod, ratio=False, include_hidden=False, f_kind=None, as_df=False):
    """
    Measures the fact a module is documented.

    :param mod: module or class
    :param ratio: compute ratios
    :param include_hidden: includes hidden function (starting with `"_"`)
    :param f_kind: function `f(obj: python_object) -> str` which returns
        the first key the result must be indexed by, the function cannot
        return `'function'` or `'class'`
    :param as_df: return the result as a dataframe
    :return: dictionary ``{kind: {(stat, 'doc' or 'code'): value}}``
        or a dataframe if *as_df* is True

    An object whose source cannot be retrieved is counted with an empty
    code, a function in that case is stored under kind ``'function_c'``.
    Functions and classes found in a module are only measured if they
    are defined in this module.

    .. runpython::
        :showcode:

        import pprint
        from pyquickhelper.texthelper import code_helper
        from pyquickhelper.texthelper.code_helper import measure_documentation
        pprint.pprint(measure_documentation(code_helper))
    """
    counts = {}
    code_mod = mod.__name__ if inspect.ismodule(mod) else None

    if inspect.isclass(mod):
        # A class is measured by its docstring and the code of its constructor.
        doc = mod.__doc__
        init = getattr(mod, "__init__", None)
        code = None if init is None else _measure_documentation_source(init)
        if code is None:
            code = ""
        _measure_documentation_append(counts, "class", doc, code)
        if f_kind is not None:
            _measure_documentation_append(counts, f_kind(mod), doc, code)

    for name in dir(mod):
        if name.startswith("_") and not include_hidden:
            continue
        obj = getattr(mod, name)
        if inspect.ismethod(obj):
            doc = obj.__doc__
            code = _measure_documentation_source(obj)
            if code is None:
                code = ""
            _measure_documentation_append(counts, "class", doc, code)
            if f_kind is not None:
                _measure_documentation_append(counts, f_kind(obj), doc, code)
        elif inspect.isfunction(obj):
            # NOTE(review): when *mod* is a class, code_mod is None, plain
            # functions (regular methods read from the class) do not pass
            # this check and are skipped -- confirm this is intended.
            if obj.__module__ != code_mod:
                continue
            doc = obj.__doc__
            code = _measure_documentation_source(obj)
            if code is None:
                kind = "function_c"
                code = ""
            else:
                kind = "function"
            _measure_documentation_append(counts, kind, doc, code)
            if f_kind is not None:
                _measure_documentation_append(counts, f_kind(obj), doc, code)
        elif inspect.isclass(obj):
            if obj.__module__ != code_mod:
                continue
            # NOTE(review): f_kind is not forwarded to classes -- confirm.
            c = measure_documentation(obj, include_hidden=include_hidden)
            _measure_documentation_update(counts, c)

    if ratio:
        _measure_documentation_ratio(counts)
    if as_df:
        return _dictionary_to_dataframe(counts)
    return counts

221 

222 

def _measure_documentation_submodule(parent, name, include_hidden, f_kind, silent):
    """
    Imports ``parent.name`` and measures it, returns None if the import
    failed and *silent* is True.
    """
    full_name = f"{parent.__name__}.{name}"
    try:
        sub_mod = importlib.import_module(full_name)
    except ImportError as e:
        if silent:
            return None
        raise ImportError(
            f"Unable to import {full_name!r}.") from e
    return measure_documentation(
        sub_mod, include_hidden=include_hidden, f_kind=f_kind)


def measure_documentation_module(mod, ratio=False, include_hidden=False, f_kind=None, silent=True, as_df=False):
    """
    Measures the fact a module is documented.

    :param mod: module or a list of modules, in case of a list
        of modules, a dictionary is returned per module
    :param ratio: compute ratios
    :param include_hidden: includes hidden function (starting with `"_"`)
    :param f_kind: function `f(obj: python_object) -> str` which returns
        the first key the result must be indexed by, the function cannot
        return `'function'` or `'class'`
    :param silent: continue even if the import of a module failed
    :param as_df: return the result as a dataframe
    :return: dictionary or a dataframe if *as_df* is True
    :raises ImportError: if a sub-module cannot be imported
        and *silent* is False

    The function measures *mod*, then every entry of its folder: files
    with an extension and sub-folders containing ``__init__.py`` are
    imported as ``mod.<name>`` and measured with
    :func:`measure_documentation`. Sub-packages are not walked recursively.
    ``__init__`` files are skipped (the package itself is already
    measured) and a sub-module is counted only once even if several
    entries of the folder share its name. Warnings are ignored.

    .. runpython::
        :showcode:

        import pprint
        import pyquickhelper
        from pyquickhelper.texthelper.code_helper import measure_documentation_module
        pprint.pprint(measure_documentation_module(pyquickhelper))
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if isinstance(mod, list):
            counts = {}
            for m in mod:
                counts[m.__name__] = measure_documentation_module(
                    m, ratio=ratio, include_hidden=include_hidden,
                    f_kind=f_kind, silent=silent)
            if as_df:
                df = _dictionary_to_dataframe(counts)
                # An empty list of modules produces a dataframe without column.
                if df.shape[1] == 5:
                    df.columns = ['module', 'kind',
                                  'stat', 'doc_code', 'value']
                return df
            return counts

        counts = measure_documentation(
            mod, include_hidden=include_hidden, f_kind=f_kind)
        path = os.path.dirname(mod.__file__)
        measured = set()
        # Sorted to process the folder in a deterministic order.
        for sub in sorted(os.listdir(path)):
            name, ext = os.path.splitext(sub)
            if ext:
                # NOTE(review): any file with an extension is tried as
                # a module, a data file raises ImportError if silent is False.
                if name == "__init__":
                    # Importing 'pkg.__init__' would execute the package
                    # a second time and count its content twice.
                    continue
            elif not os.path.exists(os.path.join(path, sub, "__init__.py")):
                # Not a package.
                continue
            if name in measured:
                continue
            c = _measure_documentation_submodule(
                mod, name, include_hidden, f_kind, silent)
            if c is None:
                # Failed import, another entry with the same name may work.
                continue
            measured.add(name)
            _measure_documentation_update(counts, c)

        if ratio:
            _measure_documentation_ratio(counts)
        if as_df:
            return _dictionary_to_dataframe(counts)
        return counts