Coverage for pyquickhelper/texthelper/code_helper.py: 92%
186 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Some functions about diacritics
5"""
6import importlib
7import inspect
8import keyword
9import os
10import re
11from textwrap import dedent
12import warnings
def change_style(name):
    """
    Converts a *CamelCase* identifier (*AaBb*) into *snake_case* (*aa_bb*).
    An underscore is appended when the result is a Python keyword.

    @param      name        name to convert
    @return                 converted name

    Example:

    .. runpython::
        :showcode:

        from pyquickhelper.texthelper import change_style

        print("changeStyle --> {0}".format(change_style('changeStyle')))
    """
    # First pass: split before a capital letter followed by lowercase letters.
    partial = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Second pass: split between a lowercase letter or digit and a capital.
    snake = re.sub('([a-z0-9])([A-Z])', r'\1_\2', partial).lower()
    if keyword.iskeyword(snake):
        return snake + "_"
    return snake
def add_rst_links(text, values, tag="epkg", n=4):
    """
    Replaces known expressions by something like ``:epkg:`word```.

    The text is split into tokens (runs of word characters and runs of
    separators). At every position, the longest sequence of at most *n*
    consecutive tokens which is a key of *values* is replaced.

    @param      text        text to process
    @param      values      container of the expressions to tag, only
                            membership (``in``) is used
    @param      tag         tag to use
    @param      n           maximum number of consecutive tokens to look at,
                            separators count as tokens
                            (``'machine learning'`` is three tokens)
    @return                 new text

    .. runpython::
        :showcode:

        from pyquickhelper.texthelper import add_rst_links
        text = "Maybe... Python is winning the competition for machine learning language."
        values = {'Python': 'https://www.python.org/',
                  'machine learning': 'https://en.wikipedia.org/wiki/Machine_learning'}
        print(add_rst_links(text, values))
    """
    def replace(words, i, n):
        # The window is bounded by *n* tokens and by the end of the text.
        # The previous implementation used ``max`` here, which ignored *n*
        # and always scanned up to the last token.
        mx = min(len(words), i + n)
        # Longest candidate first so that the longest expression wins.
        for last in range(mx, i, -1):
            w = ''.join(words[i:last])
            if w in values:
                return last, f":{tag}:`{w}`"
        return i + 1, words[i]

    reg = re.compile("(([\\\"_*`\\w']+)|([\\W]+)|([ \\n]+))")
    # Every character belongs to one token, joining them gives *text* back.
    words = [m.group(0) for m in reg.finditer(text)]
    res = []
    i = 0
    while i < len(words):
        i, w = replace(words, i, n)
        res.append(w)
    return ''.join(res)
74def _measure_documentation_append(counts, kind, doc, code):
75 if kind not in counts:
76 counts[kind] = {
77 ("raw_length", "doc"): 0,
78 ("raw_length", "code"): 0,
79 ("length", "doc"): 0,
80 ("length", "code"): 0,
81 ("line", "doc"): 0,
82 ("line", "code"): 0,
83 }
84 c = counts[kind]
85 doc = "" if doc is None else dedent(doc)
86 code = "" if code is None else dedent(code)
88 c["raw_length", "doc"] += len(doc)
89 c["raw_length", "code"] += len(code)
91 c["length", "doc"] += len(doc.replace(" ", "").replace("\n", ""))
92 c["length", "code"] += len(code.replace(" ", "").replace("\n", ""))
94 c["line", "doc"] += 0 if len(doc) == 0 else len(doc.split("\n"))
95 c["line", "code"] += 0 if len(code) == 0 else len(code.split("\n"))
98def _measure_documentation_update(counts, c):
99 for key in c:
100 if key not in counts:
101 counts[key] = c[key]
102 else:
103 for k, v in c[key].items():
104 counts[key][k] += v
107def _measure_documentation_ratio(counts):
108 for _, d in counts.items():
109 up = {}
110 for k in d:
111 if k[1] == "code":
112 up[k[0], "ratio"] = (
113 d[k[0], "doc"] / max(d[k] + d[k[0], "doc"], 1))
114 d.update(up)
117def _dictionary_to_dataframe(doc, cols=None):
118 data = []
119 for k, v in doc.items():
120 if isinstance(k, str):
121 ks = [k]
122 else:
123 ks = list(k)
124 if isinstance(v, (float, int)):
125 obs = (cols or []) + ks + [v]
126 data.append(obs)
127 else:
128 lines = _dictionary_to_dataframe(v, (cols or []) + ks)
129 data.extend(lines)
130 if cols is None:
131 import pandas
132 df = pandas.DataFrame(data)
133 if df.shape[1] == 4:
134 df.columns = ['kind', 'stat', 'doc_code', 'value']
135 return df
136 return data
def measure_documentation(mod, ratio=False, include_hidden=False, f_kind=None, as_df=False):
    """
    Measures the fact a module is documented.

    :param mod: module or class
    :param ratio: compute ratios
    :param include_hidden: includes hidden function (starting with `"_"`)
    :param f_kind: function `f(obj: python_object) -> str` which returns
        the first key the result must be indexed by, the function cannot
        return `'function'` or `'class'`
    :param as_df: return the result as a dataframe
    :return: dictionary `{kind: {(stat, 'doc' | 'code' | 'ratio'): value}}`
        or a dataframe if *as_df* is True

    The objects are counted under the following kinds:

    * `'class'`: docstring of a class and source of its `__init__`,
      bound methods
    * `'function'`: functions defined in the measured module
    * `'function_c'`: functions whose source cannot be retrieved
    * `f_kind(obj)`: same numbers again if *f_kind* is specified

    Functions and classes imported from another module are ignored.
    Classes are measured by a recursive call which does not receive
    *f_kind*.

    .. runpython::
        :showcode:

        import pprint
        from pyquickhelper.texthelper import code_helper
        from pyquickhelper.texthelper.code_helper import measure_documentation
        pprint.pprint(measure_documentation(code_helper))
    """
    def source_of(obj):
        # inspect.getsource raises TypeError for built-in objects and OSError
        # when the source cannot be retrieved (generated code for example).
        # Both mean "no source available".
        try:
            return inspect.getsource(obj)
        except (TypeError, OSError):
            return None

    counts = {}
    # Only known when *mod* is a module.
    # NOTE(review): when *mod* is a class, code_mod stays None, so plain
    # functions found on the class fail the module check below and are
    # skipped; only bound methods reach the method branch. Confirm this
    # is intended.
    code_mod = mod.__name__ if inspect.ismodule(mod) else None

    if inspect.isclass(mod):
        doc = mod.__doc__
        init = getattr(mod, "__init__", None)
        code = (source_of(init) if init is not None else None) or ""
        _measure_documentation_append(counts, "class", doc, code)
        if f_kind is not None:
            _measure_documentation_append(counts, f_kind(mod), doc, code)

    for name in dir(mod):
        if name.startswith("_") and not include_hidden:
            continue
        # dir may list names which cannot be retrieved, None is ignored below
        obj = getattr(mod, name, None)
        if inspect.ismethod(obj):
            doc = obj.__doc__
            code = source_of(obj) or ""
            _measure_documentation_append(counts, "class", doc, code)
            if f_kind is not None:
                _measure_documentation_append(counts, f_kind(obj), doc, code)
        elif inspect.isfunction(obj):
            if obj.__module__ != code_mod:
                # imported from somewhere else
                continue
            doc = obj.__doc__
            code = source_of(obj)
            if code is None:
                kind, code = "function_c", ""
            else:
                kind = "function"
            _measure_documentation_append(counts, kind, doc, code)
            if f_kind is not None:
                _measure_documentation_append(counts, f_kind(obj), doc, code)
        elif inspect.isclass(obj):
            if obj.__module__ != code_mod:
                # imported from somewhere else
                continue
            c = measure_documentation(obj, include_hidden=include_hidden)
            _measure_documentation_update(counts, c)

    if ratio:
        _measure_documentation_ratio(counts)
    if as_df:
        return _dictionary_to_dataframe(counts)
    return counts
def measure_documentation_module(mod, ratio=False, include_hidden=False, f_kind=None, silent=True, as_df=False):
    """
    Measures the fact a module is documented.

    :param mod: module or a list of modules, in case of a list
        of modules, a dictionary is returned per module
    :param ratio: compute ratios
    :param include_hidden: includes hidden function (starting with `"_"`)
    :param f_kind: function `f(obj: python_object) -> str` which returns
        the first key the result must be indexed by, the function cannot
        return `'function'` or `'class'`
    :param silent: continue even if the import of a module failed
    :param as_df: return the result as a dataframe
    :return: dictionary or dataframe if *as_df* is True
    :raises ImportError: if a submodule cannot be imported and
        *silent* is False

    The function measures *mod* itself, then, if *mod* is a package,
    every module `*.py` and every subpackage (a folder with a file
    `__init__.py`) found in the folder of the package. Only the file
    `__init__.py` of a subpackage is measured, its own modules are not
    explored. Other files (data, documentation, ...) are ignored.

    .. runpython::
        :showcode:

        import pprint
        import pyquickhelper
        from pyquickhelper.texthelper.code_helper import measure_documentation_module
        pprint.pprint(measure_documentation_module(pyquickhelper))
    """
    def load_submodule(name):
        # Returns the submodule *name* of *mod* or None if the import
        # failed and *silent* is True.
        full_name = f"{mod.__name__}.{name}"
        candidate = getattr(mod, name, None)
        # The attribute may be something else than the submodule
        # (a function with the same name for example).
        if inspect.ismodule(candidate) and candidate.__name__ == full_name:
            return candidate
        try:
            return importlib.import_module(full_name)
        except ImportError as e:
            if silent:
                return None
            raise ImportError(
                f"Unable to import {full_name!r}.") from e

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if isinstance(mod, list):
            counts = {}
            for m in mod:
                counts[m.__name__] = measure_documentation_module(
                    m, ratio=ratio, include_hidden=include_hidden,
                    f_kind=f_kind, silent=silent)
            if as_df:
                df = _dictionary_to_dataframe(counts)
                df.columns = ['module', 'kind', 'stat', 'doc_code', 'value']
                return df
            return counts

        counts = measure_documentation(
            mod, include_hidden=include_hidden, f_kind=f_kind)

        location = getattr(mod, "__file__", None)
        # Only a package has submodules, the files next to a simple
        # module are not part of it.
        if location is not None and hasattr(mod, "__path__"):
            path = os.path.dirname(location)
            seen = set()
            # sorted: the order returned by listdir is not specified
            for sub in sorted(os.listdir(path)):
                name, ext = os.path.splitext(sub)
                if ext:
                    # __init__.py is the package itself, already measured,
                    # importing it again as 'pkg.__init__' would count
                    # its content twice.
                    if ext != ".py" or name == "__init__":
                        continue
                elif not os.path.exists(os.path.join(path, sub, "__init__.py")):
                    # not a subpackage
                    continue
                if name in seen:
                    continue
                seen.add(name)
                sub_mod = load_submodule(name)
                if sub_mod is None:
                    continue
                c = measure_documentation(
                    sub_mod, include_hidden=include_hidden, f_kind=f_kind)
                _measure_documentation_update(counts, c)

        if ratio:
            _measure_documentation_ratio(counts)
        if as_df:
            return _dictionary_to_dataframe(counts)
        return counts