Coverage for pyquickhelper/pycode/profiling.py: 88%
349 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1"""
2@file
3@brief Profiling helpers
4"""
5from collections import deque, OrderedDict
6from io import StringIO
7import json
8import math
9import os
10import site
11import cProfile
12from pstats import Stats
14try:
15 from pstarts import SortKey
16except ImportError: # pragma: no cover
17 # Python < 3.7
19 class SortKey:
20 LINE = 'line'
21 CUMULATIVE = 'cumulative'
22 TIME = 'time'
25class ProfileNode:
26 """
27 Graph structure to represent a profiling.
29 :param filename: filename
30 :param line: line number
31 :param func_name: function name
32 :param nc1: number of calls 1
33 :param nc2: number of calls 2
34 :param tin: time spent in the function
35 :param tout: time spent in the function and in the sub functions
36 """
38 def __init__(self, filename, line, func_name, nc1, nc2, tin, tall):
39 if "method 'disable' of '_lsprof.Profiler'" in func_name:
40 raise RuntimeError( # pragma: no cover
41 f"Function not allowed in the profiling: {func_name!r}.")
42 self.filename = filename
43 self.line = line
44 self.func_name = func_name
45 self.nc1 = nc1
46 self.nc2 = nc2
47 self.tin = tin
48 self.tall = tall
49 self.called_by = []
50 self.calls_to = []
51 self.calls_to_elements = []
53 def add_called_by(self, pnode):
54 "This function is called by these lines."
55 self.called_by.append(pnode)
57 def add_calls_to(self, pnode, time_elements):
58 "This function calls this node."
59 self.calls_to.append(pnode)
60 self.calls_to_elements.append(time_elements)
62 @staticmethod
63 def _key(filename, line, fct):
64 key = "%s:%d:%s" % (filename, line, fct)
65 return key
67 @property
68 def key(self):
69 "Returns `file:line`."
70 return ProfileNode._key(self.filename, self.line,
71 self.func_name)
73 def get_root(self):
74 "Returns the root of the graph."
75 done = set()
77 def _get_root(node, stor=None):
78 if stor is not None:
79 stor.append(node)
80 if len(node.called_by) == 0:
81 return node
82 if len(node.called_by) == 1:
83 return _get_root(node.called_by[0], stor=stor)
84 res = None
85 for ct in node.called_by:
86 k = id(node), id(ct)
87 if k in done:
88 continue
89 res = ct
90 break
91 if res is None:
92 # All paths have been explored and no entry point was found.
93 # Choosing the most consuming function.
94 return None
95 done.add((id(node), id(res)))
96 return _get_root(res, stor=stor)
98 root = _get_root(self)
99 if root is None:
100 candidates = []
101 _get_root(self, stor=candidates)
102 tall = [(n.tall, n) for n in candidates]
103 tall.sort()
104 root = tall[-1][-1]
105 return root
107 def __repr__(self):
108 "usual"
109 return "%s(%r, %r, %r, %r, %r, %r, %r) # %d-%d" % (
110 self.__class__.__name__,
111 self.filename, self.line, self.func_name,
112 self.nc1, self.nc2, self.tin, self.tall,
113 len(self.called_by), len(self.calls_to))
115 def __iter__(self):
116 "Returns all nodes in the graph."
117 done = set()
118 stack = deque()
119 stack.append(self)
120 while len(stack) > 0:
121 node = stack.popleft()
122 if node.key in done:
123 continue
124 yield node
125 done.add(node.key)
126 stack.extend(node.calls_to)
128 _modules_ = {
129 '~', 'subprocess.py', 'posixpath.py', 'os.py',
130 '<frozen importlib._bootstrap>', 'inspect.py',
131 'version.py', 'typing.py', 'warnings.py', 'errors.py',
132 'numbers.py', 'ast.py', 'threading.py', '_collections_abc.py',
133 'datetime.py', 'abc.py', 'argparse.py', '__future__.py',
134 'functools.py', 'six.py', 'sre_parse.py', 'contextlib.py',
135 ' _globals.py', '_ios.py', 'types.py'}
137 @staticmethod
138 def filter_node_(node, info=None):
139 """
140 Filters out node to be displayed by default.
142 :param node: node
143 :param info: if the node is called by a function,
144 this dictionary can be used to overwrite the attributes
145 held by the node
146 :return: boolean (True to keep, False to forget)
147 """
148 if node.filename in ProfileNode._modules_:
149 if info is None:
150 if (node.nc1 <= 10 and node.nc2 <= 10 and
151 node.tall <= 1e-4):
152 return False
153 else:
154 if (info['nc1'] <= 10 and info['nc2'] <= 10 and
155 info['tall'] <= 1e-4):
156 return False
158 return True
160 def as_dict(self, filter_node=None, sort_key=SortKey.LINE):
161 """
162 Renders the results of a profiling interpreted with
163 function @fn profile2graph. It can then be loaded with
164 a dataframe.
166 :param filter_node: display only the nodes for which
167 this function returns True, if None, the default function
168 removes built-in function with small impact
169 :param sort_key: sort sub nodes by...
170 :return: rows
171 """
172 def sort_key_line(dr):
173 if isinstance(dr, tuple):
174 return (dr[0].filename, dr[0].line)
175 return (dr.filename, dr.line)
177 def sort_key_tin(dr):
178 if isinstance(dr, tuple):
179 return -dr[1][2]
180 return -dr.tin
182 def sort_key_tall(dr):
183 if isinstance(dr, tuple):
184 return -dr[1][3]
185 return -dr.tall
187 if sort_key == SortKey.LINE:
188 sortk = sort_key_line
189 elif sort_key == SortKey.CUMULATIVE:
190 sortk = sort_key_tall
191 elif sort_key == SortKey.TIME:
192 sortk = sort_key_tin
193 else:
194 raise NotImplementedError(
195 f"Unable to sort subcalls with this key {sort_key!r}.")
197 def depth_first(node, roots_keys, indent=0):
198 text = {'fct': node.func_name, 'where': node.key,
199 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,
200 'tall': node.tall, 'indent': indent,
201 'ncalls': len(node.calls_to), 'debug': 'A'}
202 yield text
203 for n, nel in sorted(zip(node.calls_to,
204 node.calls_to_elements),
205 key=sortk):
206 if n.key in roots_keys:
207 text = {'fct': n.func_name, 'where': n.key,
208 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],
209 'tall': nel[3], 'indent': indent + 1,
210 'ncalls': len(n.calls_to), 'more': '+',
211 'debug': 'B'}
212 if (filter_node is not None and
213 not filter_node(n, info=text)):
214 continue
215 yield text
216 else:
217 if filter_node is not None and not filter_node(n):
218 continue
219 for t in depth_first(n, roots_keys, indent + 1):
220 yield t
222 if filter_node is None:
223 filter_node = ProfileNode.filter_node_
224 nodes = list(self)
225 roots = [node for node in nodes if len(node.called_by) != 1]
226 roots_key = {r.key: r for r in roots}
227 rows = []
228 for root in sorted(roots, key=sortk):
229 if filter_node is not None and not filter_node(root):
230 continue
231 rows.extend(depth_first(root, roots_key))
232 return rows
234 def to_text(self, filter_node=None, sort_key=SortKey.LINE,
235 fct_width=60):
236 """
237 Prints the profiling to text.
239 :param filter_node: display only the nodes for which
240 this function returns True, if None, the default function
241 removes built-in function with small impact
242 :param sort_key: sort sub nodes by...
243 :return: rows
244 """
245 def align_text(text, size):
246 if size <= 0:
247 return text
248 if len(text) <= size:
249 return text + " " * (size - len(text))
250 h = size // 2 - 1
251 return text[:h] + "..." + text[-h + 1:]
253 dicts = self.as_dict(filter_node=filter_node, sort_key=sort_key)
254 max_nc = max(max(_['nc1'] for _ in dicts),
255 max(_['nc2'] for _ in dicts))
256 dg = int(math.log(max_nc) / math.log(10) + 1.5)
257 line_format = ("{indent}{fct} -- {nc1: %dd} {nc2: %dd} -- {tin:1.5f} {tall:1.5f}"
258 " -- {name} ({fct2})" % (dg, dg))
259 text = []
260 for row in dicts:
261 line = line_format.format(
262 indent=" " * (row['indent'] * 4),
263 fct=align_text(row['fct'], fct_width - row['indent'] * 4),
264 nc1=row['nc1'], nc2=row['nc2'], tin=row['tin'],
265 tall=row['tall'], name=row['where'],
266 fct2=row['fct'])
267 if row.get('more', '') == '+':
268 line += " +++"
269 text.append(line)
270 return "\n".join(text)
272 def to_json(self, filter_node=None, sort_key=SortKey.LINE, as_str=True,
273 **kwargs):
274 """
275 Renders the results of a profiling interpreted with
276 function @fn profile2graph as :epkg:`JSON`.
278 :param filter_node: display only the nodes for which
279 this function returns True, if None, the default function
280 removes built-in function with small impact
281 :param sort_key: sort sub nodes by...
282 :param as_str: converts the json into a string
283 :param kwargs: see :func:`json.dumps`
284 :return: rows
286 See notebook :ref:`profilingrst` to see how to use
287 the json output.
288 """
289 def sort_key_line(dr):
290 if isinstance(dr, tuple):
291 return (dr[0].filename, dr[0].line)
292 return (dr.filename, dr.line)
294 def sort_key_tin(dr):
295 if isinstance(dr, tuple):
296 return -dr[1][2]
297 return -dr.tin
299 def sort_key_tall(dr):
300 if isinstance(dr, tuple):
301 return -dr[1][3]
302 return -dr.tall
304 if sort_key == SortKey.LINE:
305 sortk = sort_key_line
306 elif sort_key == SortKey.CUMULATIVE:
307 sortk = sort_key_tall
308 elif sort_key == SortKey.TIME:
309 sortk = sort_key_tin
310 else:
311 raise NotImplementedError(
312 f"Unable to sort subcalls with this key {sort_key!r}.")
314 def walk(node, roots_keys, indent=0):
315 item = {'details': {
316 'fct': node.func_name, 'where': node.key,
317 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,
318 'tall': node.tall, 'indent': indent,
319 'ncalls': len(node.calls_to)}}
321 child = OrderedDict()
322 for n, nel in sorted(zip(node.calls_to,
323 node.calls_to_elements),
324 key=sortk):
325 key = (nel[0], f"{nel[3]:1.5f}:{n.func_name}")
326 if n.key in roots_keys:
327 details = {'fct': n.func_name, 'where': n.key,
328 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],
329 'tall': nel[3], 'indent': indent,
330 'ncalls': len(node.calls_to)}
331 if (filter_node is not None and
332 not filter_node(n, info=details)):
333 continue
334 child[key] = {'details': details}
335 else:
336 if filter_node is not None and not filter_node(n):
337 continue
338 child[key] = walk(n, roots_key, indent + 1)
340 if len(child) > 0:
341 mx = max(_[0] for _ in child)
342 dg = int(math.log(mx) / math.log(10) + 1.5)
343 form = f"%-{dg}d-%s"
344 child = OrderedDict((form % k, v) for k, v in child.items())
345 item['calls'] = child
346 return item
348 if filter_node is None:
349 filter_node = ProfileNode.filter_node_
350 nodes = list(self)
351 roots = [node for node in nodes if len(node.called_by) != 1]
352 roots_key = {r.key: r for r in roots}
353 rows = OrderedDict()
354 for root in sorted(roots, key=sortk):
355 if filter_node is not None and not filter_node(root):
356 continue
357 key = (root.nc1, f"{root.tall:1.5f}:::{root.func_name}")
358 rows[key] = walk(root, roots_key)
359 mx = max(_[0] for _ in rows)
360 dg = int(math.log(mx) / math.log(10) + 1.5)
361 form = f"%-{dg}d-%s"
362 rows = OrderedDict((form % k, v) for k, v in rows.items())
363 if as_str:
364 return json.dumps({'profile': rows}, **kwargs)
365 return {'profile': rows}
368def _process_pstats(ps, clean_text=None, verbose=False, fLOG=None):
369 """
370 Converts class `Stats <https://docs.python.org/3/library/
371 profile.html#pstats.Stats>`_ into something
372 readable for a dataframe.
374 :param ps: instance of type :func:`pstats.Stats`
375 :param clean_text: function to clean function names
376 :param verbose: change verbosity
377 :param fLOG: logging function
378 :return: list of rows
379 """
380 if clean_text is None:
381 clean_text = lambda x: x
383 def add_rows(rows, d):
384 tt1, tt2 = 0, 0
385 for k, v in d.items():
386 stin = 0
387 stall = 0
388 if verbose and fLOG is not None:
389 fLOG("[pstats] %s=%r" % (
390 (clean_text(k[0].replace("\\", "/")), ) + k[1:],
391 v))
392 if len(v) < 5:
393 continue
394 row = {
395 'file': "%s:%d" % (clean_text(k[0].replace("\\", "/")), k[1]),
396 'fct': k[2],
397 'ncalls1': v[0],
398 'ncalls2': v[1],
399 'tin': v[2],
400 'tall': v[3]
401 }
402 stin += v[2]
403 stall += v[3]
404 if len(v) == 5:
405 t1, t2 = add_rows(rows, v[-1])
406 stin += t1
407 stall += t2
408 row['cum_tin'] = stin
409 row['cum_tall'] = stall
410 rows.append(row)
411 tt1 += stin
412 tt2 += stall
413 return tt1, tt2
415 rows = []
416 add_rows(rows, ps.stats)
417 return rows
420def profile2df(ps, as_df=True, clean_text=None, verbose=False, fLOG=None):
421 """
422 Converts profiling statistics into a Dataframe.
424 :param ps: an instance of `pstats
425 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_
426 :param as_df: returns the results as a dataframe (True)
427 or a list of dictionaries (False)
428 :param clean_text: function to clean function names
429 :param verbose: verbosity
430 :param fLOG: logging function
431 :return: a DataFrame
433 ::
435 import pstats
436 from pyquickhelper.pycode.profiling import profile2df
438 ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof')
439 df = profile2df(pd)
440 print(df)
441 """
442 rows = _process_pstats(ps, clean_text, verbose=verbose, fLOG=fLOG)
443 if not as_df:
444 return rows
446 import pandas
447 df = pandas.DataFrame(rows)
448 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',
449 'tall', 'cum_tall']]
450 df = df.groupby(['fct', 'file'], as_index=False).sum().sort_values(
451 'cum_tall', ascending=False).reset_index(drop=True)
452 return df.copy()
455def profile(fct, sort='cumulative', rootrem=None, as_df=False,
456 pyinst_format=None, return_results=False, **kwargs):
457 """
458 Profiles the execution of a function.
460 :param fct: function to profile
461 :param sort: see `sort_stats <https://docs.python.org/3/library/
462 profile.html#pstats.Stats.sort_stats>`_
463 :param rootrem: root to remove in filenames
464 :param as_df: return the results as a dataframe and not text
465 :param pyinst_format: format for :epkg:`pyinstrument`, if not empty,
466 the function uses this module or raises an exception if not
467 installed, the options are *text*, *textu* (text with colors),
468 *json*, *html*
469 :param return_results: if True, return results as well
470 (in the first position)
471 :param kwargs: additional parameters used to create the profiler
472 :return: raw results, statistics text dump (or dataframe is *as_df* is True)
474 .. plot::
476 import matplotlib.pyplot as plt
477 from pyquickhelper.pycode.profiling import profile
478 from pyquickhelper.texthelper import compare_module_version
480 def fctm():
481 return compare_module_version('0.20.4', '0.22.dev0')
483 pr, df = profile(lambda: [fctm() for i in range(0, 1000)], as_df=True)
484 ax = df[['namefct', 'cum_tall']].head(n=15).set_index(
485 'namefct').plot(kind='bar', figsize=(8, 3), rot=30)
486 ax.set_title("example of a graph")
487 for la in ax.get_xticklabels():
488 la.set_horizontalalignment('right');
489 plt.show()
490 """
491 if pyinst_format is None:
492 pr = cProfile.Profile(**kwargs)
493 pr.enable()
494 fct_res = fct()
495 pr.disable()
496 s = StringIO()
497 ps = Stats(pr, stream=s).sort_stats(sort)
498 ps.print_stats()
499 res = s.getvalue()
500 try:
501 pack = site.getsitepackages()
502 except AttributeError: # pragma: no cover
503 import numpy
504 pack = os.path.normpath(os.path.abspath(
505 os.path.join(os.path.dirname(numpy.__file__), "..")))
506 pack = [pack]
507 pack_ = os.path.normpath(os.path.join(pack[-1], '..'))
509 def clean_text(res):
510 res = res.replace(pack[-1], "site-packages")
511 res = res.replace(pack_, "lib")
512 if rootrem is not None:
513 if isinstance(rootrem, str):
514 res = res.replace(rootrem, '')
515 else:
516 for sub in rootrem:
517 if isinstance(sub, str):
518 res = res.replace(sub, '')
519 elif isinstance(sub, tuple) and len(sub) == 2:
520 res = res.replace(sub[0], sub[1])
521 else:
522 raise TypeError( # pragma: no cover
523 "rootrem must contains strings or tuple not {0}"
524 ".".format(rootrem))
525 return res
527 if as_df:
529 def better_name(row):
530 if len(row['fct']) > 15:
531 return f"{row['file'].split(':')[-1]}-{row['fct']}"
532 name = row['file'].replace("\\", "/")
533 return f"{name.split('/')[-1]}-{row['fct']}"
535 rows = _process_pstats(ps, clean_text)
536 import pandas
537 df = pandas.DataFrame(rows)
538 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',
539 'tall', 'cum_tall']]
540 df['namefct'] = df.apply(lambda row: better_name(row), axis=1)
541 df = df.groupby(['namefct', 'file'], as_index=False).sum().sort_values(
542 'cum_tall', ascending=False).reset_index(drop=True)
543 if return_results:
544 return fct_res, ps, df
545 return ps, df
546 else:
547 res = clean_text(res)
548 if return_results:
549 return fct_res, ps, res
550 return ps, res
551 if as_df:
552 raise ValueError( # pragma: no cover
553 "as_df is not a compatible option with pyinst_format.")
555 try:
556 from pyinstrument import Profiler
557 except ImportError as e: # pragma: no cover
558 raise ImportError("pyinstrument is not installed.") from e
560 profiler = Profiler(**kwargs)
561 profiler.start()
562 fct_res = fct()
563 profiler.stop()
565 if pyinst_format == "text":
566 if return_results:
567 return fct_res, profiler, profiler.output_text(
568 unicode=False, color=False)
569 return profiler, profiler.output_text(unicode=False, color=False)
570 if pyinst_format == "textu":
571 if return_results:
572 return fct_res, profiler, profiler.output_text(
573 unicode=True, color=True)
574 return profiler, profiler.output_text(unicode=True, color=True)
575 if pyinst_format == "json":
576 from pyinstrument.renderers import JSONRenderer
577 if return_results:
578 return fct_res, profiler, profiler.output(
579 JSONRenderer())
580 return profiler, profiler.output(JSONRenderer())
581 if pyinst_format == "html":
582 if return_results:
583 return fct_res, profiler, profiler.output_html()
584 return profiler, profiler.output_html()
585 raise ValueError(f"Unknown format '{pyinst_format}'.")
588def profile2graph(ps, clean_text=None, verbose=False, fLOG=None):
589 """
590 Converts profiling statistics into a graphs.
592 :param ps: an instance of `pstats
593 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_
594 :param clean_text: function to clean function names
595 :param verbose: verbosity
596 :param fLOG: logging function
597 :return: an instance of class @see cl ProfileNode
599 .. exref::
600 :title: Hierarchical display for a profiling
602 :epkg:`pyinstrument` has a nice display to show
603 time spent and call stack at the same time. This function
604 tries to replicate that display based on the results produced
605 by module :mod:`cProfile`. Here is an example.
607 .. runpython::
608 :showcode:
610 import time
611 from pyquickhelper.pycode.profiling import profile, profile2graph
614 def fct0(t):
615 time.sleep(t)
618 def fct1(t):
619 time.sleep(t)
622 def fct2():
623 fct1(0.1)
624 fct1(0.01)
627 def fct3():
628 fct0(0.2)
629 fct1(0.5)
632 def fct4():
633 fct2()
634 fct3()
637 ps = profile(fct4)[0]
638 root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1])
639 text = root.to_text()
640 print(text)
641 """
642 if clean_text is None:
643 clean_text = lambda x: x
645 nodes = {}
646 for k, v in ps.stats.items():
647 if verbose and fLOG is not None:
648 fLOG(f"[pstats] {k}={v!r}")
649 if len(v) < 5:
650 continue
651 if k[0] == '~' and len(v) == 0:
652 # raw function never called by another
653 continue
654 if "method 'disable' of '_lsprof.Profiler'" in k[2]:
655 continue
656 node = ProfileNode(
657 filename=clean_text(k[0].replace("\\", "/")),
658 line=k[1], func_name=k[2],
659 nc1=v[0], nc2=v[1], tin=v[2], tall=v[3])
660 if node.key in nodes:
661 raise RuntimeError( # pragma: no cover
662 f"Key {node.key!r} is already present, node={node!r}.")
663 nodes[node.key] = node
665 for k, v in ps.stats.items():
666 if "method 'disable' of '_lsprof.Profiler'" in k[2]:
667 continue
668 filename = clean_text(k[0].replace("\\", "/"))
669 ks = ProfileNode._key(filename, k[1], k[2])
670 node = nodes[ks]
671 sublist = v[4]
672 for f, vv in sublist.items():
673 if "method 'disable' of '_lsprof.Profiler'" in f[2]:
674 continue
675 name = clean_text(f[0].replace("\\", "/"))
676 key = ProfileNode._key(name, f[1], f[2])
677 if key not in nodes:
678 raise RuntimeError( # pragma: no cover
679 "Unable to find key %r into\n%s" % (
680 key, "\n".join(sorted(nodes))))
681 if k[0] == '~' and len(v) == 0:
682 continue
683 child = nodes[key]
684 node.add_called_by(child)
685 child.add_calls_to(node, vv)
687 for k, v in nodes.items():
688 root = v.get_root()
689 break
691 return root, nodes