Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Profiling helpers
4"""
5from collections import deque, OrderedDict
6from io import StringIO
7import json
8import math
9import os
10import site
11import cProfile
12from pstats import Stats
14try:
15 from pstarts import SortKey
16except ImportError: # pragma: no cover
17 # Python < 3.7
19 class SortKey:
20 LINE = 'line'
21 CUMULATIVE = 'cumulative'
22 TIME = 'time'
25class ProfileNode:
26 """
27 Graph structure to represent a profiling.
29 :param filename: filename
30 :param line: line number
31 :param func_name: function name
32 :param nc1: number of calls 1
33 :param nc2: number of calls 2
34 :param tin: time spent in the function
35 :param tout: time spent in the function and in the sub functions
37 .. versionadded:: 1.11
38 """
40 def __init__(self, filename, line, func_name, nc1, nc2, tin, tall):
41 if "method 'disable' of '_lsprof.Profiler'" in func_name:
42 raise RuntimeError(
43 "Function not allowed in the profiling: %r." % func_name)
44 self.filename = filename
45 self.line = line
46 self.func_name = func_name
47 self.nc1 = nc1
48 self.nc2 = nc2
49 self.tin = tin
50 self.tall = tall
51 self.called_by = []
52 self.calls_to = []
53 self.calls_to_elements = []
55 def add_called_by(self, pnode):
56 "This function is called by these lines."
57 self.called_by.append(pnode)
59 def add_calls_to(self, pnode, time_elements):
60 "This function calls this node."
61 self.calls_to.append(pnode)
62 self.calls_to_elements.append(time_elements)
64 @staticmethod
65 def _key(filename, line, fct):
66 key = "%s:%d:%s" % (filename, line, fct)
67 return key
69 @property
70 def key(self):
71 "Returns `file:line`."
72 return ProfileNode._key(self.filename, self.line,
73 self.func_name)
75 def get_root(self):
76 "Returns the root of the graph."
77 done = set()
79 def _get_root(node, stor=None):
80 if stor is not None:
81 stor.append(node)
82 if len(node.called_by) == 0:
83 return node
84 if len(node.called_by) == 1:
85 return _get_root(node.called_by[0], stor=stor)
86 res = None
87 for ct in node.called_by:
88 k = id(node), id(ct)
89 if k in done:
90 continue
91 res = ct
92 break
93 if res is None:
94 # All paths have been explored and no entry point was found.
95 # Choosing the most consuming function.
96 return None
97 done.add((id(node), id(res)))
98 return _get_root(res, stor=stor)
100 root = _get_root(self)
101 if root is None:
102 candidates = []
103 _get_root(self, stor=candidates)
104 tall = [(n.tall, n) for n in candidates]
105 tall.sort()
106 root = tall[-1][-1]
107 return root
109 def __repr__(self):
110 "usual"
111 return "%s(%r, %r, %r, %r, %r, %r, %r) # %d-%d" % (
112 self.__class__.__name__,
113 self.filename, self.line, self.func_name,
114 self.nc1, self.nc2, self.tin, self.tall,
115 len(self.called_by), len(self.calls_to))
117 def __iter__(self):
118 "Returns all nodes in the graph."
119 done = set()
120 stack = deque()
121 stack.append(self)
122 while len(stack) > 0:
123 node = stack.popleft()
124 if node.key in done:
125 continue
126 yield node
127 done.add(node.key)
128 stack.extend(node.calls_to)
130 _modules_ = {
131 '~', 'subprocess.py', 'posixpath.py', 'os.py',
132 '<frozen importlib._bootstrap>', 'inspect.py',
133 'version.py', 'typing.py', 'warnings.py', 'errors.py',
134 'numbers.py', 'ast.py', 'threading.py', '_collections_abc.py',
135 'datetime.py', 'abc.py', 'argparse.py', '__future__.py',
136 'functools.py', 'six.py', 'sre_parse.py', 'contextlib.py',
137 ' _globals.py', '_ios.py', 'types.py'}
139 @staticmethod
140 def filter_node_(node, info=None):
141 """
142 Filters out node to be displayed by default.
144 :param node: node
145 :param info: if the node is called by a function,
146 this dictionary can be used to overwrite the attributes
147 held by the node
148 :return: boolean (True to keep, False to forget)
149 """
150 if node.filename in ProfileNode._modules_:
151 if info is None:
152 if (node.nc1 <= 10 and node.nc2 <= 10 and
153 node.tall <= 1e-4):
154 return False
155 else:
156 if (info['nc1'] <= 10 and info['nc2'] <= 10 and
157 info['tall'] <= 1e-4):
158 return False
160 return True
162 def as_dict(self, filter_node=None, sort_key=SortKey.LINE):
163 """
164 Renders the results of a profiling interpreted with
165 function @fn profile2graph. It can then be loaded with
166 a dataframe.
168 :param filter_node: display only the nodes for which
169 this function returns True, if None, the default function
170 removes built-in function with small impact
171 :param sort_key: sort sub nodes by...
172 :return: rows
173 """
174 def sort_key_line(dr):
175 if isinstance(dr, tuple):
176 return (dr[0].filename, dr[0].line)
177 return (dr.filename, dr.line)
179 def sort_key_tin(dr):
180 if isinstance(dr, tuple):
181 return -dr[1][2]
182 return -dr.tin
184 def sort_key_tall(dr):
185 if isinstance(dr, tuple):
186 return -dr[1][3]
187 return -dr.tall
189 if sort_key == SortKey.LINE:
190 sortk = sort_key_line
191 elif sort_key == SortKey.CUMULATIVE:
192 sortk = sort_key_tall
193 elif sort_key == SortKey.TIME:
194 sortk = sort_key_tin
195 else:
196 raise NotImplementedError(
197 "Unable to sort subcalls with this key %r." % sort_key)
199 def depth_first(node, roots_keys, indent=0):
200 text = {'fct': node.func_name, 'where': node.key,
201 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,
202 'tall': node.tall, 'indent': indent,
203 'ncalls': len(node.calls_to), 'debug': 'A'}
204 yield text
205 for n, nel in sorted(zip(node.calls_to,
206 node.calls_to_elements),
207 key=sortk):
208 if n.key in roots_keys:
209 text = {'fct': n.func_name, 'where': n.key,
210 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],
211 'tall': nel[3], 'indent': indent + 1,
212 'ncalls': len(n.calls_to), 'more': '+',
213 'debug': 'B'}
214 if (filter_node is not None and
215 not filter_node(n, info=text)):
216 continue
217 yield text
218 else:
219 if filter_node is not None and not filter_node(n):
220 continue
221 for t in depth_first(n, roots_keys, indent + 1):
222 yield t
224 if filter_node is None:
225 filter_node = ProfileNode.filter_node_
226 nodes = list(self)
227 roots = [node for node in nodes if len(node.called_by) != 1]
228 roots_key = {r.key: r for r in roots}
229 rows = []
230 for root in sorted(roots, key=sortk):
231 if filter_node is not None and not filter_node(root):
232 continue
233 rows.extend(depth_first(root, roots_key))
234 return rows
236 def to_text(self, filter_node=None, sort_key=SortKey.LINE,
237 fct_width=60):
238 """
239 Prints the profiling to text.
241 :param filter_node: display only the nodes for which
242 this function returns True, if None, the default function
243 removes built-in function with small impact
244 :param sort_key: sort sub nodes by...
245 :return: rows
246 """
247 def align_text(text, size):
248 if size <= 0:
249 return text
250 if len(text) <= size:
251 return text + " " * (size - len(text))
252 h = size // 2 - 1
253 return text[:h] + "..." + text[-h + 1:]
255 dicts = self.as_dict(filter_node=filter_node, sort_key=sort_key)
256 max_nc = max(max(_['nc1'] for _ in dicts),
257 max(_['nc2'] for _ in dicts))
258 dg = int(math.log(max_nc) / math.log(10) + 1.5)
259 line_format = ("{indent}{fct} -- {nc1: %dd} {nc2: %dd} -- {tin:1.5f} {tall:1.5f}"
260 " -- {name} ({fct2})" % (dg, dg))
261 text = []
262 for row in dicts:
263 line = line_format.format(
264 indent=" " * (row['indent'] * 4),
265 fct=align_text(row['fct'], fct_width - row['indent'] * 4),
266 nc1=row['nc1'], nc2=row['nc2'], tin=row['tin'],
267 tall=row['tall'], name=row['where'],
268 fct2=row['fct'])
269 if row.get('more', '') == '+':
270 line += " +++"
271 text.append(line)
272 return "\n".join(text)
274 def to_json(self, filter_node=None, sort_key=SortKey.LINE, as_str=True,
275 **kwargs):
276 """
277 Renders the results of a profiling interpreted with
278 function @fn profile2graph as :epkg:`JSON`.
280 :param filter_node: display only the nodes for which
281 this function returns True, if None, the default function
282 removes built-in function with small impact
283 :param sort_key: sort sub nodes by...
284 :param as_str: converts the json into a string
285 :param kwargs: see :func:`json.dumps`
286 :return: rows
288 See notebook :ref:`profilingrst` to see how to use
289 the json output.
290 """
291 def sort_key_line(dr):
292 if isinstance(dr, tuple):
293 return (dr[0].filename, dr[0].line)
294 return (dr.filename, dr.line)
296 def sort_key_tin(dr):
297 if isinstance(dr, tuple):
298 return -dr[1][2]
299 return -dr.tin
301 def sort_key_tall(dr):
302 if isinstance(dr, tuple):
303 return -dr[1][3]
304 return -dr.tall
306 if sort_key == SortKey.LINE:
307 sortk = sort_key_line
308 elif sort_key == SortKey.CUMULATIVE:
309 sortk = sort_key_tall
310 elif sort_key == SortKey.TIME:
311 sortk = sort_key_tin
312 else:
313 raise NotImplementedError(
314 "Unable to sort subcalls with this key %r." % sort_key)
316 def walk(node, roots_keys, indent=0):
317 item = {'details': {
318 'fct': node.func_name, 'where': node.key,
319 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,
320 'tall': node.tall, 'indent': indent,
321 'ncalls': len(node.calls_to)}}
323 child = OrderedDict()
324 for n, nel in sorted(zip(node.calls_to,
325 node.calls_to_elements),
326 key=sortk):
327 key = (nel[0], "%1.5f:%s" % (nel[3], n.func_name))
328 if n.key in roots_keys:
329 details = {'fct': n.func_name, 'where': n.key,
330 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],
331 'tall': nel[3], 'indent': indent,
332 'ncalls': len(node.calls_to)}
333 if (filter_node is not None and
334 not filter_node(n, info=details)):
335 continue
336 child[key] = {'details': details}
337 else:
338 if filter_node is not None and not filter_node(n):
339 continue
340 child[key] = walk(n, roots_key, indent + 1)
342 if len(child) > 0:
343 mx = max(_[0] for _ in child)
344 dg = int(math.log(mx) / math.log(10) + 1.5)
345 form = "%-{}d-%s".format(dg)
346 child = OrderedDict((form % k, v) for k, v in child.items())
347 item['calls'] = child
348 return item
350 if filter_node is None:
351 filter_node = ProfileNode.filter_node_
352 nodes = list(self)
353 roots = [node for node in nodes if len(node.called_by) != 1]
354 roots_key = {r.key: r for r in roots}
355 rows = OrderedDict()
356 for root in sorted(roots, key=sortk):
357 if filter_node is not None and not filter_node(root):
358 continue
359 key = (root.nc1, "%1.5f:::%s" % (root.tall, root.func_name))
360 rows[key] = walk(root, roots_key)
361 mx = max(_[0] for _ in rows)
362 dg = int(math.log(mx) / math.log(10) + 1.5)
363 form = "%-{}d-%s".format(dg)
364 rows = OrderedDict((form % k, v) for k, v in rows.items())
365 if as_str:
366 return json.dumps({'profile': rows}, **kwargs)
367 return {'profile': rows}
370def _process_pstats(ps, clean_text=None, verbose=False, fLOG=None):
371 """
372 Converts class `Stats <https://docs.python.org/3/library/
373 profile.html#pstats.Stats>`_ into something
374 readable for a dataframe.
376 :param ps: instance of type :func:`pstats.Stats`
377 :param clean_text: function to clean function names
378 :param verbose: change verbosity
379 :param fLOG: logging function
380 :return: list of rows
381 """
382 if clean_text is None:
383 clean_text = lambda x: x
385 def add_rows(rows, d):
386 tt1, tt2 = 0, 0
387 for k, v in d.items():
388 stin = 0
389 stall = 0
390 if verbose and fLOG is not None:
391 fLOG("[pstats] %s=%r" % (
392 (clean_text(k[0].replace("\\", "/")), ) + k[1:],
393 v))
394 if len(v) < 5:
395 continue
396 row = {
397 'file': "%s:%d" % (clean_text(k[0].replace("\\", "/")), k[1]),
398 'fct': k[2],
399 'ncalls1': v[0],
400 'ncalls2': v[1],
401 'tin': v[2],
402 'tall': v[3]
403 }
404 stin += v[2]
405 stall += v[3]
406 if len(v) == 5:
407 t1, t2 = add_rows(rows, v[-1])
408 stin += t1
409 stall += t2
410 row['cum_tin'] = stin
411 row['cum_tall'] = stall
412 rows.append(row)
413 tt1 += stin
414 tt2 += stall
415 return tt1, tt2
417 rows = []
418 add_rows(rows, ps.stats)
419 return rows
422def profile2df(ps, as_df=True, clean_text=None, verbose=False, fLOG=None):
423 """
424 Converts profiling statistics into a Dataframe.
426 :param ps: an instance of `pstats
427 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_
428 :param as_df: returns the results as a dataframe (True)
429 or a list of dictionaries (False)
430 :param clean_text: function to clean function names
431 :param verbose: verbosity
432 :param fLOG: logging function
433 :return: a DataFrame
435 ::
437 import pstats
438 from pyquickhelper.pycode.profiling import profile2df
440 ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof')
441 df = profile2df(pd)
442 print(df)
443 """
444 rows = _process_pstats(ps, clean_text, verbose=verbose, fLOG=fLOG)
445 if not as_df:
446 return rows
448 import pandas
449 df = pandas.DataFrame(rows)
450 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',
451 'tall', 'cum_tall']]
452 df = df.groupby(['fct', 'file'], as_index=False).sum().sort_values(
453 'cum_tall', ascending=False).reset_index(drop=True)
454 return df.copy()
457def profile(fct, sort='cumulative', rootrem=None, as_df=False,
458 pyinst_format=None, return_results=False, **kwargs):
459 """
460 Profiles the execution of a function.
462 :param fct: function to profile
463 :param sort: see `sort_stats <https://docs.python.org/3/library/
464 profile.html#pstats.Stats.sort_stats>`_
465 :param rootrem: root to remove in filenames
466 :param as_df: return the results as a dataframe and not text
467 :param pyinst_format: format for :epkg:`pyinstrument`, if not empty,
468 the function uses this module or raises an exception if not
469 installed, the options are *text*, *textu* (text with colors),
470 *json*, *html*
471 :param return_results: if True, return results as well
472 (in the first position)
473 :param kwargs: additional parameters used to create the profiler
474 :return: raw results, statistics text dump (or dataframe is *as_df* is True)
476 .. plot::
478 import matplotlib.pyplot as plt
479 from pyquickhelper.pycode.profiling import profile
480 from pyquickhelper.texthelper import compare_module_version
482 def fctm():
483 return compare_module_version('0.20.4', '0.22.dev0')
485 pr, df = profile(lambda: [fctm() for i in range(0, 1000)], as_df=True)
486 ax = df[['namefct', 'cum_tall']].head(n=15).set_index(
487 'namefct').plot(kind='bar', figsize=(8, 3), rot=30)
488 ax.set_title("example of a graph")
489 for la in ax.get_xticklabels():
490 la.set_horizontalalignment('right');
491 plt.show()
493 .. versionchanged:: 1.11
494 Parameter *return_results* was added.
495 """
496 if pyinst_format is None:
497 pr = cProfile.Profile(**kwargs)
498 pr.enable()
499 fct_res = fct()
500 pr.disable()
501 s = StringIO()
502 ps = Stats(pr, stream=s).sort_stats(sort)
503 ps.print_stats()
504 res = s.getvalue()
505 try:
506 pack = site.getsitepackages()
507 except AttributeError: # pragma: no cover
508 import numpy
509 pack = os.path.normpath(os.path.abspath(
510 os.path.join(os.path.dirname(numpy.__file__), "..")))
511 pack = [pack]
512 pack_ = os.path.normpath(os.path.join(pack[-1], '..'))
514 def clean_text(res):
515 res = res.replace(pack[-1], "site-packages")
516 res = res.replace(pack_, "lib")
517 if rootrem is not None:
518 if isinstance(rootrem, str):
519 res = res.replace(rootrem, '')
520 else:
521 for sub in rootrem:
522 if isinstance(sub, str):
523 res = res.replace(sub, '')
524 elif isinstance(sub, tuple) and len(sub) == 2:
525 res = res.replace(sub[0], sub[1])
526 else:
527 raise TypeError(
528 "rootrem must contains strings or tuple not {0}"
529 ".".format(rootrem))
530 return res
532 if as_df:
534 def better_name(row):
535 if len(row['fct']) > 15:
536 return "{}-{}".format(row['file'].split(':')[-1], row['fct'])
537 name = row['file'].replace("\\", "/")
538 return "{}-{}".format(name.split('/')[-1], row['fct'])
540 rows = _process_pstats(ps, clean_text)
541 import pandas
542 df = pandas.DataFrame(rows)
543 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',
544 'tall', 'cum_tall']]
545 df['namefct'] = df.apply(lambda row: better_name(row), axis=1)
546 df = df.groupby(['namefct', 'file'], as_index=False).sum().sort_values(
547 'cum_tall', ascending=False).reset_index(drop=True)
548 if return_results:
549 return fct_res, ps, df
550 return ps, df
551 else:
552 res = clean_text(res)
553 if return_results:
554 return fct_res, ps, res
555 return ps, res
556 if as_df:
557 raise ValueError( # pragma: no cover
558 "as_df is not a compatible option with pyinst_format.")
560 try:
561 from pyinstrument import Profiler
562 except ImportError as e: # pragma: no cover
563 raise ImportError("pyinstrument is not installed.") from e
565 profiler = Profiler(**kwargs)
566 profiler.start()
567 fct_res = fct()
568 profiler.stop()
570 if pyinst_format == "text":
571 if return_results:
572 return fct_res, profiler, profiler.output_text(
573 unicode=False, color=False)
574 return profiler, profiler.output_text(unicode=False, color=False)
575 if pyinst_format == "textu":
576 if return_results:
577 return fct_res, profiler, profiler.output_text(
578 unicode=True, color=True)
579 return profiler, profiler.output_text(unicode=True, color=True)
580 if pyinst_format == "json":
581 from pyinstrument.renderers import JSONRenderer
582 if return_results:
583 return fct_res, profiler, profiler.output(
584 JSONRenderer())
585 return profiler, profiler.output(JSONRenderer())
586 if pyinst_format == "html":
587 if return_results:
588 return fct_res, profiler, profiler.output_html()
589 return profiler, profiler.output_html()
590 raise ValueError("Unknown format '{}'.".format(pyinst_format))
593def profile2graph(ps, clean_text=None, verbose=False, fLOG=None):
594 """
595 Converts profiling statistics into a graphs.
597 :param ps: an instance of `pstats
598 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_
599 :param clean_text: function to clean function names
600 :param verbose: verbosity
601 :param fLOG: logging function
602 :return: an instance of class @see cl ProfileNode
604 .. exref::
605 :title: Hierarchical display for a profiling
607 :epkg:`pyinstrument` has a nice display to show
608 time spent and call stack at the same time. This function
609 tries to replicate that display based on the results produced
610 by module :mod:`cProfile`. Here is an example.
612 .. runpython::
613 :showcode:
615 import time
616 from pyquickhelper.pycode.profiling import profile, profile2graph
619 def fct0(t):
620 time.sleep(t)
623 def fct1(t):
624 time.sleep(t)
627 def fct2():
628 fct1(0.1)
629 fct1(0.01)
632 def fct3():
633 fct0(0.2)
634 fct1(0.5)
637 def fct4():
638 fct2()
639 fct3()
642 ps = profile(fct4)[0]
643 root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1])
644 text = root.to_text()
645 print(text)
647 .. versionadded:: 1.11
648 """
649 if clean_text is None:
650 clean_text = lambda x: x
652 nodes = {}
653 for k, v in ps.stats.items():
654 if verbose and fLOG is not None:
655 fLOG("[pstats] %s=%r" % (k, v))
656 if len(v) < 5:
657 continue
658 if k[0] == '~' and len(v) == 0:
659 # raw function never called by another
660 continue
661 if "method 'disable' of '_lsprof.Profiler'" in k[2]:
662 continue
663 node = ProfileNode(
664 filename=clean_text(k[0].replace("\\", "/")),
665 line=k[1], func_name=k[2],
666 nc1=v[0], nc2=v[1], tin=v[2], tall=v[3])
667 if node.key in nodes:
668 raise RuntimeError(
669 "Key %r is already present, node=%r." % (node.key, node))
670 nodes[node.key] = node
672 for k, v in ps.stats.items():
673 if "method 'disable' of '_lsprof.Profiler'" in k[2]:
674 continue
675 filename = clean_text(k[0].replace("\\", "/"))
676 ks = ProfileNode._key(filename, k[1], k[2])
677 node = nodes[ks]
678 sublist = v[4]
679 for f, vv in sublist.items():
680 if "method 'disable' of '_lsprof.Profiler'" in f[2]:
681 continue
682 name = clean_text(f[0].replace("\\", "/"))
683 key = ProfileNode._key(name, f[1], f[2])
684 if key not in nodes:
685 raise RuntimeError(
686 "Unable to find key %r into\n%s" % (
687 key, "\n".join(sorted(nodes))))
688 if k[0] == '~' and len(v) == 0:
689 continue
690 child = nodes[key]
691 node.add_called_by(child)
692 child.add_calls_to(node, vv)
694 for k, v in nodes.items():
695 root = v.get_root()
696 break
698 return root, nodes