Coverage for pyquickhelper/pycode/profiling.py: 88%

1"""

2@file

3@brief Profiling helpers

4"""

5from collections import deque, OrderedDict

6from io import StringIO

7import json

8import math

9import os

10import site

11import cProfile

12from pstats import Stats

14try:

15 from pstarts import SortKey

16except ImportError: # pragma: no cover

17 # Python < 3.7

19 class SortKey:

20 LINE = 'line'

21 CUMULATIVE = 'cumulative'

22 TIME = 'time'

25class ProfileNode:

26 """

27 Graph structure to represent a profiling.

29 :param filename: filename

30 :param line: line number

31 :param func_name: function name

32 :param nc1: number of calls 1

33 :param nc2: number of calls 2

34 :param tin: time spent in the function

35 :param tout: time spent in the function and in the sub functions

36 """

38 def __init__(self, filename, line, func_name, nc1, nc2, tin, tall):

39 if "method 'disable' of '_lsprof.Profiler'" in func_name:

40 raise RuntimeError( # pragma: no cover

41 f"Function not allowed in the profiling: {func_name!r}.")

42 self.filename = filename

43 self.line = line

44 self.func_name = func_name

45 self.nc1 = nc1

46 self.nc2 = nc2

47 self.tin = tin

48 self.tall = tall

49 self.called_by = []

50 self.calls_to = []

51 self.calls_to_elements = []

53 def add_called_by(self, pnode):

54 "This function is called by these lines."

55 self.called_by.append(pnode)

57 def add_calls_to(self, pnode, time_elements):

58 "This function calls this node."

59 self.calls_to.append(pnode)

60 self.calls_to_elements.append(time_elements)

62 @staticmethod

63 def _key(filename, line, fct):

64 key = "%s:%d:%s" % (filename, line, fct)

65 return key

67 @property

68 def key(self):

69 "Returns `file:line`."

70 return ProfileNode._key(self.filename, self.line,

71 self.func_name)

73 def get_root(self):

74 "Returns the root of the graph."

75 done = set()

77 def _get_root(node, stor=None):

78 if stor is not None:

79 stor.append(node)

80 if len(node.called_by) == 0:

81 return node

82 if len(node.called_by) == 1:

83 return _get_root(node.called_by[0], stor=stor)

84 res = None

85 for ct in node.called_by:

86 k = id(node), id(ct)

87 if k in done:

88 continue

89 res = ct

90 break

91 if res is None:

92 # All paths have been explored and no entry point was found.

93 # Choosing the most consuming function.

94 return None

95 done.add((id(node), id(res)))

96 return _get_root(res, stor=stor)

98 root = _get_root(self)

99 if root is None:

100 candidates = []

101 _get_root(self, stor=candidates)

102 tall = [(n.tall, n) for n in candidates]

103 tall.sort()

104 root = tall[-1][-1]

105 return root

106

107 def __repr__(self):

108 "usual"

109 return "%s(%r, %r, %r, %r, %r, %r, %r) # %d-%d" % (

110 self.__class__.__name__,

111 self.filename, self.line, self.func_name,

112 self.nc1, self.nc2, self.tin, self.tall,

113 len(self.called_by), len(self.calls_to))

114

115 def __iter__(self):

116 "Returns all nodes in the graph."

117 done = set()

118 stack = deque()

119 stack.append(self)

120 while len(stack) > 0:

121 node = stack.popleft()

122 if node.key in done:

123 continue

124 yield node

125 done.add(node.key)

126 stack.extend(node.calls_to)

127

128 _modules_ = {

129 '~', 'subprocess.py', 'posixpath.py', 'os.py',

130 '<frozen importlib._bootstrap>', 'inspect.py',

131 'version.py', 'typing.py', 'warnings.py', 'errors.py',

132 'numbers.py', 'ast.py', 'threading.py', '_collections_abc.py',

133 'datetime.py', 'abc.py', 'argparse.py', '__future__.py',

134 'functools.py', 'six.py', 'sre_parse.py', 'contextlib.py',

135 ' _globals.py', '_ios.py', 'types.py'}

136

137 @staticmethod

138 def filter_node_(node, info=None):

139 """

140 Filters out node to be displayed by default.

141

142 :param node: node

143 :param info: if the node is called by a function,

144 this dictionary can be used to overwrite the attributes

145 held by the node

146 :return: boolean (True to keep, False to forget)

147 """

148 if node.filename in ProfileNode._modules_:

149 if info is None:

150 if (node.nc1 <= 10 and node.nc2 <= 10 and

151 node.tall <= 1e-4):

152 return False

153 else:

154 if (info['nc1'] <= 10 and info['nc2'] <= 10 and

155 info['tall'] <= 1e-4):

156 return False

157

158 return True

159

160 def as_dict(self, filter_node=None, sort_key=SortKey.LINE):

161 """

162 Renders the results of a profiling interpreted with

163 function @fn profile2graph. It can then be loaded with

164 a dataframe.

165

166 :param filter_node: display only the nodes for which

167 this function returns True, if None, the default function

168 removes built-in function with small impact

169 :param sort_key: sort sub nodes by...

170 :return: rows

171 """

172 def sort_key_line(dr):

173 if isinstance(dr, tuple):

174 return (dr[0].filename, dr[0].line)

175 return (dr.filename, dr.line)

176

177 def sort_key_tin(dr):

178 if isinstance(dr, tuple):

179 return -dr[1][2]

180 return -dr.tin

181

182 def sort_key_tall(dr):

183 if isinstance(dr, tuple):

184 return -dr[1][3]

185 return -dr.tall

186

187 if sort_key == SortKey.LINE:

188 sortk = sort_key_line

189 elif sort_key == SortKey.CUMULATIVE:

190 sortk = sort_key_tall

191 elif sort_key == SortKey.TIME:

192 sortk = sort_key_tin

193 else:

194 raise NotImplementedError(

195 f"Unable to sort subcalls with this key {sort_key!r}.")

196

197 def depth_first(node, roots_keys, indent=0):

198 text = {'fct': node.func_name, 'where': node.key,

199 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,

200 'tall': node.tall, 'indent': indent,

201 'ncalls': len(node.calls_to), 'debug': 'A'}

202 yield text

203 for n, nel in sorted(zip(node.calls_to,

204 node.calls_to_elements),

205 key=sortk):

206 if n.key in roots_keys:

207 text = {'fct': n.func_name, 'where': n.key,

208 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],

209 'tall': nel[3], 'indent': indent + 1,

210 'ncalls': len(n.calls_to), 'more': '+',

211 'debug': 'B'}

212 if (filter_node is not None and

213 not filter_node(n, info=text)):

214 continue

215 yield text

216 else:

217 if filter_node is not None and not filter_node(n):

218 continue

219 for t in depth_first(n, roots_keys, indent + 1):

220 yield t

221

222 if filter_node is None:

223 filter_node = ProfileNode.filter_node_

224 nodes = list(self)

225 roots = [node for node in nodes if len(node.called_by) != 1]

226 roots_key = {r.key: r for r in roots}

227 rows = []

228 for root in sorted(roots, key=sortk):

229 if filter_node is not None and not filter_node(root):

230 continue

231 rows.extend(depth_first(root, roots_key))

232 return rows

233

234 def to_text(self, filter_node=None, sort_key=SortKey.LINE,

235 fct_width=60):

236 """

237 Prints the profiling to text.

238

239 :param filter_node: display only the nodes for which

240 this function returns True, if None, the default function

241 removes built-in function with small impact

242 :param sort_key: sort sub nodes by...

243 :return: rows

244 """

245 def align_text(text, size):

246 if size <= 0:

247 return text

248 if len(text) <= size:

249 return text + " " * (size - len(text))

250 h = size // 2 - 1

251 return text[:h] + "..." + text[-h + 1:]

252

253 dicts = self.as_dict(filter_node=filter_node, sort_key=sort_key)

254 max_nc = max(max(_['nc1'] for _ in dicts),

255 max(_['nc2'] for _ in dicts))

256 dg = int(math.log(max_nc) / math.log(10) + 1.5)

257 line_format = ("{indent}{fct} -- {nc1: %dd} {nc2: %dd} -- {tin:1.5f} {tall:1.5f}"

258 " -- {name} ({fct2})" % (dg, dg))

259 text = []

260 for row in dicts:

261 line = line_format.format(

262 indent=" " * (row['indent'] * 4),

263 fct=align_text(row['fct'], fct_width - row['indent'] * 4),

264 nc1=row['nc1'], nc2=row['nc2'], tin=row['tin'],

265 tall=row['tall'], name=row['where'],

266 fct2=row['fct'])

267 if row.get('more', '') == '+':

268 line += " +++"

269 text.append(line)

270 return "\n".join(text)

271

272 def to_json(self, filter_node=None, sort_key=SortKey.LINE, as_str=True,

273 **kwargs):

274 """

275 Renders the results of a profiling interpreted with

276 function @fn profile2graph as :epkg:`JSON`.

277

278 :param filter_node: display only the nodes for which

279 this function returns True, if None, the default function

280 removes built-in function with small impact

281 :param sort_key: sort sub nodes by...

282 :param as_str: converts the json into a string

283 :param kwargs: see :func:`json.dumps`

284 :return: rows

285

286 See notebook :ref:`profilingrst` to see how to use

287 the json output.

288 """

289 def sort_key_line(dr):

290 if isinstance(dr, tuple):

291 return (dr[0].filename, dr[0].line)

292 return (dr.filename, dr.line)

293

294 def sort_key_tin(dr):

295 if isinstance(dr, tuple):

296 return -dr[1][2]

297 return -dr.tin

298

299 def sort_key_tall(dr):

300 if isinstance(dr, tuple):

301 return -dr[1][3]

302 return -dr.tall

303

304 if sort_key == SortKey.LINE:

305 sortk = sort_key_line

306 elif sort_key == SortKey.CUMULATIVE:

307 sortk = sort_key_tall

308 elif sort_key == SortKey.TIME:

309 sortk = sort_key_tin

310 else:

311 raise NotImplementedError(

312 f"Unable to sort subcalls with this key {sort_key!r}.")

313

314 def walk(node, roots_keys, indent=0):

315 item = {'details': {

316 'fct': node.func_name, 'where': node.key,

317 'nc1': node.nc1, 'nc2': node.nc2, 'tin': node.tin,

318 'tall': node.tall, 'indent': indent,

319 'ncalls': len(node.calls_to)}}

320

321 child = OrderedDict()

322 for n, nel in sorted(zip(node.calls_to,

323 node.calls_to_elements),

324 key=sortk):

325 key = (nel[0], f"{nel[3]:1.5f}:{n.func_name}")

326 if n.key in roots_keys:

327 details = {'fct': n.func_name, 'where': n.key,

328 'nc1': nel[0], 'nc2': nel[1], 'tin': nel[2],

329 'tall': nel[3], 'indent': indent,

330 'ncalls': len(node.calls_to)}

331 if (filter_node is not None and

332 not filter_node(n, info=details)):

333 continue

334 child[key] = {'details': details}

335 else:

336 if filter_node is not None and not filter_node(n):

337 continue

338 child[key] = walk(n, roots_key, indent + 1)

339

340 if len(child) > 0:

341 mx = max(_[0] for _ in child)

342 dg = int(math.log(mx) / math.log(10) + 1.5)

343 form = f"%-{dg}d-%s"

344 child = OrderedDict((form % k, v) for k, v in child.items())

345 item['calls'] = child

346 return item

347

348 if filter_node is None:

349 filter_node = ProfileNode.filter_node_

350 nodes = list(self)

351 roots = [node for node in nodes if len(node.called_by) != 1]

352 roots_key = {r.key: r for r in roots}

353 rows = OrderedDict()

354 for root in sorted(roots, key=sortk):

355 if filter_node is not None and not filter_node(root):

356 continue

357 key = (root.nc1, f"{root.tall:1.5f}:::{root.func_name}")

358 rows[key] = walk(root, roots_key)

359 mx = max(_[0] for _ in rows)

360 dg = int(math.log(mx) / math.log(10) + 1.5)

361 form = f"%-{dg}d-%s"

362 rows = OrderedDict((form % k, v) for k, v in rows.items())

363 if as_str:

364 return json.dumps({'profile': rows}, **kwargs)

365 return {'profile': rows}

366

367

368def _process_pstats(ps, clean_text=None, verbose=False, fLOG=None):

369 """

370 Converts class `Stats <https://docs.python.org/3/library/

371 profile.html#pstats.Stats>`_ into something

372 readable for a dataframe.

373

374 :param ps: instance of type :func:`pstats.Stats`

375 :param clean_text: function to clean function names

376 :param verbose: change verbosity

377 :param fLOG: logging function

378 :return: list of rows

379 """

380 if clean_text is None:

381 clean_text = lambda x: x

382

383 def add_rows(rows, d):

384 tt1, tt2 = 0, 0

385 for k, v in d.items():

386 stin = 0

387 stall = 0

388 if verbose and fLOG is not None:

389 fLOG("[pstats] %s=%r" % (

390 (clean_text(k[0].replace("\\", "/")), ) + k[1:],

391 v))

392 if len(v) < 5:

393 continue

394 row = {

395 'file': "%s:%d" % (clean_text(k[0].replace("\\", "/")), k[1]),

396 'fct': k[2],

397 'ncalls1': v[0],

398 'ncalls2': v[1],

399 'tin': v[2],

400 'tall': v[3]

401 }

402 stin += v[2]

403 stall += v[3]

404 if len(v) == 5:

405 t1, t2 = add_rows(rows, v[-1])

406 stin += t1

407 stall += t2

408 row['cum_tin'] = stin

409 row['cum_tall'] = stall

410 rows.append(row)

411 tt1 += stin

412 tt2 += stall

413 return tt1, tt2

414

415 rows = []

416 add_rows(rows, ps.stats)

417 return rows

418

419

420def profile2df(ps, as_df=True, clean_text=None, verbose=False, fLOG=None):

421 """

422 Converts profiling statistics into a Dataframe.

423

424 :param ps: an instance of `pstats

425 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_

426 :param as_df: returns the results as a dataframe (True)

427 or a list of dictionaries (False)

428 :param clean_text: function to clean function names

429 :param verbose: verbosity

430 :param fLOG: logging function

431 :return: a DataFrame

432

433 ::

434

435 import pstats

436 from pyquickhelper.pycode.profiling import profile2df

437

438 ps = pstats.Stats('bench_ortmodule_nn_gpu6.prof')

439 df = profile2df(pd)

440 print(df)

441 """

442 rows = _process_pstats(ps, clean_text, verbose=verbose, fLOG=fLOG)

443 if not as_df:

444 return rows

445

446 import pandas

447 df = pandas.DataFrame(rows)

448 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',

449 'tall', 'cum_tall']]

450 df = df.groupby(['fct', 'file'], as_index=False).sum().sort_values(

451 'cum_tall', ascending=False).reset_index(drop=True)

452 return df.copy()

453

454

455def profile(fct, sort='cumulative', rootrem=None, as_df=False,

456 pyinst_format=None, return_results=False, **kwargs):

457 """

458 Profiles the execution of a function.

459

460 :param fct: function to profile

461 :param sort: see `sort_stats <https://docs.python.org/3/library/

462 profile.html#pstats.Stats.sort_stats>`_

463 :param rootrem: root to remove in filenames

464 :param as_df: return the results as a dataframe and not text

465 :param pyinst_format: format for :epkg:`pyinstrument`, if not empty,

466 the function uses this module or raises an exception if not

467 installed, the options are *text*, *textu* (text with colors),

468 *json*, *html*

469 :param return_results: if True, return results as well

470 (in the first position)

471 :param kwargs: additional parameters used to create the profiler

472 :return: raw results, statistics text dump (or dataframe is *as_df* is True)

473

474 .. plot::

475

476 import matplotlib.pyplot as plt

477 from pyquickhelper.pycode.profiling import profile

478 from pyquickhelper.texthelper import compare_module_version

479

480 def fctm():

481 return compare_module_version('0.20.4', '0.22.dev0')

482

483 pr, df = profile(lambda: [fctm() for i in range(0, 1000)], as_df=True)

484 ax = df[['namefct', 'cum_tall']].head(n=15).set_index(

485 'namefct').plot(kind='bar', figsize=(8, 3), rot=30)

486 ax.set_title("example of a graph")

487 for la in ax.get_xticklabels():

488 la.set_horizontalalignment('right');

489 plt.show()

490 """

491 if pyinst_format is None:

492 pr = cProfile.Profile(**kwargs)

493 pr.enable()

494 fct_res = fct()

495 pr.disable()

496 s = StringIO()

497 ps = Stats(pr, stream=s).sort_stats(sort)

498 ps.print_stats()

499 res = s.getvalue()

500 try:

501 pack = site.getsitepackages()

502 except AttributeError: # pragma: no cover

503 import numpy

504 pack = os.path.normpath(os.path.abspath(

505 os.path.join(os.path.dirname(numpy.__file__), "..")))

506 pack = [pack]

507 pack_ = os.path.normpath(os.path.join(pack[-1], '..'))

508

509 def clean_text(res):

510 res = res.replace(pack[-1], "site-packages")

511 res = res.replace(pack_, "lib")

512 if rootrem is not None:

513 if isinstance(rootrem, str):

514 res = res.replace(rootrem, '')

515 else:

516 for sub in rootrem:

517 if isinstance(sub, str):

518 res = res.replace(sub, '')

519 elif isinstance(sub, tuple) and len(sub) == 2:

520 res = res.replace(sub[0], sub[1])

521 else:

522 raise TypeError( # pragma: no cover

523 "rootrem must contains strings or tuple not {0}"

524 ".".format(rootrem))

525 return res

526

527 if as_df:

528

529 def better_name(row):

530 if len(row['fct']) > 15:

531 return f"{row['file'].split(':')[-1]}-{row['fct']}"

532 name = row['file'].replace("\\", "/")

533 return f"{name.split('/')[-1]}-{row['fct']}"

534

535 rows = _process_pstats(ps, clean_text)

536 import pandas

537 df = pandas.DataFrame(rows)

538 df = df[['fct', 'file', 'ncalls1', 'ncalls2', 'tin', 'cum_tin',

539 'tall', 'cum_tall']]

540 df['namefct'] = df.apply(lambda row: better_name(row), axis=1)

541 df = df.groupby(['namefct', 'file'], as_index=False).sum().sort_values(

542 'cum_tall', ascending=False).reset_index(drop=True)

543 if return_results:

544 return fct_res, ps, df

545 return ps, df

546 else:

547 res = clean_text(res)

548 if return_results:

549 return fct_res, ps, res

550 return ps, res

551 if as_df:

552 raise ValueError( # pragma: no cover

553 "as_df is not a compatible option with pyinst_format.")

554

555 try:

556 from pyinstrument import Profiler

557 except ImportError as e: # pragma: no cover

558 raise ImportError("pyinstrument is not installed.") from e

559

560 profiler = Profiler(**kwargs)

561 profiler.start()

562 fct_res = fct()

563 profiler.stop()

564

565 if pyinst_format == "text":

566 if return_results:

567 return fct_res, profiler, profiler.output_text(

568 unicode=False, color=False)

569 return profiler, profiler.output_text(unicode=False, color=False)

570 if pyinst_format == "textu":

571 if return_results:

572 return fct_res, profiler, profiler.output_text(

573 unicode=True, color=True)

574 return profiler, profiler.output_text(unicode=True, color=True)

575 if pyinst_format == "json":

576 from pyinstrument.renderers import JSONRenderer

577 if return_results:

578 return fct_res, profiler, profiler.output(

579 JSONRenderer())

580 return profiler, profiler.output(JSONRenderer())

581 if pyinst_format == "html":

582 if return_results:

583 return fct_res, profiler, profiler.output_html()

584 return profiler, profiler.output_html()

585 raise ValueError(f"Unknown format '{pyinst_format}'.")

586

587

588def profile2graph(ps, clean_text=None, verbose=False, fLOG=None):

589 """

590 Converts profiling statistics into a graphs.

591

592 :param ps: an instance of `pstats

593 <https://docs.python.org/3/library/profile.html#pstats.Stats>`_

594 :param clean_text: function to clean function names

595 :param verbose: verbosity

596 :param fLOG: logging function

597 :return: an instance of class @see cl ProfileNode

598

599 .. exref::

600 :title: Hierarchical display for a profiling

601

602 :epkg:`pyinstrument` has a nice display to show

603 time spent and call stack at the same time. This function

604 tries to replicate that display based on the results produced

605 by module :mod:`cProfile`. Here is an example.

606

607 .. runpython::

608 :showcode:

609

610 import time

611 from pyquickhelper.pycode.profiling import profile, profile2graph

612

613

614 def fct0(t):

615 time.sleep(t)

616

617

618 def fct1(t):

619 time.sleep(t)

620

621

622 def fct2():

623 fct1(0.1)

624 fct1(0.01)

625

626

627 def fct3():

628 fct0(0.2)

629 fct1(0.5)

630

631

632 def fct4():

633 fct2()

634 fct3()

635

636

637 ps = profile(fct4)[0]

638 root, nodes = profile2graph(ps, clean_text=lambda x: x.split('/')[-1])

639 text = root.to_text()

640 print(text)

641 """

642 if clean_text is None:

643 clean_text = lambda x: x

644

645 nodes = {}

646 for k, v in ps.stats.items():

647 if verbose and fLOG is not None:

648 fLOG(f"[pstats] {k}={v!r}")

649 if len(v) < 5:

650 continue

651 if k[0] == '~' and len(v) == 0:

652 # raw function never called by another

653 continue

654 if "method 'disable' of '_lsprof.Profiler'" in k[2]:

655 continue

656 node = ProfileNode(

657 filename=clean_text(k[0].replace("\\", "/")),

658 line=k[1], func_name=k[2],

659 nc1=v[0], nc2=v[1], tin=v[2], tall=v[3])

660 if node.key in nodes:

661 raise RuntimeError( # pragma: no cover

662 f"Key {node.key!r} is already present, node={node!r}.")

663 nodes[node.key] = node

664

665 for k, v in ps.stats.items():

666 if "method 'disable' of '_lsprof.Profiler'" in k[2]:

667 continue

668 filename = clean_text(k[0].replace("\\", "/"))

669 ks = ProfileNode._key(filename, k[1], k[2])

670 node = nodes[ks]

671 sublist = v[4]

672 for f, vv in sublist.items():

673 if "method 'disable' of '_lsprof.Profiler'" in f[2]:

674 continue

675 name = clean_text(f[0].replace("\\", "/"))

676 key = ProfileNode._key(name, f[1], f[2])

677 if key not in nodes:

678 raise RuntimeError( # pragma: no cover

679 "Unable to find key %r into\n%s" % (

680 key, "\n".join(sorted(nodes))))

681 if k[0] == '~' and len(v) == 0:

682 continue

683 child = nodes[key]

684 node.add_called_by(child)

685 child.add_calls_to(node, vv)

686

687 for k, v in nodes.items():

688 root = v.get_root()

689 break

690

691 return root, nodes