Coverage for pyquickhelper/ipythonhelper/notebook_runner.py: 88%

651 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1""" 

2@file 

3@brief Modified version of `runipy.notebook_runner 

4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_. 

5""" 

6 

7import base64 

8import os 

9import re 

10import time 

11import platform 

12import warnings 

13from queue import Empty 

14from time import sleep 

15from collections import Counter 

16from io import StringIO, BytesIO 

17import numpy 

18from nbformat import NotebookNode, writes 

19from nbformat.reader import reads 

20from ..imghelper.svg_helper import svg2img, PYQImageException 

21from ..loghelper.flog import noLOG 

22 

23 

class NotebookError(Exception):
    """
    Exception raised when the execution of a notebook fails.
    """

29 

30 

class NotebookKernelError(Exception):
    """
    Exception raised when the kernel cannot be started or when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """

38 

39 

40class NotebookRunner(object): 

41 

42 """ 

43 The kernel communicates with mime-types while the notebook 

44 uses short labels for different cell types. We'll use this to 

45 map from kernel types to notebook format types. 

46 

47 This class executes a notebook end to end. 

48 

49 .. index:: kernel, notebook 

50 

51 The class can use different kernels. The next links gives more 

52 information on how to create or test a kernel: 

53 

54 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_ 

55 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_ 

56 

57 .. faqref:: 

58 :title: Do I need to shutdown the kernel after running a notebook? 

59 

60 .. index:: travis 

61 

62 If the class is instantiated with *kernel=True*, a kernel will 

63 be started. It must be shutdown otherwise the program might 

64 be waiting for it for ever. That is one of the reasons why the 

65 travis build does not complete. The build finished but cannot terminate 

66 until all kernels are shutdown. 

67 """ 

68 

69 # . available output types 

70 MIME_MAP = { 

71 'image/jpeg': 'jpeg', 

72 'image/png': 'png', 

73 'image/gif': 'gif', 

74 'text/plain': 'text', 

75 'text/html': 'html', 

76 'text/latex': 'latex', 

77 'application/javascript': 'html', 

78 'image/svg+xml': 'svg', 

79 } 

80 

def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300, raise_exception=False):
    """
    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started from it
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
    @param      code_init       to initialize the notebook with a python code as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestar=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default: the notebook can be read
                                but not run, if True, a kernel is started and the
                                notebook can be run
    @param      filename        to add the notebook file if there is one in error messages
    @param      replacements    replacements to make in every cell before running it,
                                dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                with the same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_
    @param      raise_exception raise an exception if a cell raises one

    The constructor raises @see cl NotebookKernelError if the kernel
    cannot be started or is not ready after *startup_timeout* seconds.
    """
    if kernel:
        try:
            from jupyter_client import KernelManager
        except ImportError:  # pragma: no cover
            from ipykernel import KernelManager

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            self.km = KernelManager(
                kernel_name=kernel_name) if kernel_name is not None else KernelManager()
    else:
        self.km = None
    # Every attribute is defined before the kernel starts so that the
    # instance stays consistent even if the startup fails.
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.raise_exception = raise_exception
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # The arguments are stored as received (and not as their default
    # values) so that method copy builds an equivalent instance.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level, extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements,
        detailed_log=detailed_log, startup_timeout=startup_timeout,
        raise_exception=raise_exception)

    # command line arguments given to the kernel
    args = []
    if profile_dir:
        args.append(f'--profile-dir={os.path.abspath(profile_dir)}')
    if log_level:
        args.append(f'--log-level={log_level}')

    if extended_args is not None and len(extended_args) > 0:
        for opt in extended_args:
            if not opt.startswith("--"):
                raise SyntaxError(  # pragma: no cover
                    "every option should start with '--': " + opt)
            if "=" not in opt:
                raise SyntaxError(  # pragma: no cover
                    "every option should be assigned a value: " + opt)
            args.append(opt)

    if not kernel:
        return

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                f"Failure with args: {args}\nand error:\n{str(e)}") from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        # The current directory is restored even if the kernel
        # failed to start.
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # NOTE(review): the original comment said "We wait for one second"
        # but the code waits for startup_timeout seconds again before
        # cleaning up, the behaviour is left unchanged, confirm the intent.
        sleep(startup_timeout)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            f"Wait_for_ready fails (timeout={startup_timeout}).") from e

201 

def __del__(self):
    """
    Drops the references to the kernel manager and the kernel client.

    The method only deletes attributes *km* and *kc*, it does not
    call @see me shutdown_kernel. It uses ``getattr`` because the
    destructor is also called when the constructor failed before
    these attributes were created.
    """
    if getattr(self, "km", None) is not None:
        del self.km
    if getattr(self, "kc", None) is not None:
        del self.kc

210 

def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    @param      filename    None, a filename or a stream with a method *write*
    @param      encoding    encoding used if *filename* is a filename
    @return                 Json string if filename is None, None otherwise
    """
    if filename is None:
        # No destination, the content is returned as a string.
        buffer = StringIO()
        buffer.write(writes(self.nb))
        return buffer.getvalue()

    if isinstance(filename, str):
        # A filename, the method calls itself with the opened stream.
        with open(filename, "w", encoding=encoding) as stream:
            self.to_json(stream)
        return None

    # Anything else is considered as a stream.
    filename.write(writes(self.nb))
    return None

231 

def copy(self):
    """
    Copies the notebook (just the content). The notebook is
    serialized and read again, the new instance receives the stored
    constructor arguments except *theNotebook* and *filename*.

    @return         instance of @see cl NotebookRunner
    """
    stream = StringIO()
    self.to_json(stream)
    kwargs = {key: value for key, value in self.init_args.items()
              if key not in ("theNotebook", "filename")}
    duplicate = reads(stream.getvalue())
    return NotebookRunner(duplicate, **kwargs)

246 

def __add__(self, nb):
    """
    Merges two notebooks together, returns a new one.
    The current instance is copied, it is not modified.

    @param      nb      notebook
    @return             new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged

257 

def shutdown_kernel(self):
    """
    Stops the channels of the kernel client then shuts down the kernel.
    Raises ValueError if no kernel client is attached to the instance.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)

268 

def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, if the code mentions
    *bokeh*, ``output_notebook()``. It also applies the replacements
    stored in ``self.replacements`` to every line.

    @param      code        code (string) or None
    @return                 cleaned code, None if *code* is None

    If the last line is a commented ``show()``, the expression
    ``"nothing to show"`` is appended to the cleaned code.
    """
    # The test on None comes first, operator ``in`` fails on None.
    if code is None:
        return code

    has_bokeh = any(marker in code
                    for marker in ("bokeh.", "from bokeh", "import bokeh"))
    replacements = self.replacements
    cleaned = []
    show_is_last = False
    for raw in code.split("\n"):
        line = raw.strip("\n\r").rstrip(" \t")
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            # show_is_last is deliberately left unchanged here.
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        if replacements is not None:
            for old, new in replacements.items():
                line = line.replace(old, new)
        cleaned.append(line)
    if show_is_last:
        cleaned.append('"nothing to show"')
    return "\n".join(cleaned)

299 

@staticmethod
def get_cell_code(cell):
    """
    Extracts the code of a cell.

    @param      cell        a cell or a string
    @return                 boolean (=iscell), string

    A string is returned as is with ``False``. For a cell, the method
    returns attribute *source* or attribute *input* if *source*
    does not exist.
    """
    if isinstance(cell, str):
        return False, cell

    try:
        source = cell.source
    except AttributeError:  # pragma: no cover
        source = cell.input
    return True, source

317 

def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and update the output of that cell inplace.

    :param index_cell: index of the cell, only used in log and error messages
    :param cell: cell to execute, it can be a cell or a string
    :param clean_function: cleaning function to apply to the code before running it,
        it is called after method *clean_code*
    :param max_nbissue: number of times an issue can be raised before stopping
        (an issue is an empty queue when polling the kernel for messages)
    :return: output of the cell, a list of *NotebookNode*,
        or an empty string if the cleaned code is empty
        or if the number of issues exceeds *max_nbissue*

    The method raises *ValueError* if no kernel was started,
    *NotebookError* if the kernel replies with an error and
    ``self.raise_exception`` is True, *NotImplementedError*
    if the kernel sends a message of an unexpected type.
    '''
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG(f'-- running cell:\n{codei}\n')
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    # generic cleaning first, then the user defined one
    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    if len(code) == 0:
        # nothing to run
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # reply of the kernel on the shell channel
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status == 'error':  # pragma: no cover
        # removes ANSI escape sequences from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        if self.raise_exception:
            raise NotebookError(traceback_text)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    # The outputs are read from the iopub channel until the kernel
    # becomes idle.
    outs = list()
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # NOTE(review): status is overwritten with 'ok' here
                    # even if the shell reply was 'error'.
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = f"exception Empty was raised ({e!r})"
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(f' msg_type={msg_type}')

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        # these messages do not produce any output
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # drops every output received so far
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join(f"{k}={v}"
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                f"Unhandled iopub message: '{msg_type}'\n--CONTENT--\n{dcontent}")

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(f' out={type(out)}')
            if hasattr(out, "data"):
                self.detailed_log(f' out={out.data}')

    if iscell:
        cell['outputs'] = outs

    # concatenates every textual output to log it
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply into a string, one line per key,
        # the dictionaries are expanded on one level
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(f" [{_}]={str(__)}")
                v_ = "\n".join(temp)
                sreply.append(f"reply['{k}']=dict\n{v_}")
            else:
                sreply.append(f"reply['{k}']={str(v)}")
        sreply = "\n".join(sreply)
        return sreply

    # NOTE(review): the loop above only ends with a ``break`` which sets
    # status to 'ok', a ``return`` or an exception, this branch looks
    # unreachable, to be confirmed before relying on it.
    if status == 'error':  # pragma: no cover
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log(f'[run_cell] status={status}')
    return outs

516 

def to_python(self):
    """
    Converts the notebook into python. Code cells are copied as is,
    markdown and raw cells are converted into comments, other cells
    are replaced by a comment giving their type. An empty line
    follows every cell.

    @return         string
    """
    output = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            output.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for text_line in cell.source.split("\n"):
                if text_line.startswith("#"):
                    # a title is announced by a line of three sharps
                    output.extend(["###", text_line])
                else:
                    output.append("# " + text_line)
        else:
            # No text, no code.
            output.append(f"# cell.type = {kind}")
        output.append("")
    return "\n".join(output)

542 

def iter_code_cells(self):
    '''
    Enumerates the cells of the notebook whose type is ``'code'``.
    '''
    yield from (cell for cell in self.iter_cells()
                if cell.cell_type == 'code')

550 

def iter_cells(self):
    '''
    Enumerates all the cells of the notebook. If the notebook has an
    attribute *worksheets*, the cells of every worksheet are
    enumerated, otherwise the method enumerates ``nb.cells``.
    '''
    notebook = self.nb
    if hasattr(notebook, "worksheets"):  # pragma: no cover
        for sheet in notebook.worksheets:
            yield from sheet.cells
    else:
        yield from notebook.cells

562 

def first_cell(self):
    """
    Returns the first cell of the notebook,
    None if the notebook has no cell.
    """
    return next(iter(self.iter_cells()), None)

569 

def _cell_container(self):
    """
    Returns the container of the cells, it depends on the format:
    the cells of the last worksheet if the notebook has
    an attribute *worksheets*, ``nb.cells`` otherwise.

    @return     cell container
    """
    notebook = self.nb
    if not hasattr(notebook, "worksheets"):
        return notebook.cells

    # pragma: no cover
    container = None
    for sheet in notebook.worksheets:
        container = sheet
    if container is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return container.cells

584 

def __len__(self):
    """
    Returns the number of cells, it iterates on cells
    to get this information every time it is called,
    the result is not cached.

    @return         int
    """
    count = 0
    for _ in self.iter_cells():
        count += 1
    return count

593 

def cell_type(self, cell):
    """
    Gives the type of a cell (attribute *cell_type*).

    @param      cell        from @see me iter_cells
    @return                 type
    """
    kind = cell.cell_type
    return kind

602 

def cell_metadata(self, cell):
    """
    Gives the metadata of a cell (attribute *metadata*).

    @param      cell        cell
    @return                 metadata
    """
    meta = cell.metadata
    return meta

611 

def _check_thumbnail_tuple(self, b):
    """
    Verifies the types of a thumbnail.

    @param      b       tuple   image, format
    @return             b

    The function raises *TypeError* if *b* is not a tuple of
    length 2, if the format is ``svg`` and the image is not a string,
    or if the image is not bytes for any format except the plotly
    and bokeh ones which are not checked.
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            f"tuple expected, not {type(b)}")
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            f"tuple expected of lengh 2, not {len(b)}")

    payload, kind = b
    if kind == "svg":
        if not isinstance(payload, str):
            raise TypeError(  # pragma: no cover
                f"str expected for svg, not {type(payload)}")
        return b

    if kind in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
        # Don't know how to extract a snippet out of this.
        return b

    if not isinstance(payload, bytes):
        raise TypeError(  # pragma: no cover
            f"bytes expected for images, not {type(payload)}-'{kind}'\n{b}")
    return b

640 

def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text, bytes are decoded with utf-8,
                            any other type is converted into a string
    @param      format      text, json, ... (not used by the method)
    @param      context     (str) indication on the content of text (error, ...),
                            (not used by the method)
    @param      asbytes     results as bytes or as an image
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png.
    The size of the picture will depend on the text.
    The longer, the bigger. A text without any end of line is split
    into rows of 20 characters and only the first 200 characters
    are kept. The method relies on matplotlib
    and then convert the image into a PIL image.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # bytes cannot be mixed with the string operations below
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single long line is split into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # dx, dy: extent of the text, used to set the limits of the axes
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        # the ratio width / height is kept between 1 and 3
        ratio = abs(dx) * 1. / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        try:
            ax.set_xlim(numpy.array([0., dx]))
            ax.set_ylim(numpy.array([-dy, 0.]))
        except TypeError as e:
            warnings.warn(f"[create_picture_from] {e}")
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
    finally:
        # the figure is closed even if the rendering fails
        plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    img = Image.open(sio)
    return img

712 

def cell_image(self, cell, image_from_text=False):
    """
    Builds the image of a cell from its outputs.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if it is not one
    @return                     None for no image (or if the cell is not a code cell),
                                otherwise what method *_merge_images* returns
                                for the list of tuple (image, extension)
                                collected from the outputs of the cell

    The method raises *NotImplementedError* for an unexpected
    type of output or an unexpected mime type.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types converted into a picture only if image_from_text is True
    text_formats = {
        "application/javascript": "js",
        "application/json": "json",
        "text/html": "html",
        "text/latex": "latex",
    }
    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
    widget_mimes = ("application/vnd.jupyter.widget-view+json",
                    "application/vnd.jupyter.widget-state+json")
    raster_mimes = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    untouched_mimes = ("text/vnd.plotly.v1+html",
                       "application/vnd.plotly.v1+json",
                       "application/vnd.bokehjs_exec.v0+json",
                       "application/vnd.bokehjs_load.v0+json")

    images = []
    for output in cell.outputs:
        if output["output_type"] in {"execute_result", "display_data"}:
            for mime, value in output["data"].items():
                if mime == "text/plain":
                    if image_from_text:
                        images.append(self.create_picture_from(
                            value, "text", context=output["output_type"]))
                elif mime in text_formats:
                    if image_from_text:
                        images.append(self.create_picture_from(
                            value, text_formats[mime]))
                elif mime == "image/svg+xml":
                    if not isinstance(value, str):
                        raise TypeError(  # pragma: no cover
                            f"This should be str not '{type(value)}' (=SVG).")
                    images.append((value, "svg"))
                elif mime in widget_mimes:
                    # widgets are not rendered, only logged
                    if "model_id" not in value:
                        raise KeyError(  # pragma: no cover
                            f"model_id is missing from {value}")
                    self.fLOG(f"[{mime}] not rendered", value["model_id"])
                elif mime in raster_mimes:
                    if not isinstance(value, bytes):
                        value = base64.b64decode(value)
                    if not isinstance(value, bytes):
                        raise TypeError(  # pragma: no cover
                            f"This should be bytes not '{type(value)}' (=IMG:{mime}).")
                    images.append((value, mime.split("/")[-1]))
                elif mime in untouched_mimes:
                    images.append((value, mime.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        f"cell type: {kind}\nk={mime}\nv={value}\nCELL:\n{cell}")
        elif output["output_type"] == "error":
            trace = output["traceback"]
            if image_from_text:
                for row in trace:
                    images.append(self.create_picture_from(
                        row, "text", context="error"))
        elif output["output_type"] == "stream":
            stream_text = output["text"]
            if image_from_text:
                images.append(self.create_picture_from(stream_text, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if len(images) == 0:
        return None
    merged = self._merge_images(images)
    if merged[0] is None:
        return None
    self._check_thumbnail_tuple(merged)
    return merged

810 

def cell_height(self, cell):
    """
    Approximates the height of a cell by the number of lines it contains.

    @param      cell        cell
    @return                 number of lines

    For a markdown or a raw cell, a line counts for one more line
    every 80 characters. For a code cell, the number of lines of
    the code is added to an estimation for every output which
    depends on its mime type. The method raises
    *NotImplementedError* for an unexpected type of cell,
    type of output or mime type.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(row) // 80 for row in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            f"cell type: {kind}\nCELL:\n{cell}")

    raster_mimes = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    widget_mimes = ("application/vnd.jupyter.widget-view+json",
                    "application/vnd.jupyter.widget-state+json")
    plot_mimes = ("text/vnd.plotly.v1+html",
                  "application/vnd.plotly.v1+json",
                  "application/vnd.bokehjs_load.v0+json",
                  "application/vnd.bokehjs_exec.v0+json")

    height = len(cell.source.split("\n"))
    for output in cell.outputs:
        if output["output_type"] in ("execute_result", "display_data"):
            for mime, value in output["data"].items():
                if mime in ("text/plain", "text/html"):
                    height += len(value.split("\n"))
                elif mime == "application/javascript":
                    # rough estimation
                    height += len(value.split("\n")) // 2
                elif mime == "application/json":
                    # rough estimation
                    try:
                        height += len(value.split("{"))
                    except AttributeError:  # pragma: no cover
                        height += len(value) // 5 + 1
                elif mime == "image/svg+xml":
                    height += len(value) // 5
                elif mime == "text/latex":
                    height += len(value.split("\\\\")) * 2
                elif mime in raster_mimes:
                    height += len(value) // 50
                elif mime in widget_mimes:
                    height += 5
                elif mime in plot_mimes:
                    height += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, mime, value, cell))
        elif output["output_type"] == "stream":
            height += len(output["text"].split("\n"))
        elif output["output_type"] == "error":
            height += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

    return height

886 

def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Adds slide tags in the metadata of the cells
    for a slide show when they are too few.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell without any tag and starting with a title
    (one to three sharps) starts a new slide. A cell without any tag
    starts a new subslide if the current slide would otherwise exceed
    *max_nb_cell* cells or *max_nb_line* lines
    (see @see me cell_height).
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    headings = ("# ", "## ", "### ")
    for position, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # existing tag: only the counters are updated
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                lines_in_slide = 0
                cells_in_slide = 0
        elif cell.cell_type == "markdown" and cell.source.startswith(headings):
            meta["slideshow"] = {'slide_type': 'slide'}
            lines_in_slide = 0
            cells_in_slide = 0
            modified[position] = ("slide", "section", cell)

        height = self.cell_height(cell)
        too_big = (cells_in_slide + 1 > max_nb_cell or
                   lines_in_slide + height > max_nb_line)
        if "slideshow" not in meta and too_big:
            modified[position] = (
                "subslide",
                f"{cells_in_slide}-{lines_in_slide} <-> 1-{height}", cell)
            lines_in_slide = 0
            cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += 1

    return modified

933 

def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the cells of a notebook in order and update
    the outputs in-place.

    @param      skip_exceptions     if False, a @see cl NotebookError raised by a cell
                                    is raised again as is, if True, it is wrapped
                                    into a *RuntimeError*, the execution stops in both cases
    @param      progress_callback   call back function, called with the index
                                    of the cell after every executed cell
    @param      additional_path     additional paths (as a list or None if none)
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed or not, if the function
                                    returns None, the execution of the notebooks and skip
                                    the execution of the other cells
    @param      clean_function      function which cleans a cell's code before executing
                                    it (None for None)
    @param      context             not used by the method
    @return                         dictionary with statistics, keys are
                                    *nbcell* (number of code cells seen),
                                    *nbrun* (number of cells the method tried to run),
                                    *nbvalid* (number of cells run without exception),
                                    *time* (execution time)

    The function adds the local variable ``theNotebook`` with
    the value given to the constructor.
    Function *valid* can return *None* to stop the execution of the notebook
    before this cell.

    NOTE(review): an earlier version of this documentation stated that the
    subsequent cells are run when *skip_exceptions* is True, the code does
    not do that, to be confirmed with the callers.
    '''
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] Starting execution of '{self._filename}'")
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        # repr builds a valid python literal whatever the path contains
        # (quotes, trailing backslash), a raw string does not.
        code = ["import sys"]
        code.extend(f"sys.path.append({str(p)!r})" for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        self.run_cell(-1, f"theNotebook = {str(self.theNotebook)!r}")

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbvalid = 0
    begin = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = self.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                # stops the execution of the notebook
                break
            if not r:
                # skips this cell
                continue
        try:
            nbrun += 1
            self.run_cell(i, cell, clean_function=clean_function)
            nbvalid += 1
        except Empty as er:  # pragma: no cover
            raise RuntimeError(
                f"{self.comment}\nissue when executing:\n{codei}") from er
        except NotebookError as e:  # pragma: no cover
            if not skip_exceptions:
                raise
            raise RuntimeError(
                f"Issue when executing:\n{codei}") from e
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - begin
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbvalid, time=etime)
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] end execution of '{self._filename}'")
        self.detailed_log(
            f"[run_notebook] execution time: {etime}")
        self.detailed_log(f"[run_notebook] statistics : {res}")
    return res

1020 

def count_code_cells(self):
    '''
    Counts the code cells of the notebook.

    @return     number of cells produced by ``iter_code_cells``
    '''
    total = 0
    for _cell in self.iter_code_cells():
        total += 1
    return total

1026 

def merge_notebook(self, nb):
    """
    Appends the cells of notebook *nb* at the end of this notebook.

    @param      nb      notebook or list of notebook (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # a list of notebooks is merged one notebook after the other
        return sum(self.merge_notebook(one) for one in nb)

    container = self._cell_container()
    added = 0
    for added, cell in enumerate(nb.iter_cells(), start=1):
        container.append(cell)
    return added

1060 

def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. If the first cell only contains a title,
    the description is taken from the second cell when it is
    a markdown cell.

    Markdown links ``[text](url)`` are replaced by their text and
    double quotes are removed from the description.

    @return             header, description

    The method raises *ValueError* if the first cell is not a markdown
    cell or if the description still contains a url after the links
    were replaced.
    """
    def first_paragraph(lines):
        # First paragraph (text before the first empty line), on a single line.
        rest = '\n'.join(lines).lstrip().split('\n\n')
        return rest[0].replace('\n', ' ')

    def split_header(s, get_header=True):
        parts = s.strip().splitlines()
        if not parts:
            # empty or blank cell: no title, no description
            return '', ''

        if parts[0].startswith('#'):
            # title written as '# Title'
            if get_header:
                title = parts.pop(0)
                # Only the leading markers (and an optional closing
                # sequence) are removed, a '#' inside the title is kept.
                header = re.sub(r'^#+\s*', '', title)
                header = re.sub(r'\s+#+\s*$', '', header)
                if not parts:
                    return header, ''
            else:
                header = ''
            return header, first_paragraph(parts)

        if get_header:
            # title underlined (or overlined) with '=' or '-'
            if parts[0].startswith(('=', '-')):
                parts = parts[1:]
            header = parts.pop(0) if parts else ''
            if parts and parts[0].startswith(('=', '-')):
                parts.pop(0)
            if not parts:
                return header, ''
        else:
            header = ''
        return header, first_paragraph(parts)

    first_cell = self.first_cell()

    if first_cell['cell_type'] != 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        # the first cell only holds the title
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # replaces markdown links by their text
    reg = re.compile(r"(\[(.*?)\]\(([^ ]*)\))")
    new_desc = reg.sub(r"\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")

1118 

def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Processes the notebook and creates one picture based on the outputs
    to illustrate a notebook. Cells are scanned from the last one
    to the first one and the first image found that way is used.

    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @param      use_default     force using a default image even if an image is present
    @return                     string (:epkg:`SVG`) or Image (:epkg:`PIL`),
                                None if the selected output cannot be converted
    """
    # outputs which cannot be turned into a picture
    not_renderable = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                      "vnd.bokehjs_load.v0+json")
    ordered = list(self.iter_cells())[::-1]

    # first pass: real images
    candidates = []
    for cell in ordered:
        found = self.cell_image(cell, False)
        if found is None or len(found) == 0 or len(found[0]) == 0:
            continue
        if found[1] in not_renderable:
            continue
        self._check_thumbnail_tuple(found)
        candidates.append(found)

    # second pass: images built from text, stops at the first big enough one
    if not use_default and len(candidates) == 0:
        for cell in ordered:
            found = self.cell_image(cell, True)
            if found is None or len(found) == 0 or len(found[0]) == 0:
                continue
            self._check_thumbnail_tuple(found)
            candidates.append(found)
            if len(found[0]) >= 1000:
                break

    if use_default:
        candidates = []
    if len(candidates) == 0:
        # no image, we need to consider the default one
        default_path = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(default_path, "rb") as f:
            fallback = (f.read(), "png")
            self._check_thumbnail_tuple(fallback)
            candidates.append(fallback)

    # select the image
    if len(candidates) == 0:
        raise ValueError(  # pragma: no cover
            "There should be at least one image.")
    # maybe later we'll implement a different logic,
    # the first candidate comes from the last cell with an image
    chosen = candidates[0]
    data, kind = chosen[0], chosen[1]

    # zoom
    if kind in not_renderable:
        return None
    if kind == 'svg':
        try:
            data = svg2img(data)
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
    return self._scale_image(
        data, kind, max_width=max_width, max_height=max_height)

1182 

1183 def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200): 

1184 """ 

1185 Scales an image with the same aspect ratio centered in an 

1186 image with a given max_width and max_height. 

1187 

1188 @param in_bytes image as bytes 

1189 @param format indication of the format (can be empty) 

1190 @param max_width maximum size of the thumbnail 

1191 @param max_height maximum size of the thumbnail 

1192 @return Image (PIL) 

1193 """ 

1194 # local import to avoid testing dependency on PIL: 

1195 try: 

1196 from PIL import Image 

1197 except ImportError: # pragma: no cover 

1198 import Image 

1199 

1200 if isinstance(in_bytes, tuple): 

1201 in_bytes = in_bytes[0] 

1202 if isinstance(in_bytes, bytes): 

1203 img = Image.open(BytesIO(in_bytes)) 

1204 elif isinstance(in_bytes, Image.Image): 

1205 img = in_bytes 

1206 else: 

1207 raise TypeError( # pragma: no cover 

1208 f"bytes expected, not {type(in_bytes)} - format={format}") 

1209 width_in, height_in = img.size 

1210 scale_w = max_width / float(width_in) 

1211 scale_h = max_height / float(height_in) 

1212 

1213 if height_in * scale_w <= max_height: 

1214 scale = scale_w 

1215 else: 

1216 scale = scale_h 

1217 

1218 if scale >= 1.0: 

1219 return img 

1220 

1221 width_sc = int(round(scale * width_in)) 

1222 height_sc = int(round(scale * height_in)) 

1223 

1224 # resize the image and center 

1225 img.thumbnail((width_sc, height_sc), Image.ANTIALIAS) 

1226 thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255)) 

1227 pos_insert = ((max_width - width_sc) // 2, 

1228 (max_height - height_sc) // 2) 

1229 thumb.paste(img, pos_insert) 

1230 return thumb 

1231 

1232 def _merge_images(self, results): 

1233 """ 

1234 Merges images defined by (buffer, format). 

1235 The method uses PIL to merge images when possible. 

1236 

1237 @return ``[ (image, format) ]`` 

1238 """ 

1239 if len(results) == 1: 

1240 results = results[0] 

1241 self._check_thumbnail_tuple(results) 

1242 return results 

1243 if len(results) == 0: 

1244 return None 

1245 

1246 formats_counts = Counter(_[1] for _ in results) 

1247 if len(formats_counts) == 1: 

1248 format = results[0][1] 

1249 else: 

1250 items = sorted(((v, k) 

1251 for k, v in formats_counts.items()), reverse=False) 

1252 for it in items: 

1253 format = it 

1254 break 

1255 

1256 results = [_ for _ in results if _[1] == format] 

1257 if format == "svg": 

1258 return ("\n".join(_[0] for _ in results), format) 

1259 

1260 # local import to avoid testing dependency on PIL: 

1261 try: 

1262 from PIL import Image 

1263 except ImportError: # pragma: no cover 

1264 import Image 

1265 

1266 dx = 0. 

1267 dy = 0. 

1268 over = 0.7 

1269 imgs = [] 

1270 for in_bytes, _ in results: 

1271 img = Image.open(BytesIO(in_bytes)) 

1272 imgs.append(img) 

1273 dx = max(dx, img.size[0]) 

1274 dy += img.size[1] * over 

1275 

1276 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220)) 

1277 for img in imgs: 

1278 dy -= img.size[1] * over 

1279 new_im.paste(img, (0, max(int(dy), 0))) 

1280 

1281 if max(dx, dy) > 0: 

1282 image_buffer = BytesIO() 

1283 new_im.save(image_buffer, "PNG") 

1284 b = image_buffer.getvalue(), "png" 

1285 return b 

1286 b = None, "png" 

1287 return b