Coverage for pyquickhelper/ipythonhelper/notebook_runner.py: 88%

650 statements  

« prev     ^ index     » next       coverage.py v6.4.3, created at 2022-08-13 03:05 +0200

1""" 

2@file 

3@brief Modified version of `runipy.notebook_runner 

4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_. 

5""" 

6 

7import base64 

8import os 

9import re 

10import time 

11import platform 

12import warnings 

13from queue import Empty 

14from time import sleep 

15from collections import Counter 

16from io import StringIO, BytesIO 

17import numpy 

18from nbformat import NotebookNode, writes 

19from nbformat.reader import reads 

20from ..imghelper.svg_helper import svg2img, PYQImageException 

21from ..loghelper.flog import noLOG 

22 

23 

class NotebookError(Exception):
    """
    Raised when the execution of a notebook fails.
    """

29 

30 

class NotebookKernelError(Exception):
    """
    Raised when the kernel cannot be started or when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """

38 

39 

40class NotebookRunner(object): 

41 

42 """ 

43 The kernel communicates with mime-types while the notebook 

44 uses short labels for different cell types. We'll use this to 

45 map from kernel types to notebook format types. 

46 

47 This class executes a notebook end to end. 

48 

49 .. index:: kernel, notebook 

50 

51 The class can use different kernels. The following links give more 

52 information on how to create or test a kernel: 

53 

54 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_ 

55 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_ 

56 

57 .. faqref:: 

58 :title: Do I need to shutdown the kernel after running a notebook? 

59 

60 .. index:: travis 

61 

62 If the class is instantiated with *kernel=True*, a kernel will 

63 be started. It must be shutdown otherwise the program might 

64 be waiting for it for ever. That is one of the reasons why the 

65 travis build does not complete. The build finished but cannot terminate 

66 until all kernels are shutdown. 

67 """ 

68 

69 # . available output types 

70 MIME_MAP = { 

71 'image/jpeg': 'jpeg', 

72 'image/png': 'png', 

73 'image/gif': 'gif', 

74 'text/plain': 'text', 

75 'text/html': 'html', 

76 'text/latex': 'latex', 

77 'application/javascript': 'html', 

78 'image/svg+xml': 'svg', 

79 } 

80 

def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300):
    """
    Stores the notebook and, if *kernel* is True, starts a kernel to run it.

    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started from it
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
    @param      code_init       to initialize the notebook with a python code as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestart=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                if True, a kernel is started and the notebook can be run
    @param      filename        to add the notebook file if there is one in error messages
    @param      replacements    replacements to make in every cell before running it,
                                dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                with the same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_

    The method raises *SyntaxError* if an option in *extended_args*
    does not look like ``--name=value`` and @see cl NotebookKernelError
    if the kernel cannot be started or is not ready in time.
    """
    # These attributes are set first so that ``__del__`` and the other
    # methods find them even if starting the kernel fails below.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment

    if kernel:
        try:
            from jupyter_client import KernelManager
        except ImportError:  # pragma: no cover
            from ipykernel import KernelManager

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            self.km = KernelManager(
                kernel_name=kernel_name) if kernel_name is not None else KernelManager()

    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # The values actually received are stored (not the defaults) so that
    # ``copy`` rebuilds an instance configured like this one.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level, extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements,
        detailed_log=detailed_log, startup_timeout=startup_timeout)

    # command line given to the kernel
    args = []
    if profile_dir:
        args.append(f'--profile-dir={os.path.abspath(profile_dir)}')
    if log_level:
        args.append(f'--log-level={log_level}')

    if extended_args is not None and len(extended_args) > 0:
        for opt in extended_args:
            if not opt.startswith("--"):
                raise SyntaxError(  # pragma: no cover
                    "every option should start with '--': " + opt)
            if "=" not in opt:
                raise SyntaxError(  # pragma: no cover
                    "every option should be assigned a value: " + opt)
            args.append(opt)

    if not kernel:
        return

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                f"Failure with args: {args}\nand error:\n{str(e)}") from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        # The current directory of the process is restored
        # even if the kernel failed to start.
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # NOTE(review): the original comment said "We wait for one second."
        # but the code waits *startup_timeout* seconds again before
        # cleaning up; behaviour kept, to be confirmed.
        sleep(startup_timeout)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            f"Wait_for_ready fails (timeout={startup_timeout}).") from e

199 

def __del__(self):
    """
    Drops the references this instance holds on the kernel manager
    and the kernel client. The kernel itself is not shut down here,
    see @see me shutdown_kernel.

    The attributes may be missing if the constructor failed early,
    this case is ignored.
    """
    for name in ("km", "kc"):
        if getattr(self, name, None) is not None:
            delattr(self, name)

208 

def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    @param      filename    None, a file name (str) or a stream with a method *write*
    @param      encoding    encoding used when *filename* is a file name
    @return                 the JSON string if *filename* is None, None otherwise
    """
    if filename is None:
        return writes(self.nb)

    if isinstance(filename, str):
        # The file is opened here and the method calls itself with the stream.
        with open(filename, "w", encoding=encoding) as stream:
            self.to_json(stream)
        return None

    filename.write(writes(self.nb))
    return None

229 

def copy(self):
    """
    Copies the notebook (just the content): the notebook is serialized,
    read again and given to a new instance built with the arguments
    stored in *init_args*, *theNotebook* and *filename* excepted.

    @return         instance of @see cl NotebookRunner
    """
    buffer = StringIO()
    self.to_json(buffer)
    kwargs = {key: value for key, value in self.init_args.items()
              if key not in ("theNotebook", "filename")}
    content = reads(buffer.getvalue())
    return NotebookRunner(content, **kwargs)

244 

def __add__(self, nb):
    """
    Merges two notebooks together, returns a new one,
    this instance is copied first and left unchanged.

    @param      nb      notebook
    @return             new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged

255 

def shutdown_kernel(self):
    """
    Stops the channels of the kernel client, then shuts down
    the kernel immediately (``now=True``).

    The method raises *ValueError* if no kernel was started.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)

266 

def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, when the code refers to
    *bokeh*, ``output_notebook()``. It also applies the
    replacements stored in *self.replacements* to every line.

    @param      code        code (string) or None
    @return                 cleaned code, None if *code* is None

    If the last line of the code is a commented ``show()``,
    the expression ``"nothing to show"`` is appended.
    """
    # None must be checked before any string operation on *code*.
    if code is None:
        return code

    has_bokeh = "bokeh." in code or "from bokeh" in code or "import bokeh" in code
    lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
    res = []
    show_is_last = False
    for line in lines:
        if line.replace(" ", "") == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and line.replace(" ", "") == "output_notebook()":
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        if self.replacements is not None:
            for k, v in self.replacements.items():
                line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        res.append('"nothing to show"')
    return "\n".join(res)

297 

298 @staticmethod 

299 def get_cell_code(cell): 

300 """ 

301 Returns the code of a cell. 

302 

303 @param cell a cell or a string 

304 @return boolean (=iscell), string 

305 """ 

306 if isinstance(cell, str): 

307 iscell = False 

308 return iscell, cell 

309 

310 iscell = True 

311 try: 

312 return iscell, cell.source 

313 except AttributeError: # pragma: no cover 

314 return iscell, cell.input 

315 

def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and updates the output of that cell inplace.

    :param index_cell: index of the cell, only used in logs and error messages
    :param cell: cell to execute (a cell or a string)
    :param clean_function: cleaning function to apply to the code before running it,
        it is applied after @see me clean_code
    :param max_nbissue: number of times exception *Empty* can be raised
        while waiting for messages before stopping
    :return: output of the cell (list of outputs), an empty string if the
        code is empty or if the kernel stayed silent too many times

    The method raises *ValueError* if no kernel was started and
    *NotImplementedError* if the kernel sends an unexpected message type.
    '''
    verbose = self.detailed_log
    if verbose:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG(f'-- running cell:\n{codei}\n')
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    if len(code) == 0:
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # status returned on the shell channel
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status != 'error':
        traceback_text = ''
        self.fLOG('-- cell returned')
    else:  # pragma: no cover
        # ANSI color sequences are removed from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))

    # messages published on the iopub channel until the kernel is idle
    outs = []
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # NOTE(review): this overwrites an 'error' status read
                    # from the shell reply, the error branch at the end of
                    # the method then looks unreachable -- to be confirmed.
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = f"exception Empty was raised ({e!r})"
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(f' msg_type={msg_type}')

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # previous outputs are dropped
            outs = []
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join(f"{k}={v}"
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                f"Unhandled iopub message: '{msg_type}'\n--CONTENT--\n{dcontent}")

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(f' out={type(out)}')
            if hasattr(out, "data"):
                self.detailed_log(f' out={out.data}')

    if iscell:
        cell['outputs'] = outs

    # textual outputs are gathered to be logged
    raw = []
    for node in outs:
        try:
            data = node.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in data.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the shell reply into text, one line per key
        pieces = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                v_ = "\n".join(f" [{_}]={str(__)}"
                               for _, __ in sorted(v.items()))
                pieces.append(f"reply['{k}']=dict\n{v_}")
            else:
                pieces.append(f"reply['{k}']={str(v)}")
        return "\n".join(pieces)

    if status == 'error':  # pragma: no cover
        sreply = reply2string(reply)
        scode = [code] if len(code) < 5 else ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log(f'[run_cell] status={status}')
    return outs

512 

def to_python(self):
    """
    Converts the notebook into python: code cells are copied,
    markdown and raw cells become comments, an empty line
    follows every cell.

    @return         string
    """
    rows = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            rows.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for line in cell.source.split("\n"):
                if line.startswith("#"):
                    # titles are preceded by a separator
                    rows.extend(("###", line))
                else:
                    rows.append("# " + line)
        else:
            # No text, no code.
            rows.append(f"# cell.type = {kind}")
        rows.append("")
    return "\n".join(rows)

538 

def iter_code_cells(self):
    '''
    Iterates over the cells of the notebook whose type is ``'code'``.
    '''
    yield from (cell for cell in self.iter_cells()
                if cell.cell_type == 'code')

546 

def iter_cells(self):
    '''
    Iterates over the notebook cells. If the notebook has an
    attribute *worksheets*, the cells of every worksheet are
    returned in order, otherwise the cells of the notebook itself.
    '''
    if hasattr(self.nb, "worksheets"):  # pragma: no cover
        for ws in self.nb.worksheets:
            yield from ws.cells
    else:
        yield from self.nb.cells

558 

def first_cell(self):
    """
    Returns the first cell of the notebook, None if it has no cell.
    """
    return next(iter(self.iter_cells()), None)

565 

def _cell_container(self):
    """
    Returns the container of cells, it depends on the format:
    the cells of the last worksheet if the notebook has an
    attribute *worksheets*, the cells of the notebook otherwise.

    @return     cell container

    The method raises @see cl NotebookError if the notebook
    has no worksheet.
    """
    if not hasattr(self.nb, "worksheets"):
        return self.nb.cells

    last = None  # pragma: no cover
    for ws in self.nb.worksheets:
        last = ws
    if last is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return last.cells

580 

def __len__(self):
    """
    Returns the number of cells, it iterates on cells
    to get this information and does not cache it.

    @return         int
    """
    count = 0
    for _ in self.iter_cells():
        count += 1
    return count

589 

def cell_type(self, cell):
    """
    Returns attribute *cell_type* of a cell.

    @param      cell        cell, from @see me iter_cells for example
    @return                 type of the cell
    """
    kind = cell.cell_type
    return kind

598 

def cell_metadata(self, cell):
    """
    Returns attribute *metadata* of a cell.

    @param      cell        cell
    @return                 metadata, the same object, not a copy
    """
    meta = cell.metadata
    return meta

607 

def _check_thumbnail_tuple(self, b):
    """
    Checks the types of a thumbnail.

    @param      b       tuple *(image, format)*
    @return             b, unchanged

    The function raises *TypeError* if *b* is not a tuple of
    length 2, if the format is ``'svg'`` and the image is not a string,
    or if the image is not bytes for any format but the plotly
    and bokeh ones, which are not checked.
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            f"tuple expected, not {type(b)}")
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            f"tuple expected of lengh 2, not {len(b)}")

    content, fmt = b
    # Don't know how to extract a snippet out of these formats.
    unchecked = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                 "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json')
    if fmt == "svg":
        if not isinstance(content, str):
            raise TypeError(  # pragma: no cover
                f"str expected for svg, not {type(b[0])}")
    elif fmt not in unchecked and not isinstance(content, bytes):
        raise TypeError(  # pragma: no cover
            f"bytes expected for images, not {type(b[0])}-'{b[1]}'\n{b}")
    return b

636 

def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text, bytes are decoded as utf-8,
                            any other type is converted with *str*
    @param      format      text, json, ... (not used to draw the picture)
    @param      context     (str) indication on the content of text (error, ...),
                            not used to draw the picture
    @param      asbytes     results as bytes or as an image
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png.
    The size of the picture will depend on the text.
    The longer, the bigger. A text without any end of line is split
    into rows of 20 characters and the text is truncated to 200 characters.
    The method relies on matplotlib
    and then converts the image into a PIL image.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # ``"\n" not in text`` below would fail on bytes
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single long line is wrapped into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # every non empty line is drawn below the previous one,
        # dx, dy hold the extent of the whole text
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        # width / height ratio of the picture, kept between 1 and 3
        ratio = abs(dx) * 1. / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        try:
            ax.set_xlim(numpy.array([0., dx]))
            ax.set_ylim(numpy.array([-dy, 0.]))
        except TypeError as e:
            warnings.warn(f"[create_picture_from] {e}")
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
    finally:
        # the figure created here is closed even if drawing fails
        plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    # the stream is read from its beginning
    sio.seek(0)
    img = Image.open(sio)
    return img

708 

def cell_image(self, cell, image_from_text=False):
    """
    Returns the cell image or None if not found.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if the output is not one
                                (text, html, javascript, ...)
    @return                     None if the cell is not a code cell or has no image,
                                otherwise the tuple *(image, extension)* returned by
                                method *_merge_images* applied on the images
                                collected from the outputs of the cell

    The method raises *NotImplementedError* for an unexpected
    output type or mime type.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types converted into a picture only if image_from_text is True,
    # the value is the format given to create_picture_from
    text_formats = {"application/javascript": "js",
                    "application/json": "json",
                    "text/html": "html",
                    "text/latex": "latex"}
    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
    widget_types = ("application/vnd.jupyter.widget-view+json",
                    "application/vnd.jupyter.widget-state+json")
    image_types = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    kept_as_is = ("text/vnd.plotly.v1+html",
                  "application/vnd.plotly.v1+json",
                  "application/vnd.bokehjs_exec.v0+json",
                  "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in {"execute_result", "display_data"}:
            for k, v in output["data"].items():
                if k == "text/plain":
                    if image_from_text:
                        results.append(self.create_picture_from(
                            v, "text", context=output["output_type"]))
                elif k in text_formats:
                    if image_from_text:
                        results.append(
                            self.create_picture_from(v, text_formats[k]))
                elif k == "image/svg+xml":
                    if not isinstance(v, str):
                        raise TypeError(  # pragma: no cover
                            f"This should be str not '{type(v)}' (=SVG).")
                    results.append((v, "svg"))
                elif k in widget_types:
                    # widgets are logged, not rendered
                    if "model_id" not in v:
                        raise KeyError(  # pragma: no cover
                            f"model_id is missing from {v}")
                    self.fLOG(f"[{k}] not rendered", v["model_id"])
                elif k in image_types:
                    if not isinstance(v, bytes):
                        v = base64.b64decode(v)
                    if not isinstance(v, bytes):
                        raise TypeError(  # pragma: no cover
                            f"This should be bytes not '{type(v)}' (=IMG:{k}).")
                    results.append((v, k.split("/")[-1]))
                elif k in kept_as_is:
                    results.append((v, k.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        f"cell type: {kind}\nk={k}\nv={v}\nCELL:\n{cell}")
        elif output_type == "error":
            vl = output["traceback"]
            if image_from_text:
                for v in vl:
                    results.append(self.create_picture_from(
                        v, "text", context="error"))
        elif output_type == "stream":
            v = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(v, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if len(results) == 0:
        return None
    res = self._merge_images(results)
    if res[0] is None:
        return None
    self._check_thumbnail_tuple(res)
    return res

806 

def cell_height(self, cell):
    """
    Approximates the height of a cell by the number of lines it contains.

    @param      cell        cell
    @return                 number of lines

    For markdown and raw cells, a line counts for one plus one
    for every 80 characters. For code cells, the lines of the code are
    added to a rough estimation for every output. The method raises
    *NotImplementedError* for an unexpected cell type, output type
    or mime type.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(line) // 80 for line in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            f"cell type: {kind}\nCELL:\n{cell}")

    image_types = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    widget_types = ("application/vnd.jupyter.widget-view+json",
                    "application/vnd.jupyter.widget-state+json")
    plot_types = ("text/vnd.plotly.v1+html",
                  "application/vnd.plotly.v1+json",
                  "application/vnd.bokehjs_load.v0+json",
                  "application/vnd.bokehjs_exec.v0+json")

    nbl = len(cell.source.split("\n"))
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in ("execute_result", "display_data"):
            for k, v in output["data"].items():
                if k in ("text/plain", "text/html"):
                    nbl += len(v.split("\n"))
                elif k == "application/javascript":
                    # rough estimation
                    nbl += len(v.split("\n")) // 2
                elif k == "application/json":
                    # rough estimation
                    try:
                        nbl += len(v.split("{"))
                    except AttributeError:  # pragma: no cover
                        nbl += len(v) // 5 + 1
                elif k == "image/svg+xml":
                    nbl += len(v) // 5
                elif k == "text/latex":
                    nbl += len(v.split("\\\\")) * 2
                elif k in image_types:
                    nbl += len(v) // 50
                elif k in widget_types:
                    nbl += 5
                elif k in plot_types:
                    nbl += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, k, v, cell))
        elif output_type == "stream":
            nbl += len(output["text"].split("\n"))
        elif output_type == "error":
            nbl += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

    return nbl

882 

def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Tries to add tags for a slide show when they are too few.
    The metadata of the cells is modified inplace.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell without any tag which starts with a title
    (``#``, ``##``, ``###``) begins a new slide. Any other cell
    without any tag begins a subslide if the current slide would
    exceed one of the two thresholds. The number of lines
    comes from @see me cell_height.
    """
    modified = {}
    nbline = 0
    nbcell = 0
    titles = ("# ", "## ", "### ")
    for i, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # an existing slide or subslide resets the counters
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                nbline = 0
                nbcell = 0
        elif cell.cell_type == "markdown" and cell.source.startswith(titles):
            meta["slideshow"] = {'slide_type': 'slide'}
            nbline = 0
            nbcell = 0
            modified[i] = ("slide", "section", cell)

        dh = self.cell_height(cell)
        dc = 1
        too_big = dc + nbcell > max_nb_cell or nbline + dh > max_nb_line
        if "slideshow" not in meta and too_big:
            modified[i] = (
                "subslide", f"{nbcell}-{nbline} <-> {dc}-{dh}", cell)
            nbline = 0
            nbcell = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        nbline += dh
        nbcell += dc

    return modified

929 

def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the cells of a notebook in order and updates
    the outputs in-place.

    If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
    subsequent cells are run (by default, the notebook execution stops).

    @param      skip_exceptions     skip exception, a cell which raises
                                    @see cl NotebookError is logged with *fLOG*
                                    and the execution goes on
    @param      progress_callback   call back function, called with the index of
                                    the code cell after it was processed
    @param      additional_path     additional paths (as a list or None if none)
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed or not, if the function
                                    returns None, the execution of the notebooks and skip
                                    the execution of the other cells
    @param      clean_function      function which cleans a cell's code before executing
                                    it (None for None)
    @param      context             not used by this method
    @return                         dictionary with statistics: *nbcell* (code cells seen),
                                    *nbrun* (cells sent to the kernel), *nbvalid*
                                    (cells run without error), *time*

    The function adds the local variable ``theNotebook`` with
    the absolute file name of the notebook.
    Function *valid* can return *None* to stop the execution of the notebook
    before this cell.

    The method raises *TypeError* if *additional_path* is not a list,
    *RuntimeError* if exception *Empty* escapes from @see me run_cell.
    '''
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] Starting execution of '{self._filename}'")
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        code = ["import sys"]
        for p in additional_path:
            code.append(f"sys.path.append(r'{p}')")
        cell = "\n".join(code)
        self.run_cell(-1, cell)

    # we add local variable theNotebook
    if self.theNotebook is not None:
        cell = f"theNotebook = r'''{self.theNotebook}'''"
        self.run_cell(-1, cell)

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbnerr = 0
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = self.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                break
            if not r:
                continue
        try:
            nbrun += 1
            self.run_cell(i, cell, clean_function=clean_function)
            nbnerr += 1
        except Empty as er:  # pragma: no cover
            raise RuntimeError(
                f"{self.comment}\nissue when executing:\n{codei}") from er
        except NotebookError as e:  # pragma: no cover
            if not skip_exceptions:
                raise
            # the failing cell is skipped, the next ones are run
            self.fLOG(
                f"[run_notebook] cell {i} failed and was skipped:\n{codei}\n{e}")
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] end execution of '{self._filename}'")
        self.detailed_log(
            f"[run_notebook] execution time: {etime}")
        self.detailed_log(f"[run_notebook] statistics : {res}")
    return res

1016 

def count_code_cells(self):
    '''
    Counts the code cells of the notebook.

    @return     number of cells yielded by *iter_code_cells*
    '''
    total = 0
    for _ in self.iter_code_cells():
        total += 1
    return total

1022 

def merge_notebook(self, nb):
    """
    Appends notebook *nb* to this one.

    @param      nb      notebook or list of notebook (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result into a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # a list of notebooks is merged one after the other
        return sum(self.merge_notebook(n) for n in nb)

    last = self._cell_container()
    # The cells are collected before anything is appended: if *nb* is
    # this notebook (or shares its cell container), appending while
    # iterating could otherwise never terminate.
    cells = list(nb.iter_cells())
    for cell in cells:
        last.append(cell)
    return len(cells)

1056 

def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. If the first cell only holds a title, the
    description is taken from the second cell when it is a markdown cell.
    Markdown links ``[text](url)`` are replaced by their text and
    double quotes are removed from the description.

    @return             header, description

    The method raises ``ValueError`` if the first cell is not a
    markdown cell or if the description still contains an url
    once the links were replaced.
    """
    def split_header(s, get_header=True):
        # Splits a markdown source into (header, first paragraph).
        # The header is either a '#' title or a title underlined
        # (and maybe overlined) with '=' or '-'.
        parts = s.strip().splitlines()
        if not parts:
            # empty or blank cell: nothing to extract
            return '', ''

        if parts[0].startswith('#'):
            if get_header:
                header = re.sub(r'#+\s*', '', parts.pop(0))
            else:
                header = ''
        elif get_header:
            if parts[0].startswith(('=', '-')):
                # overline above the title
                parts = parts[1:]
                if not parts:
                    return '', ''
            header = parts.pop(0)
            if parts and parts[0].startswith(('=', '-')):
                # underline below the title
                parts.pop(0)
        else:
            header = ''

        if not parts:
            return header, ''
        # the description is the first paragraph, on a single line
        rest = '\n'.join(parts).lstrip().split('\n\n')
        return header, rest[0].replace('\n', ' ')

    first_cell = self.first_cell()

    if first_cell['cell_type'] != 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # replaces markdown links [text](url) by text
    reg = re.compile(r"(\[(.*?)\]\(([^ ]*)\))")
    new_desc = reg.sub(r"\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")

1114 

def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Goes through the outputs of the notebook, starting from the last
    cell, and builds one picture to illustrate the notebook.

    @param      max_width       maximum width of the thumbnail
    @param      max_height      maximum height of the thumbnail
    @param      use_default     force using the default image even if an image is present
    @return                     result of *_scale_image* for the selected image,
                                or None if the selected output cannot be
                                turned into a picture
    """
    # outputs which cannot be rendered as a static picture
    interactive_formats = (
        "vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
        "vnd.bokehjs_load.v0+json")
    reversed_cells = list(self.iter_cells())[::-1]

    # first pass: cell_image is called with False as second argument
    candidates = []
    for cell in reversed_cells:
        found = self.cell_image(cell, False)
        if found is None or len(found) == 0 or len(found[0]) == 0:
            continue
        if found[1] in interactive_formats:
            continue
        self._check_thumbnail_tuple(found)
        candidates.append(found)

    # second pass: cell_image is called with True, the search stops
    # as soon as a large enough content was found
    if not use_default and len(candidates) == 0:
        for cell in reversed_cells:
            found = self.cell_image(cell, True)
            if found is None or len(found) == 0 or len(found[0]) == 0:
                continue
            self._check_thumbnail_tuple(found)
            candidates.append(found)
            if len(found[0]) >= 1000:
                break

    if use_default:
        candidates = []

    if len(candidates) == 0:
        # no image, we need to consider the default one
        default_path = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(default_path, "rb") as f:
            found = (f.read(), "png")
        self._check_thumbnail_tuple(found)
        candidates.append(found)

    if len(candidates) == 0:
        raise ValueError(  # pragma: no cover
            "There should be at least one image.")

    # the list follows the reversed cell order, the first
    # element comes from the last cell with an image
    chosen = candidates[0]
    content = chosen[0]
    kind = chosen[1]

    if kind in interactive_formats:
        return None
    if kind == 'svg':
        try:
            converted = svg2img(content)
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
        return self._scale_image(
            converted, kind, max_width=max_width, max_height=max_height)
    return self._scale_image(
        content, kind, max_width=max_width, max_height=max_height)

1178 

def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.

    @param      in_bytes    image as bytes, a :epkg:`PIL` image or a tuple
                            whose first element is one of them
    @param      format      indication of the format (can be empty),
                            only used in the error message
    @param      max_width   maximum size of the thumbnail
    @param      max_height  maximum size of the thumbnail
    @return                 Image (PIL)

    An image which already fits in *(max_width, max_height)* is returned
    unchanged. Otherwise the image is reduced and pasted in the middle of
    a white RGB image of size *(max_width, max_height)*. When a PIL image
    is given, it is resized in place by ``Image.thumbnail``.
    The method raises ``TypeError`` if *in_bytes* is neither bytes
    nor a PIL image.
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            f"bytes expected, not {type(in_bytes)} - format={format}")
    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # picks the scale which makes both dimensions fit
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        # the image is already small enough
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was removed in Pillow 10, it was an alias
    # for the LANCZOS filter which is looked up in a way that
    # works with old and recent versions of Pillow.
    resample = getattr(
        getattr(Image, "Resampling", Image), "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image and center
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb

1227 

1228 def _merge_images(self, results): 

1229 """ 

1230 Merges images defined by (buffer, format). 

1231 The method uses PIL to merge images when possible. 

1232 

1233 @return ``[ (image, format) ]`` 

1234 """ 

1235 if len(results) == 1: 

1236 results = results[0] 

1237 self._check_thumbnail_tuple(results) 

1238 return results 

1239 if len(results) == 0: 

1240 return None 

1241 

1242 formats_counts = Counter(_[1] for _ in results) 

1243 if len(formats_counts) == 1: 

1244 format = results[0][1] 

1245 else: 

1246 items = sorted(((v, k) 

1247 for k, v in formats_counts.items()), reverse=False) 

1248 for it in items: 

1249 format = it 

1250 break 

1251 

1252 results = [_ for _ in results if _[1] == format] 

1253 if format == "svg": 

1254 return ("\n".join(_[0] for _ in results), format) 

1255 

1256 # local import to avoid testing dependency on PIL: 

1257 try: 

1258 from PIL import Image 

1259 except ImportError: # pragma: no cover 

1260 import Image 

1261 

1262 dx = 0. 

1263 dy = 0. 

1264 over = 0.7 

1265 imgs = [] 

1266 for in_bytes, _ in results: 

1267 img = Image.open(BytesIO(in_bytes)) 

1268 imgs.append(img) 

1269 dx = max(dx, img.size[0]) 

1270 dy += img.size[1] * over 

1271 

1272 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220)) 

1273 for img in imgs: 

1274 dy -= img.size[1] * over 

1275 new_im.paste(img, (0, max(int(dy), 0))) 

1276 

1277 if max(dx, dy) > 0: 

1278 image_buffer = BytesIO() 

1279 new_im.save(image_buffer, "PNG") 

1280 b = image_buffer.getvalue(), "png" 

1281 return b 

1282 b = None, "png" 

1283 return b