Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Modified version of `runipy.notebook_runner 

4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_. 

5""" 

6 

7import base64 

8import os 

9import re 

10import time 

11import platform 

12import warnings 

13from queue import Empty 

14from time import sleep 

15from collections import Counter 

16from io import StringIO, BytesIO 

17from nbformat import NotebookNode, writes 

18from nbformat.reader import reads 

19from ..imghelper.svg_helper import svg2img, PYQImageException 

20from ..loghelper.flog import noLOG 

21 

22 

class NotebookError(Exception):
    """Signals that the execution of a notebook failed."""

28 

29 

class NotebookKernelError(Exception):
    """
    Signals that the kernel could not be started or that
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ failed.
    """

37 

38 

39class NotebookRunner(object): 

40 

41 """ 

42 The kernel communicates with mime-types while the notebook 

43 uses short labels for different cell types. We'll use this to 

44 map from kernel types to notebook format types. 

45 

46 This class executes a notebook end to end. 

47 

48 .. index:: kernel, notebook 

49 

50 The class can use different kernels. The next links gives more 

51 information on how to create or test a kernel: 

52 

53 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_ 

54 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_ 

55 

56 .. faqref:: 

57 :title: Do I need to shutdown the kernel after running a notebook? 

58 

59 .. index:: travis 

60 

61 If the class is instantiated with *kernel=True*, a kernel will 

62 be started. It must be shutdown otherwise the program might 

63 be waiting for it for ever. That is one of the reasons why the 

64 travis build does not complete. The build finished but cannot terminate 

65 until all kernels are shutdown. 

66 """ 

67 

68 # . available output types 

69 MIME_MAP = { 

70 'image/jpeg': 'jpeg', 

71 'image/png': 'png', 

72 'image/gif': 'gif', 

73 'text/plain': 'text', 

74 'text/html': 'html', 

75 'text/latex': 'latex', 

76 'application/javascript': 'html', 

77 'image/svg+xml': 'svg', 

78 } 

79 

def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300):
    """
    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started from it
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
    @param      code_init       to initialize the notebook with a python code as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestar=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                if True, a kernel is started and the notebook can be run
    @param      filename        to add the notebook file if there is one in error messages
    @param      replacements    replacements to make in every cell before running it,
                                dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                with the same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_

    The constructor raises *SyntaxError* if an option of *extended_args*
    does not start with ``--`` or has no value, and @see cl NotebookKernelError
    if the kernel cannot be started or is not ready in time.
    """
    # Every attribute read by __del__ or by error handlers is defined first,
    # so a failure below never leaves a half-initialised instance.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # The values received are stored (not the defaults) so that copy()
    # rebuilds an instance with the same kernel settings.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level, extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements)

    if kernel:
        try:
            from jupyter_client import KernelManager
        except ImportError:  # pragma: no cover
            from ipykernel import KernelManager

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            self.km = KernelManager(
                kernel_name=kernel_name) if kernel_name is not None else KernelManager()

    # command line arguments given to the kernel
    args = []
    if profile_dir:
        args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
    if log_level:
        args.append('--log-level=%s' % log_level)

    if extended_args is not None and len(extended_args) > 0:
        for opt in extended_args:
            if not opt.startswith("--"):
                raise SyntaxError(
                    "every option should start with '--': " + opt)
            if "=" not in opt:
                raise SyntaxError(  # pragma: no cover
                    "every option should be assigned a value: " + opt)
            args.append(opt)

    if not kernel:
        return

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                "Failure with args: {0}\nand error:\n{1}".format(args, str(e))) from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        # the current directory is restored even if the kernel failed to start
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # We wait for one second (the timeout already elapsed once),
        # then everything which was started is released.
        sleep(1)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            "Wait_for_ready fails (timeout={0}).".format(startup_timeout)) from e

198 

def __del__(self):
    """
    Drops the references to the kernel manager (*km*) and to the
    kernel client (*kc*) when they are set. The method only deletes
    the attributes, it does not call any method of these objects.
    It tolerates instances whose constructor failed before
    these attributes were created.
    """
    for name in ("km", "kc"):
        if getattr(self, name, None) is not None:
            delattr(self, name)

207 

def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    @param      filename    None, a file name (str) or an object with a method *write*
    @param      encoding    encoding used when *filename* is a file name
    @return                 the JSON string if *filename* is None, None otherwise
    """
    if filename is None:
        buffer = StringIO()
        buffer.write(writes(self.nb))
        return buffer.getvalue()

    if not isinstance(filename, str):
        # anything which is not a string is used as a stream
        filename.write(writes(self.nb))
        return None

    with open(filename, "w", encoding=encoding) as stream:
        self.to_json(stream)
    return None

228 

def copy(self):
    """
    Builds a new runner from the JSON content of this notebook
    and from the arguments stored in *init_args*, except
    *theNotebook* and *filename* which are not propagated.

    @return         instance of @see cl NotebookRunner
    """
    buffer = StringIO()
    self.to_json(buffer)
    kwargs = {key: value for key, value in self.init_args.items()
              if key not in ("theNotebook", "filename")}
    duplicate = reads(buffer.getvalue())
    return NotebookRunner(duplicate, **kwargs)

243 

def __add__(self, nb):
    """
    Merges two notebooks together, the result is a new one,
    a copy of this notebook into which *nb* is merged.

    @param      nb      notebook
    @return             new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged

254 

def shutdown_kernel(self):
    """
    Stops the channels of the kernel client, then asks the kernel
    manager to shut the kernel down immediately (``now=True``).
    Raises *ValueError* if no kernel client is available.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)

265 

def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, when the code mentions bokeh,
    ``output_notebook()``. Trailing spaces and tabulations are removed
    from every line and the replacements stored in *self.replacements*
    (if any) are applied to every line.

    @param      code        code (string) or None
    @return                 cleaned code, None if *code* is None

    If the last processed line is a commented ``show()``, the
    expression ``"nothing to show"`` is appended so that the cell
    still ends with an expression.
    """
    # None must be tested before any ``in`` test on the code.
    if code is None:
        return code

    has_bokeh = any(marker in code
                    for marker in ("bokeh.", "from bokeh", "import bokeh"))
    replacements = self.replacements if self.replacements is not None else {}

    res = []
    show_is_last = False
    for raw_line in code.split("\n"):
        line = raw_line.strip("\n\r").rstrip(" \t")
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            # this branch leaves show_is_last untouched on purpose
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        for k, v in replacements.items():
            line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        res.append('"nothing to show"')
    return "\n".join(res)

296 

297 @staticmethod 

def get_cell_code(cell):
    """
    Extracts the code of a cell.

    @param      cell        a cell or a string
    @return                 tuple (boolean, string), the boolean is False
                            if *cell* is a string and True otherwise

    For a cell, the code is its attribute *source* or,
    if it does not exist, its attribute *input*.
    """
    if not isinstance(cell, str):
        try:
            code = cell.source
        except AttributeError:  # pragma: no cover
            code = cell.input
        return True, code
    return False, cell

314 

def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and update the output of that cell inplace.

    :param index_cell: index of the cell (only used in log and error messages)
    :param cell: cell to execute, a cell or a string
    :param clean_function: cleaning function to apply to the code before running it,
        it is called after method *clean_code*
    :param max_nbissue: number of times an issue can be raised before stopping,
        an issue is an empty message queue (exception *Empty*)
    :return: list of outputs of the cell, an empty string if the cleaned
        code is empty or if the message queue was empty more than
        *max_nbissue* times

    The method raises *ValueError* if no kernel client is available,
    *NotImplementedError* for an unknown message type.

    NOTE(review): the indentation of this listing was reconstructed.
    With this reading, the loop below is only left through ``break``
    (which sets ``status = 'ok'``), ``return`` or an exception, so the
    final ``status == 'error'`` branch raising *NotebookError* looks
    unreachable -- confirm against the original file.
    '''
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG('-- running cell:\n%s\n' % codei)
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    # built-in cleaning first, then the user's one
    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    if len(code) == 0:
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # the shell reply tells whether the execution succeeded
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status == 'error':
        # removes ANSI escape sequences (colors) from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    # collects the messages published on the iopub channel
    # until the kernel goes back to idle
    outs = list()
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = "exception Empty was raised (%r)" % e
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(' msg_type={0}'.format(msg_type))

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        # these messages do not produce any output
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # outputs collected so far are dropped
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join("{0}={1}".format(k, v)
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                "Unhandled iopub message: '{0}'\n--CONTENT--\n{1}".format(msg_type, dcontent))

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(' out={0}'.format(type(out)))
            if hasattr(out, "data"):
                self.detailed_log(' out={0}'.format(out.data))

    if iscell:
        cell['outputs'] = outs

    # gathers every text output (mime type starting with "text") for the logs
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply into a readable string,
        # dictionaries are expanded on one level
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(" [{0}]={1}".format(_, str(__)))
                v_ = "\n".join(temp)
                sreply.append("reply['{0}']=dict\n{1}".format(k, v_))
            else:
                sreply.append("reply['{0}']={1}".format(k, str(v)))
        sreply = "\n".join(sreply)
        return sreply

    # NOTE(review): content and msg_type are only bound if at least one
    # message was read in the loop above -- confirm this branch is safe.
    if status == 'error':  # pragma: no cover
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log('[run_cell] status={0}'.format(status))
    return outs

511 

def to_python(self):
    """
    Converts the notebook into python. Code cells are copied,
    markdown and raw cells become comments (a line ``###`` is inserted
    before every line starting with ``#``), any other cell becomes a
    comment giving its type. An empty line follows every cell.

    @return         string
    """
    out = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            # same logic as get_cell_code for a cell
            try:
                code = cell.source
            except AttributeError:  # pragma: no cover
                code = cell.input
            out.append(code)
        elif kind in ("markdown", "raw"):
            for text_line in cell.source.split("\n"):
                if text_line.startswith("#"):
                    out.extend(["###", text_line])
                else:
                    out.append("# " + text_line)
        else:
            # No text, no code.
            out.append("# cell.type = {0}".format(kind))
        out.append("")
    return "\n".join(out)

537 

def iter_code_cells(self):
    '''
    Enumerates the cells of the notebook whose type is ``code``.
    '''
    yield from (c for c in self.iter_cells() if c.cell_type == 'code')

545 

def iter_cells(self):
    '''
    Enumerates the cells of the notebook. If the notebook has an
    attribute *worksheets*, the cells of every worksheet are
    enumerated, otherwise the cells of the notebook itself.
    '''
    if not hasattr(self.nb, "worksheets"):
        yield from self.nb.cells
        return
    for sheet in self.nb.worksheets:  # pragma: no cover
        yield from sheet.cells

557 

def first_cell(self):
    """
    Returns the first cell produced by *iter_cells*,
    None if there is no cell.
    """
    return next(iter(self.iter_cells()), None)

564 

def _cell_container(self):
    """
    Returns the container new cells can be appended to: the cells of
    the last worksheet if the notebook has an attribute *worksheets*,
    the cells of the notebook otherwise.

    @return     cell container

    Raises @see cl NotebookError if there is no worksheet.
    """
    if not hasattr(self.nb, "worksheets"):
        return self.nb.cells
    last = None
    for last in self.nb.worksheets:  # pragma: no cover
        pass
    if last is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return last.cells

579 

def __len__(self):
    """
    Returns the number of cells. The method goes through all
    the cells every time it is called, nothing is cached.

    @return         int
    """
    count = 0
    for _ in self.iter_cells():
        count += 1
    return count

588 

def cell_type(self, cell):
    """
    Gives the type of a cell (its attribute *cell_type*).

    @param      cell        from @see me iter_cells
    @return                 type
    """
    kind = cell.cell_type
    return kind

597 

def cell_metadata(self, cell):
    """
    Gives the metadata of a cell (its attribute *metadata*).

    @param      cell        cell
    @return                 metadata
    """
    meta = cell.metadata
    return meta

606 

def _check_thumbnail_tuple(self, b):
    """
    Validates a thumbnail.

    @param      b       tuple (image, format)
    @return             b

    The function raises *TypeError* if *b* is not a tuple of two
    elements, if the format is ``svg`` and the image is not a string,
    or if the image is not bytes for any format other than
    the plotly and bokeh ones, which are not checked.
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            "tuple expected, not {0}".format(type(b)))
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            "tuple expected of lengh 2, not {0}".format(len(b)))

    content, fmt = b
    if fmt == "svg":
        if isinstance(content, str):
            return b
        raise TypeError(
            "str expected for svg, not {0}".format(type(content)))

    # Don't know how to extract a snippet out of this.
    unchecked = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                 "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json')
    if fmt in unchecked or isinstance(content, bytes):
        return b
    raise TypeError(
        "bytes expected for images, not {0}-'{1}'\n{2}".format(type(content), fmt, b))

635 

def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text, anything which is neither a string
                            nor bytes is converted with ``str``,
                            bytes are decoded as utf-8
    @param      format      text, json, ... (not used by the implementation)
    @param      context     (str) indication on the content of text (error, ...),
                            (not used by the implementation)
    @param      asbytes     results as bytes or as an image
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png.
    The size of the picture will depend on the text.
    The longer, the bigger. A text on a single line is split into rows
    of 20 characters and only the first 200 characters are drawn.
    The method relies on matplotlib
    and then convert the image into a PIL image.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # ``in`` and ``split`` below need a string
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single line is wrapped into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    fp = FontProperties(size=200)

    # lines are stacked from top to bottom, dx and dy measure
    # the extent of the text to set the limits of the axes
    dx = 0
    dy = 0
    for line in lines:
        if len(line.strip()) > 0:
            ax.text(0, -dy, line, fontproperties=fp, va='top')
            tp = TextPath((0, -dy), line, prop=fp)
            bb = tp.get_extents()
            dy += bb.height
            dx = max(dx, bb.width)

    ratio = abs(dx) / max(abs(dy), 1)
    ratio = max(min(ratio, 3), 1)
    fig.set_size_inches(int((1 + size) * ratio), 1 + size)
    ax.set_xlim([0, dx])
    ax.set_ylim([-dy, 0])
    ax.set_axis_off()
    sio = BytesIO()
    fig.savefig(sio, format="png")
    # closes the figure created above, not whatever figure is current
    plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    sio.seek(0)
    img = Image.open(sio)
    return img

704 

def cell_image(self, cell, image_from_text=False):
    """
    Returns the image of a cell or None if there is none.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if the output is not one
                                (text, html, latex, json, javascript, errors, streams)
    @return                     None if the cell is not a code cell or has no image,
                                otherwise what method *_merge_images* builds
                                from the list of tuples (image, extension)
                                collected from the outputs of the cell

    The method raises *NotImplementedError* for an unknown
    output type or mime type.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types only converted into a picture of their text
    text_like = {"application/javascript": "js", "application/json": "json",
                 "text/html": "html", "text/latex": "latex"}
    bitmaps = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    kept_as_is = ("text/vnd.plotly.v1+html", "application/vnd.plotly.v1+json",
                  "application/vnd.bokehjs_exec.v0+json",
                  "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in {"execute_result", "display_data"}:
            for k, v in output["data"].items():
                if k == "text/plain":
                    if image_from_text:
                        results.append(self.create_picture_from(
                            v, "text", context=output["output_type"]))
                elif k in text_like:
                    if image_from_text:
                        results.append(
                            self.create_picture_from(v, text_like[k]))
                elif k == "image/svg+xml":
                    if not isinstance(v, str):
                        raise TypeError(
                            "This should be str not '{0}' (=SVG).".format(type(v)))
                    results.append((v, "svg"))
                elif k == "application/vnd.jupyter.widget-view+json":
                    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
                    if "model_id" not in v:
                        raise KeyError(  # pragma: no cover
                            "model_id is missing from {0}".format(v))
                    model_id = v["model_id"]
                    self.fLOG(
                        "[application/vnd.jupyter.widget-view+json] not rendered", model_id)
                elif k in bitmaps:
                    # images which are not bytes are base64 encoded
                    if not isinstance(v, bytes):
                        v = base64.b64decode(v)
                    if not isinstance(v, bytes):
                        raise TypeError(  # pragma: no cover
                            "This should be bytes not '{0}' (=IMG:{1}).".format(type(v), k))
                    results.append((v, k.split("/")[-1]))
                elif k in kept_as_is:
                    results.append((v, k.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        "cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(
                            kind, k, v, cell))
        elif output_type == "error":
            vl = output["traceback"]
            if image_from_text:
                for v in vl:
                    results.append(self.create_picture_from(
                        v, "text", context="error"))
        elif output_type == "stream":
            v = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(v, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if len(results) == 0:
        return None
    res = self._merge_images(results)
    if res[0] is None:
        return None
    self._check_thumbnail_tuple(res)
    return res

794 

def cell_height(self, cell):
    """
    Approximates the height of a cell with a number of lines.

    @param      cell        cell
    @return                 estimated number of lines

    For markdown and raw cells, every line of the source counts
    for one line plus one for every 80 characters. For code cells,
    the lines of the source are added to a rough estimation of
    the height of every output. The method raises *NotImplementedError*
    for an unknown cell type, output type or mime type.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(row) // 80 for row in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    bitmaps = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    fixed_ten = ("text/vnd.plotly.v1+html",
                 "application/vnd.plotly.v1+json",
                 "application/vnd.bokehjs_load.v0+json",
                 "application/vnd.bokehjs_exec.v0+json")

    nbl = len(cell.source.split("\n"))
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in ("execute_result", "display_data"):
            for k, v in output["data"].items():
                if k in ("text/plain", "text/html"):
                    nbl += len(v.split("\n"))
                elif k == "application/javascript":
                    # rough estimation
                    nbl += len(v.split("\n")) // 2
                elif k == "application/json":
                    # rough estimation
                    try:
                        nbl += len(v.split("{"))
                    except AttributeError:  # pragma: no cover
                        nbl += len(v) // 5 + 1
                elif k == "image/svg+xml":
                    nbl += len(v) // 5
                elif k == "text/latex":
                    nbl += len(v.split("\\\\")) * 2
                elif k in bitmaps:
                    nbl += len(v) // 50
                elif k == "application/vnd.jupyter.widget-view+json":
                    nbl += 5
                elif k in fixed_ten:
                    nbl += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, k, v, cell))
        elif output_type == "stream":
            nbl += len(output["text"].split("\n"))
        elif output_type == "error":
            # one line per element of the traceback
            nbl += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))
    return nbl

868 

def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Tries to add tags for a slide show when they are too few.
    A markdown cell without any tag and starting with a title
    (``# ``, ``## ``, ``### ``) starts a new slide, a cell without any tag
    starts a new subslide when the current slide would exceed
    *max_nb_cell* cells or *max_nb_line* lines (see *cell_height*).
    The metadata of the cells is modified inplace.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    titles = ("# ", "## ", "### ")
    for index, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # an existing slide or subslide resets the counters
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                lines_in_slide = 0
                cells_in_slide = 0
        elif cell.cell_type == "markdown" and cell.source.startswith(titles):
            meta["slideshow"] = {'slide_type': 'slide'}
            lines_in_slide = 0
            cells_in_slide = 0
            modified[index] = ("slide", "section", cell)

        height = self.cell_height(cell)
        too_big = (cells_in_slide + 1 > max_nb_cell or
                   lines_in_slide + height > max_nb_line)
        if "slideshow" not in meta and too_big:
            modified[index] = (
                "subslide", "{0}-{1} <-> {2}-{3}".format(
                    cells_in_slide, lines_in_slide, 1, height), cell)
            lines_in_slide = 0
            cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += 1

    return modified

915 

def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the cells of a notebook in order and update
    the outputs in-place.

    @param      skip_exceptions     see the note below
    @param      progress_callback   call back function, called with the index
                                    of every executed cell
    @param      additional_path     additional paths (as a list or None if none),
                                    they are appended to ``sys.path`` in the kernel
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed or not, if the function
                                    returns None, the execution of the notebooks and skip
                                    the execution of the other cells
    @param      clean_function      function which cleans a cell's code before executing
                                    it (None for None)
    @param      context             not used by this implementation
    @return                         dictionary with statistics
                                    (keys *nbcell*, *nbrun*, *nbvalid*, *time*)

    The function adds the local variable ``theNotebook`` with
    the value given to the constructor (if not None).
    Function *valid* can return *None* to stop the execution of the notebook
    before this cell.

    NOTE(review): when a cell fails with @see cl NotebookError, the
    exception is raised again if *skip_exceptions* is False, otherwise
    it is converted into a *RuntimeError*. In both cases the execution
    stops, the following cells are not run.
    '''
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] Starting execution of '{0}'".format(self._filename))
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        code = ["import sys"]
        # repr produces a valid literal whatever the path contains
        # (quotes, trailing backslash), a raw string does not
        code.extend("sys.path.append({0!r})".format(str(p))
                    for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        cell = "theNotebook = {0!r}".format(str(self.theNotebook))
        self.run_cell(-1, cell)

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbnerr = 0  # number of cells executed without any exception
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = self.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                break
            if not r:
                continue
        try:
            nbrun += 1
            self.run_cell(i, cell, clean_function=clean_function)
            nbnerr += 1
        except Empty as er:
            raise RuntimeError(  # pragma: no cover
                "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
        except NotebookError as e:  # pragma: no cover
            if not skip_exceptions:
                raise
            raise RuntimeError(
                "Issue when executing:\n{0}".format(codei)) from e
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] end execution of '{0}'".format(self._filename))
        self.detailed_log(
            "[run_notebook] execution time: {0}".format(etime))
        self.detailed_log("[run_notebook] statistics : {0}".format(res))
    return res

1002 

def count_code_cells(self):
    """
    Counts the code cells of the notebook.

    @return         number of cells produced by ``iter_code_cells``
    """
    total = 0
    for _ in self.iter_code_cells():
        total += 1
    return total

1008 

def merge_notebook(self, nb):
    """
    Appends the cells of notebook *nb* at the end of this notebook.

    @param      nb      notebook or list of notebooks (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # a list is merged one notebook at a time, in order
        return sum(self.merge_notebook(one) for one in nb)

    container = self._cell_container()
    added = 0
    for added, cell in enumerate(nb.iter_cells(), start=1):
        container.append(cell)
    return added

1042 

def get_description(self):
    """
    Gets summary and description of this notebook.
    The first cell is expected to be a markdown cell containing a title
    and a description of the notebook. If the first cell holds no
    description and the second cell is a markdown cell, the first
    paragraph of the second cell is used as the description.
    Markdown links ``[label](url)`` are replaced by their label and
    double quotes are removed from the description.

    An empty (or whitespace-only) markdown cell yields an empty header
    and an empty description instead of failing.

    @return             header, description

    The method raises *ValueError* if the first cell is not a markdown
    cell or if an url remains in the description once the links
    were replaced.
    """
    def first_paragraph(lines):
        # text up to the first blank line, flattened on a single line
        text = '\n'.join(lines).lstrip()
        return text.split('\n\n')[0].replace('\n', ' ')

    def split_header(s, get_header=True):
        parts = s.strip().splitlines()
        if not parts:
            # empty cell: there is no title and no description to extract
            return '', ''

        if parts[0].startswith('#'):
            # title written as '# Title'
            if get_header:
                header = re.sub('#+\\s*', '', parts.pop(0))
                if not parts:
                    return header, ''
            else:
                header = ''
            return header, first_paragraph(parts)

        if get_header:
            # title written with an overline and/or underline of '=' or '-'
            if parts[0].startswith(('=', '-')):
                parts = parts[1:]
                if not parts:
                    # the cell only contains an underline
                    return '', ''
            header = parts.pop(0)
            if parts and parts[0].startswith(('=', '-')):
                parts.pop(0)
            if not parts:
                return header, ''
        else:
            header = ''
        return header, first_paragraph(parts)

    first_cell = self.first_cell()

    if not first_cell['cell_type'] == 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        # no description next to the title, look into the second cell
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # replaces markdown links by their label
    reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
    reg = re.compile(reg_link)
    new_desc = reg.sub("\\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")

1100 

def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Goes through the outputs of the notebook and builds one picture
    which illustrates it. Cells are explored from the last one
    to the first one and the first image found this way is kept.

    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @param      use_default     force using a default image even if an image is present
    @return                     string (:epkg:`SVG`) or Image (:epkg:`PIL`),
                                None if the selected output cannot be converted
    """
    # outputs which cannot be rendered as a static picture
    interactive = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                   "vnd.bokehjs_load.v0+json")

    # last cells first
    ordered = list(self.iter_cells())[::-1]

    # first pass: regular images
    candidates = []
    for cell in ordered:
        found = self.cell_image(cell, False)
        if found is None or len(found) == 0 or len(found[0]) == 0:
            continue
        if found[1] in interactive:
            continue
        self._check_thumbnail_tuple(found)
        candidates.append(found)

    # second pass: html outputs are accepted if nothing else was found
    if not use_default and not candidates:
        for cell in ordered:
            found = self.cell_image(cell, True)
            if found is None or len(found) == 0 or len(found[0]) == 0:
                continue
            self._check_thumbnail_tuple(found)
            candidates.append(found)
            if len(found[0]) >= 1000:
                # big enough, no need to look further
                break

    if use_default:
        candidates = []

    if not candidates:
        # no image, we need to consider the default one
        default_path = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(default_path, "rb") as f:
            found = (f.read(), "png")
        self._check_thumbnail_tuple(found)
        candidates.append(found)

    # select the image
    if not candidates:
        raise ValueError(  # pragma: no cover
            "There should be at least one image.")
    # candidates follow the reversed order of the cells,
    # the first one comes from the last cell with an image
    chosen = candidates[0]
    data = chosen[0]
    kind = chosen[1]

    # zoom
    if kind in interactive:
        return None
    if kind != 'svg':
        return self._scale_image(
            data, kind, max_width=max_width, max_height=max_height)
    try:
        converted = svg2img(data)
    except PYQImageException:  # pragma: no cover
        # Unable to convert SVG.
        return None
    return self._scale_image(
        converted, kind, max_width=max_width, max_height=max_height)

1164 

def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.
    An image which already fits in the requested size is returned
    unchanged (it is not pasted on a white background).

    @param      in_bytes        image as bytes, a :epkg:`PIL` image, or a tuple
                                whose first element is one of them
    @param      format          indication of the format (can be empty),
                                only used in error messages
    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @return                     Image (PIL)

    The method raises *TypeError* if *in_bytes* is neither bytes
    nor a PIL image.
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            "bytes expected, not {0} - format={1}".format(
                type(in_bytes), format))
    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # keeps the biggest scale for which both dimensions fit
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was removed in Pillow 10, LANCZOS is the same filter.
    # Recent versions expose it in Image.Resampling, older ones in Image.
    filters = getattr(Image, "Resampling", Image)
    resample = getattr(filters, "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image and center
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb

1214 

def _merge_images(self, results):
    """
    Merges images defined by (buffer, format).
    The method uses PIL to merge images when possible.
    When the images do not share the same format, only the images
    of one format are kept: the format with the smallest
    ``(count, format)`` pair, which means the least frequent one,
    ties being broken by alphabetical order.
    SVG images are concatenated as text, other images are stacked
    vertically with an overlap into a single PNG.

    @param      results     list of tuples ``(buffer, format)``
    @return                 ``(image, format)``, None if *results* is empty
    """
    if len(results) == 1:
        single = results[0]
        self._check_thumbnail_tuple(single)
        return single
    if len(results) == 0:
        return None

    counts = Counter(res[1] for res in results)
    if len(counts) == 1:
        fmt = results[0][1]
    else:
        # the format is the second element of the pair (count, format),
        # comparing images to the whole pair would discard every image
        fmt = min((count, name) for name, count in counts.items())[1]

    kept = [res for res in results if res[1] == fmt]
    if fmt == "svg":
        return ("\n".join(res[0] for res in kept), fmt)

    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    dx = 0.
    dy = 0.
    # each image only adds this fraction of its height to the total height
    over = 0.7
    imgs = []
    for in_bytes, _ in kept:
        img = Image.open(BytesIO(in_bytes))
        imgs.append(img)
        dx = max(dx, img.size[0])
        dy += img.size[1] * over

    new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
    # images are pasted from the bottom to the top
    for img in imgs:
        dy -= img.size[1] * over
        new_im.paste(img, (0, max(int(dy), 0)))

    if max(dx, dy) > 0:
        image_buffer = BytesIO()
        new_im.save(image_buffer, "PNG")
        return image_buffer.getvalue(), "png"
    return None, "png"