Coverage for pyquickhelper/ipythonhelper/notebook_runner.py: 88%
650 statements
« prev ^ index » next coverage.py v6.4.3, created at 2022-08-13 03:05 +0200
« prev ^ index » next coverage.py v6.4.3, created at 2022-08-13 03:05 +0200
1"""
2@file
3@brief Modified version of `runipy.notebook_runner
4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_.
5"""
7import base64
8import os
9import re
10import time
11import platform
12import warnings
13from queue import Empty
14from time import sleep
15from collections import Counter
16from io import StringIO, BytesIO
17import numpy
18from nbformat import NotebookNode, writes
19from nbformat.reader import reads
20from ..imghelper.svg_helper import svg2img, PYQImageException
21from ..loghelper.flog import noLOG
class NotebookError(Exception):
    """
    Exception raised when the execution of a notebook cell fails.
    """
class NotebookKernelError(Exception):
    """
    Exception raised when the kernel cannot be started or when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """
class NotebookRunner(object):

    """
    The kernel communicates with mime-types while the notebook
    uses short labels for different cell types. We'll use this to
    map from kernel types to notebook format types
    (see the class attribute *MIME_MAP*).

    This class executes a notebook end to end.

    .. index:: kernel, notebook

    The class can use different kernels. The next links give more
    information on how to create or test a kernel:

    * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
    * `simple_kernel <https://github.com/dsblank/simple_kernel>`_

    .. faqref::
        :title: Do I need to shutdown the kernel after running a notebook?

        .. index:: travis

        If the class is instantiated with *kernel=True*, a kernel will
        be started. It must be shutdown otherwise the program might
        be waiting for it for ever. That is one of the reasons why the
        travis build does not complete. The build finished but cannot terminate
        until all kernels are shutdown.
    """

    # Available output types: maps the mime type sent by the kernel
    # to the short label used by the notebook format.
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'image/gif': 'gif',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }
def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300):
    """
    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started from it
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
    @param      code_init       to initialize the notebook with a python code as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestart=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                if True, a kernel is started and the notebook can be run
    @param      filename        to add the notebook file if there is one in error messages
    @param      replacements    replacements to make in every cell before running it,
                                dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                with the same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_

    The constructor raises *SyntaxError* if an option in *extended_args*
    does not follow the pattern ``--name=value`` and @see cl NotebookKernelError
    if the kernel cannot be started or is not ready after *startup_timeout*.
    """
    # Plain attributes are set first so that the instance remains
    # consistent (and __del__ safe) even if the kernel fails to start.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # The received values are stored (not the defaults) so that
    # method copy creates a runner configured the same way.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level,
        extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements)

    # command line given to the kernel
    args = []
    if profile_dir:
        args.append(f'--profile-dir={os.path.abspath(profile_dir)}')
    if log_level:
        args.append(f'--log-level={log_level}')
    for opt in extended_args or []:
        if not opt.startswith("--"):
            raise SyntaxError(  # pragma: no cover
                "every option should start with '--': " + opt)
        if "=" not in opt:
            raise SyntaxError(  # pragma: no cover
                "every option should be assigned a value: " + opt)
        args.append(opt)

    if not kernel:
        # the notebook can be read or modified but not executed
        return

    try:
        from jupyter_client import KernelManager
    except ImportError:  # pragma: no cover
        from ipykernel import KernelManager

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        self.km = (KernelManager(kernel_name=kernel_name)
                   if kernel_name is not None else KernelManager())

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                f"Failure with args: {args}\nand error:\n{str(e)}") from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        # the current directory is restored even if the kernel fails
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # We wait for one second (wait_for_ready already waited
        # for startup_timeout) before releasing the kernel.
        sleep(1)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            f"Wait_for_ready fails (timeout={startup_timeout}).") from e
200 def __del__(self):
201 """
202 We close the kernel.
203 """
204 if self.km is not None:
205 del self.km
206 if self.kc is not None:
207 del self.kc
def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook as :epkg:`JSON`.

    @param      filename        file name, stream, or None to get a string
    @param      encoding        encoding, only used if *filename* is a file name
    @return                     the JSON string if *filename* is None, None otherwise
    """
    if filename is None:
        return writes(self.nb)

    if isinstance(filename, str):
        # a file name: the stream is opened here and the method
        # calls itself to write into it
        with open(filename, "w", encoding=encoding) as stream:
            self.to_json(stream)
        return None

    # anything else is considered as a stream
    filename.write(writes(self.nb))
    return None
def copy(self):
    """
    Copies the notebook (just the content), the notebook is serialized
    and read again. Parameters *theNotebook* and *filename*
    are not given to the new instance.

    @return         instance of @see cl NotebookRunner
    """
    buffer = StringIO()
    self.to_json(buffer)
    params = {key: value for key, value in self.init_args.items()
              if key not in ("theNotebook", "filename")}
    content = reads(buffer.getvalue())
    return NotebookRunner(content, **params)
245 def __add__(self, nb):
246 """
247 Merges two notebooks together, returns a new none.
249 @param nb notebook
250 @return new notebook
251 """
252 c = self.copy()
253 c.merge_notebook(nb)
254 return c
def shutdown_kernel(self):
    """
    Stops the channels of the kernel client and shuts down the kernel
    immediately. The method raises *ValueError* if no kernel was started.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)
def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, if the code uses *bokeh*,
    ``output_notebook()``. It also applies the replacements
    stored in ``self.replacements`` to every other line.

    @param      code        code (string) or None
    @return                 cleaned code, None if *code* is None

    If the last processed line is a commented ``show()``, the string
    ``"nothing to show"`` is appended to the code.
    """
    # None must be checked before any test on the content
    if code is None:
        return code
    has_bokeh = any(marker in code
                    for marker in ("bokeh.", "from bokeh", "import bokeh"))

    res = []
    show_is_last = False
    for raw_line in code.split("\n"):
        line = raw_line.strip("\n\r").rstrip(" \t")
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            # show_is_last is deliberately left unchanged here
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
            if self.replacements is not None:
                for k, v in self.replacements.items():
                    line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        res.append('"nothing to show"')
    return "\n".join(res)
@staticmethod
def get_cell_code(cell):
    """
    Extracts the code of a cell.

    @param      cell        a cell or a string
    @return                 boolean (=iscell), string

    A string is returned as is with *iscell=False*. For a cell,
    the code is read from attribute *source* or from attribute
    *input* if *source* does not exist.
    """
    if isinstance(cell, str):
        return False, cell

    try:
        code = cell.source
    except AttributeError:  # pragma: no cover
        code = cell.input
    return True, code
def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and updates the outputs of that cell inplace.

    The code is cleaned with method *clean_code* (and *clean_function*
    if specified), sent to the kernel, then the method reads the messages
    published by the kernel until the kernel is idle again.

    :param index_cell: index of the cell, only used in logs and error messages
    :param cell: cell to execute, a cell or a string
    :param clean_function: cleaning function to apply to the code before running it
    :param max_nbissue: number of times the method accepts to wait
        for a message without receiving any before stopping
    :return: outputs of the cell as a list, or an empty string if the cleaned code
        is empty or if the kernel did not send any message
        *max_nbissue + 1* times
    :raises ValueError: if no kernel was started
    :raises NotImplementedError: if the kernel publishes an unexpected type of message
    :raises NotebookError: if the final status is ``'error'``
    '''
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG(f'-- running cell:\n{codei}\n')
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    # nothing to run
    if len(code) == 0:
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # the reply on the shell channel gives the status of the execution
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status == 'error':  # pragma: no cover
        # removes the ANSI escape sequences (colors) from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    # reads the messages published by the kernel until it is idle
    outs = list()
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # NOTE(review): this overwrites a status 'error'
                    # read from the shell reply, the loop only ends here
                    # or with the return below, the final test
                    # status == 'error' looks unreachable -- to confirm.
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = f"exception Empty was raised ({e!r})"
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(f' msg_type={msg_type}')

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        # these messages do not produce any output
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # the outputs collected so far are dropped
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join(f"{k}={v}"
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                f"Unhandled iopub message: '{msg_type}'\n--CONTENT--\n{dcontent}")

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(f' out={type(out)}')
            if hasattr(out, "data"):
                self.detailed_log(f' out={out.data}')

    if iscell:
        cell['outputs'] = outs

    # collects the text outputs to log them
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIME_MAP to see the available output types
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply into a string, one line per key,
        # the values which are dictionaries are expanded
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(f" [{_}]={str(__)}")
                v_ = "\n".join(temp)
                sreply.append(f"reply['{k}']=dict\n{v_}")
            else:
                sreply.append(f"reply['{k}']={str(v)}")
        sreply = "\n".join(sreply)
        return sreply

    if status == 'error':  # pragma: no cover
        # NOTE(review): content and msg_type are only defined if at least
        # one message went through the loop above -- to confirm.
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log(f'[run_cell] status={status}')
    return outs
def to_python(self):
    """
    Converts the notebook into python. Code cells are copied,
    markdown and raw cells are converted into comments, any other cell
    is replaced by a comment giving its type.
    An empty line is added after every cell.

    @return         string
    """
    rows = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            rows.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for line in cell.source.split("\n"):
                if line.startswith("#"):
                    # a title is preceded by a separator
                    rows.extend(("###", line))
                else:
                    rows.append("# " + line)
        else:
            # No text, no code.
            rows.append(f"# cell.type = {cell.cell_type}")
        rows.append("")
    return "\n".join(rows)
def iter_code_cells(self):
    '''
    Iterates over the cells of the notebook whose type is ``'code'``.
    '''
    yield from (cell for cell in self.iter_cells()
                if cell.cell_type == 'code')
def iter_cells(self):
    '''
    Iterates over the notebook cells. If the notebook has
    worksheets, the cells of every worksheet are enumerated in order.
    '''
    if hasattr(self.nb, "worksheets"):  # pragma: no cover
        for sheet in self.nb.worksheets:
            yield from sheet.cells
    else:
        yield from self.nb.cells
def first_cell(self):
    """
    Returns the first cell, None if the notebook has no cell.
    """
    return next(iter(self.iter_cells()), None)
566 def _cell_container(self):
567 """
568 Returns a cells container, it may change according to the format.
570 @return cell container
571 """
572 if hasattr(self.nb, "worksheets"): # pragma: no cover
573 last = None
574 for ws in self.nb.worksheets:
575 last = ws
576 if last is None:
577 raise NotebookError("no cell container") # pragma: no cover
578 return last.cells
579 return self.nb.cells
581 def __len__(self):
582 """
583 Returns the number of cells, it iterates on cells
584 to get this information and does cache the information.
586 @return int
587 """
588 return sum(1 for _ in self.iter_cells())
def cell_type(self, cell):
    """
    Returns the type of a cell (attribute *cell_type*).

    @param      cell        cell from @see me iter_cells
    @return                 type
    """
    kind = cell.cell_type
    return kind
def cell_metadata(self, cell):
    """
    Returns the metadata of a cell (attribute *metadata*).

    @param      cell        cell
    @return                 metadata
    """
    meta = cell.metadata
    return meta
608 def _check_thumbnail_tuple(self, b):
609 """
610 Checks types for a thumbnail.
612 @param b tuple image, format
613 @return b
615 The function raises an exception if the type is incorrect.
616 """
617 if not isinstance(b, tuple):
618 raise TypeError( # pragma: no cover
619 f"tuple expected, not {type(b)}")
620 if len(b) != 2:
621 raise TypeError( # pragma: no cover
622 f"tuple expected of lengh 2, not {len(b)}")
623 if b[1] == "svg":
624 if not isinstance(b[0], str):
625 raise TypeError( # pragma: no cover
626 f"str expected for svg, not {type(b[0])}")
627 elif b[1] in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
628 "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
629 # Don't know how to extract a snippet out of this.
630 pass
631 else:
632 if not isinstance(b[0], bytes):
633 raise TypeError( # pragma: no cover
634 f"bytes expected for images, not {type(b[0])}-'{b[1]}'\n{b}")
635 return b
def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text (bytes are decoded as utf-8,
                            any other type is converted into a string)
    @param      format      text, json, ... (not used by the implementation)
    @param      context     (str) indication on the content of text (error, ...),
                            not used by the implementation
    @param      asbytes     results as bytes or as an image
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png.
    The size of the picture will depend on the text.
    The longer, the bigger. The method relies on matplotlib
    and then converts the image into a PIL image.
    A text with no end of line is split into rows of 20 characters,
    the text is truncated to 200 characters.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # bytes cannot be mixed with the string operations below
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single line is wrapped into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # every non empty line is drawn below the previous one,
        # dx, dy hold the extent of the whole text
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        # width / height ratio of the picture, kept between 1 and 3
        ratio = abs(dx) * 1. / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        try:
            ax.set_xlim(numpy.array([0., dx]))
            ax.set_ylim(numpy.array([-dy, 0.]))
        except TypeError as e:
            warnings.warn(f"[create_picture_from] {e}")
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
    finally:
        # closes this figure (not the current one) even if drawing fails
        plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    sio.seek(0)
    img = Image.open(sio)
    return img
def cell_image(self, cell, image_from_text=False):
    """
    Returns the image of a cell or None if there is none.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if the output is not one,
                                text outputs are then converted with
                                @see me create_picture_from
    @return                     None for no image (or if the cell is not a code cell),
                                otherwise the result of method *_merge_images*
                                applied to the list of tuples (image, extension)
                                collected from the outputs of the cell

    The method raises *NotImplementedError* for an unexpected mime type
    or an unexpected type of output.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types converted into a picture of their text
    # (second parameter given to create_picture_from)
    text_formats = {
        "application/javascript": "js",
        "application/json": "json",
        "text/html": "html",
        "text/latex": "latex",
    }
    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
    widget_types = ("application/vnd.jupyter.widget-view+json",
                    "application/vnd.jupyter.widget-state+json")
    bitmap_types = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    # kept as they are, with the mime subtype as extension
    unchanged_types = ("text/vnd.plotly.v1+html",
                       "application/vnd.plotly.v1+json",
                       "application/vnd.bokehjs_exec.v0+json",
                       "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in {"execute_result", "display_data"}:
            for k, v in output["data"].items():
                if k == "text/plain":
                    if image_from_text:
                        results.append(self.create_picture_from(
                            v, "text", context=output_type))
                elif k in text_formats:
                    if image_from_text:
                        results.append(
                            self.create_picture_from(v, text_formats[k]))
                elif k == "image/svg+xml":
                    if not isinstance(v, str):
                        raise TypeError(  # pragma: no cover
                            f"This should be str not '{type(v)}' (=SVG).")
                    results.append((v, "svg"))
                elif k in widget_types:
                    # widgets are not rendered, only logged
                    if "model_id" not in v:
                        raise KeyError(  # pragma: no cover
                            f"model_id is missing from {v}")
                    model_id = v["model_id"]
                    self.fLOG(f"[{k}] not rendered", model_id)
                elif k in bitmap_types:
                    # images are usually stored as base64 strings
                    if not isinstance(v, bytes):
                        v = base64.b64decode(v)
                    if not isinstance(v, bytes):
                        raise TypeError(  # pragma: no cover
                            f"This should be bytes not '{type(v)}' (=IMG:{k}).")
                    results.append((v, k.split("/")[-1]))
                elif k in unchanged_types:
                    results.append((v, k.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        f"cell type: {kind}\nk={k}\nv={v}\nCELL:\n{cell}")
        elif output_type == "error":
            if image_from_text:
                for v in output["traceback"]:
                    results.append(self.create_picture_from(
                        v, "text", context="error"))
        elif output_type == "stream":
            v = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(v, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if not results:
        return None
    res = self._merge_images(results)
    if res[0] is None:
        return None
    self._check_thumbnail_tuple(res)
    return res
def cell_height(self, cell):
    """
    Approximates the height of a cell by the number of lines it contains.

    @param      cell        cell
    @return                 number of lines

    For markdown and raw cells, a line longer than 80 characters counts
    for more than one line. For code cells, the method adds
    a rough estimation of the height of every output to the number
    of lines of the code. It raises *NotImplementedError* for an unexpected
    type of cell, type of output or mime type.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(line) // 80 for line in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            f"cell type: {kind}\nCELL:\n{cell}")

    line_based = ("text/plain", "text/html")
    bitmaps = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    widgets = ("application/vnd.jupyter.widget-view+json",
               "application/vnd.jupyter.widget-state+json")
    interactive = ("text/vnd.plotly.v1+html",
                   "application/vnd.plotly.v1+json",
                   "application/vnd.bokehjs_load.v0+json",
                   "application/vnd.bokehjs_exec.v0+json")

    nbl = len(cell.source.split("\n"))
    for output in cell.outputs:
        output_type = output["output_type"]
        if output_type in ("execute_result", "display_data"):
            for k, v in output["data"].items():
                if k in line_based:
                    nbl += len(v.split("\n"))
                elif k == "application/javascript":
                    # rough estimation
                    nbl += len(v.split("\n")) // 2
                elif k == "application/json":
                    # rough estimation, the value may not be a string
                    try:
                        nbl += len(v.split("{"))
                    except AttributeError:  # pragma: no cover
                        nbl += len(v) // 5 + 1
                elif k == "image/svg+xml":
                    nbl += len(v) // 5
                elif k == "text/latex":
                    nbl += len(v.split("\\\\")) * 2
                elif k in bitmaps:
                    nbl += len(v) // 50
                elif k in widgets:
                    nbl += 5
                elif k in interactive:
                    nbl += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, k, v, cell))
        elif output_type == "stream":
            nbl += len(output["text"].split("\n"))
        elif output_type == "error":
            nbl += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

    return nbl
def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Tries to add tags for a slide show when they are too few.
    The metadata of the modified cells is updated inplace.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell with no tag which starts with a title (``#``, ``##``, ``###``)
    begins a new slide. A cell with no tag begins a new subslide
    if the current slide would otherwise contain more than
    *max_nb_cell* cells or *max_nb_line* lines.
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    titles = ("# ", "## ", "### ")
    for index, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # an existing tag slide or subslide starts a new count
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                lines_in_slide = 0
                cells_in_slide = 0
        elif cell.cell_type == "markdown" and cell.source.startswith(titles):
            meta["slideshow"] = {'slide_type': 'slide'}
            lines_in_slide = 0
            cells_in_slide = 0
            modified[index] = ("slide", "section", cell)

        height = self.cell_height(cell)
        step = 1
        too_big = (cells_in_slide + step > max_nb_cell or
                   lines_in_slide + height > max_nb_line)
        if "slideshow" not in meta and too_big:
            modified[index] = (
                "subslide",
                f"{cells_in_slide}-{lines_in_slide} <-> {step}-{height}",
                cell)
            lines_in_slide = 0
            cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += step

    return modified
def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the cells of a notebook in order and updates
    the outputs in-place.

    If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
    subsequent cells are run (by default, the notebook execution stops).

    @param      skip_exceptions     skip exception, a cell which raises @see cl NotebookError
                                    is logged and the execution goes on with the next cell
    @param      progress_callback   call back function, called with the index of every executed cell
    @param      additional_path     additional paths (as a list or None if none)
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed or not, if the function
                                    returns None, the execution of the notebooks and skip
                                    the execution of the other cells
    @param      clean_function      function which cleans a cell's code before executing
                                    it (None for None)
    @param      context             not used by the implementation
    @return                         dictionary with statistics (*nbcell*: number of code cells
                                    considered, *nbrun*: number of executed cells, *nbvalid*:
                                    number of cells executed without error, *time*: execution time)

    The function adds the local variable ``theNotebook`` with
    the value given to the constructor if it is not None.
    Function *valid* can return *None* to stop the execution of the notebook
    before this cell.
    '''
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] Starting execution of '{self._filename}'")
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        # repr produces a valid python literal whatever the path contains
        # (quotes, trailing backslash)
        code = ["import sys"]
        code.extend(f"sys.path.append({str(p)!r})" for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        self.run_cell(-1, f"theNotebook = {str(self.theNotebook)!r}")

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbnerr = 0
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = NotebookRunner.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                # stops the execution of the notebook
                break
            if not r:
                continue
        try:
            nbrun += 1
            self.run_cell(i, cell, clean_function=clean_function)
            # only counts the cells executed without any exception
            nbnerr += 1
        except Empty as er:  # pragma: no cover
            raise RuntimeError(
                f"{self.comment}\nissue when executing:\n{codei}") from er
        except NotebookError as e:  # pragma: no cover
            if not skip_exceptions:
                raise
            # the failure is logged and the next cells are executed
            self.fLOG(
                f"[run_notebook] issue when executing cell {i}:\n{codei}\n{e}")
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
    if self.detailed_log:
        self.detailed_log(
            f"[run_notebook] end execution of '{self._filename}'")
        self.detailed_log(
            f"[run_notebook] execution time: {etime}")
        self.detailed_log(f"[run_notebook] statistics : {res}")
    return res
def count_code_cells(self):
    '''
    Counts the code cells of the notebook
    (it iterates over them with method *iter_code_cells*).
    '''
    total = 0
    for _ in self.iter_code_cells():
        total += 1
    return total
def merge_notebook(self, nb):
    """
    Appends the cells of notebook *nb* to this one.

    @param      nb      notebook or list of notebook (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # every notebook of the list is merged one after the other
        total = 0
        for one in nb:
            total += self.merge_notebook(one)
        return total

    container = self._cell_container()
    added = 0
    for added, cell in enumerate(nb.iter_cells(), 1):
        container.append(cell)
    return added
def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. If the first cell only contains a title,
    the description is taken from the second cell if it is a markdown cell.
    Markdown links are replaced by their text and double quotes
    are removed from the description.

    @return             header, description

    The method raises *ValueError* if the notebook is empty,
    if the first cell is not a markdown cell or if the description
    still contains a url after the links were replaced.
    An empty first cell produces an empty header and an empty description.
    """
    def first_paragraph(parts):
        # first paragraph of the remaining lines, on a single line
        rest = '\n'.join(parts).lstrip().split('\n\n')
        return rest[0].replace('\n', ' ')

    def split_header(s, get_header=True):
        # splits a markdown text into a title (if get_header is True)
        # and the first paragraph which follows
        parts = s.strip().splitlines()
        if not parts:
            return '', ''

        if parts[0].startswith('#'):
            # title such as '# title'
            if not get_header:
                return '', first_paragraph(parts)
            header = re.sub(r'#+\s*', '', parts.pop(0))
            if not parts:
                return header, ''
            return header, first_paragraph(parts)

        if not get_header:
            return '', first_paragraph(parts)

        # title underlined (and maybe overlined) with '=' or '-'
        if parts[0].startswith(('=', '-')):
            parts = parts[1:]
        if not parts:
            return '', ''
        header = parts.pop(0)
        if parts and parts[0].startswith(('=', '-')):
            parts.pop(0)
        if not parts:
            return header, ''
        return header, first_paragraph(parts)

    first_cell = self.first_cell()
    if first_cell is None:
        raise ValueError(  # pragma: no cover
            f"The notebook is empty, filename='{self._filename}'.")

    if not first_cell['cell_type'] == 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # replaces markdown links [text](url) by their text
    reg = re.compile(r"(\[(.*?)\]\(([^ ]*)\))")
    new_desc = reg.sub(r"\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")
def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Builds one picture out of the notebook outputs to illustrate it.

    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @param      use_default     force using the default image even if an image is present
    @return                     string (:epkg:`SVG`) or Image (:epkg:`PIL`),
                                None for interactive outputs (plotly, bokeh)
                                or when the SVG conversion fails

    Cells are visited from the last one to the first one, a first time
    with ``cell_image(cell, False)`` and, if nothing was found,
    a second time with ``cell_image(cell, True)``. The first image
    collected is the one being scaled. If there is none, or if
    *use_default* is True, the file ``no_image_nb.png`` stored next to
    this module is used.
    """
    interactive = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                   "vnd.bokehjs_load.v0+json")
    cells = list(self.iter_cells())[::-1]

    # first pass, interactive outputs are ignored
    images = []
    for cell in cells:
        found = self.cell_image(cell, False)
        if found is None or len(found) == 0 or len(found[0]) == 0:
            continue
        if found[1] in interactive:
            continue
        self._check_thumbnail_tuple(found)
        images.append(found)

    # second pass, stops at the first big enough content
    if not use_default and not images:
        for cell in cells:
            found = self.cell_image(cell, True)
            if found is None or len(found) == 0 or len(found[0]) == 0:
                continue
            self._check_thumbnail_tuple(found)
            images.append(found)
            if len(found[0]) >= 1000:
                break

    if use_default:
        images = []

    if not images:
        # no image, we need to consider the default one
        default_path = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(default_path, "rb") as f:
            found = (f.read(), "png")
        self._check_thumbnail_tuple(found)
        images.append(found)

    if not images:
        raise ValueError(  # pragma: no cover
            "There should be at least one image.")

    # maybe later we'll implement a different logic,
    # cells were reversed: the first image comes from the last cell with one
    content, fmt = images[0][0], images[0][1]

    # zoom
    if fmt in interactive:
        return None
    if fmt == 'svg':
        try:
            content = svg2img(content)
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
    return self._scale_image(
        content, fmt, max_width=max_width, max_height=max_height)
def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.

    @param      in_bytes        image as bytes, a :epkg:`PIL` image or a tuple
                                whose first element is one of them
    @param      format          indication of the format (can be empty),
                                only used in the error message
    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @return                     Image (PIL)

    An image which already fits in the box is returned unchanged,
    a bigger one is reduced and pasted at the center of a white image
    of size *(max_width, max_height)*.
    A ``TypeError`` is raised if *in_bytes* has an unexpected type.
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            f"bytes expected, not {type(in_bytes)} - format={format}")
    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # the width drives the scale unless the height would overflow
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was removed in Pillow 10, it was an alias for
    # the LANCZOS filter which is looked up wherever this version
    # of the library exposes it
    resampling = getattr(Image, "Resampling", Image)
    resample = getattr(resampling, "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image (in place) and center
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb
1228 def _merge_images(self, results):
1229 """
1230 Merges images defined by (buffer, format).
1231 The method uses PIL to merge images when possible.
1233 @return ``[ (image, format) ]``
1234 """
1235 if len(results) == 1:
1236 results = results[0]
1237 self._check_thumbnail_tuple(results)
1238 return results
1239 if len(results) == 0:
1240 return None
1242 formats_counts = Counter(_[1] for _ in results)
1243 if len(formats_counts) == 1:
1244 format = results[0][1]
1245 else:
1246 items = sorted(((v, k)
1247 for k, v in formats_counts.items()), reverse=False)
1248 for it in items:
1249 format = it
1250 break
1252 results = [_ for _ in results if _[1] == format]
1253 if format == "svg":
1254 return ("\n".join(_[0] for _ in results), format)
1256 # local import to avoid testing dependency on PIL:
1257 try:
1258 from PIL import Image
1259 except ImportError: # pragma: no cover
1260 import Image
1262 dx = 0.
1263 dy = 0.
1264 over = 0.7
1265 imgs = []
1266 for in_bytes, _ in results:
1267 img = Image.open(BytesIO(in_bytes))
1268 imgs.append(img)
1269 dx = max(dx, img.size[0])
1270 dy += img.size[1] * over
1272 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
1273 for img in imgs:
1274 dy -= img.size[1] * over
1275 new_im.paste(img, (0, max(int(dy), 0)))
1277 if max(dx, dy) > 0:
1278 image_buffer = BytesIO()
1279 new_im.save(image_buffer, "PNG")
1280 b = image_buffer.getvalue(), "png"
1281 return b
1282 b = None, "png"
1283 return b