Coverage for pyquickhelper/ipythonhelper/notebook_runner.py: 88%
651 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1"""
2@file
3@brief Modified version of `runipy.notebook_runner
4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_.
5"""
7import base64
8import os
9import re
10import time
11import platform
12import warnings
13from queue import Empty
14from time import sleep
15from collections import Counter
16from io import StringIO, BytesIO
17import numpy
18from nbformat import NotebookNode, writes
19from nbformat.reader import reads
20from ..imghelper.svg_helper import svg2img, PYQImageException
21from ..loghelper.flog import noLOG
24class NotebookError(Exception):
25 """
26 Raised when the execution fails.
27 """
28 pass
31class NotebookKernelError(Exception):
32 """
33 Raised when
34 `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
35 jupyter_client/blocking/client.py#L84>`_ fails.
36 """
37 pass
40class NotebookRunner(object):
42 """
43 The kernel communicates with mime-types while the notebook
44 uses short labels for different cell types. We'll use this to
45 map from kernel types to notebook format types.
47 This classes executes a notebook end to end.
49 .. index:: kernel, notebook
51 The class can use different kernels. The next links gives more
52 information on how to create or test a kernel:
54 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
55 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_
57 .. faqref::
58 :title: Do I need to shutdown the kernel after running a notebook?
60 .. index:: travis
62 If the class is instantiated with *kernel=True*, a kernel will
63 be started. It must be shutdown otherwise the program might
64 be waiting for it for ever. That is one of the reasons why the
65 travis build does not complete. The build finished but cannot terminate
66 until all kernels are shutdown.
67 """
69 # . available output types
70 MIME_MAP = {
71 'image/jpeg': 'jpeg',
72 'image/png': 'png',
73 'image/gif': 'gif',
74 'text/plain': 'text',
75 'text/html': 'html',
76 'text/latex': 'latex',
77 'application/javascript': 'html',
78 'image/svg+xml': 'svg',
79 }
81 def __init__(self, nb, profile_dir=None, working_dir=None,
82 comment="", fLOG=noLOG, theNotebook=None, code_init=None,
83 kernel_name="python", log_level="30", extended_args=None,
84 kernel=False, filename=None, replacements=None, detailed_log=None,
85 startup_timeout=300, raise_exception=False):
86 """
87 @param nb notebook as :epkg:`JSON`
88 @param profile_dir profile directory
89 @param working_dir working directory
90 @param comment additional information added to error message
91 @param theNotebook if not None, populate the variable *theNotebook* with this value in the notebook
92 @param code_init to initialize the notebook with a python code as if it was a cell
93 @param fLOG logging function
94 @param log_level Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
95 @param kernel_name kernel name, it can be None
96 @param extended_args others arguments to pass to the command line
97 (`--KernelManager.autorestar=True` for example),
98 see :ref:`l-ipython_notebook_args` for a full list
99 @param kernel *kernel* is True by default, the notebook can be run, if False,
100 the notebook can be read but not run
101 @param filename to add the notebook file if there is one in error messages
102 @param replacements replacements to make in every cell before running it,
103 dictionary ``{ string: string }``
104 @param detailed_log to log detailed information when executing the notebook, this should be a function
105 with the same signature as ``print`` or None
106 @param startup_timeout wait for this long for the kernel to be ready,
107 see `wait_for_ready
108 <https://github.com/jupyter/jupyter_client/blob/master/
109 jupyter_client/blocking/client.py#L84>`_
110 @param raise_exception raise an exception if a cell raises one
111 """
112 if kernel:
113 try:
114 from jupyter_client import KernelManager
115 except ImportError: # pragma: no cover
116 from ipykernel import KernelManager
118 with warnings.catch_warnings():
119 warnings.filterwarnings("ignore", category=DeprecationWarning)
120 self.km = KernelManager(
121 kernel_name=kernel_name) if kernel_name is not None else KernelManager()
122 else:
123 self.km = None
124 self.raise_exception = raise_exception
125 self.detailed_log = detailed_log
126 self.fLOG = fLOG
127 self.theNotebook = theNotebook
128 self.code_init = code_init
129 self._filename = filename if filename is not None else "memory"
130 self.replacements = replacements
131 self.init_args = dict(
132 profile_dir=profile_dir, working_dir=working_dir,
133 comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
134 kernel_name="python", log_level="30", extended_args=None,
135 kernel=kernel, filename=filename, replacements=replacements)
136 args = []
138 if profile_dir:
139 args.append(f'--profile-dir={os.path.abspath(profile_dir)}')
140 if log_level:
141 args.append(f'--log-level={log_level}')
143 if extended_args is not None and len(extended_args) > 0:
144 for opt in extended_args:
145 if not opt.startswith("--"):
146 raise SyntaxError( # pragma: no cover
147 "every option should start with '--': " + opt)
148 if "=" not in opt:
149 raise SyntaxError( # pragma: no cover
150 "every option should be assigned a value: " + opt)
151 args.append(opt)
153 if kernel:
154 cwd = os.getcwd()
156 if working_dir:
157 os.chdir(working_dir)
159 if self.km is not None:
160 try:
161 with warnings.catch_warnings():
162 warnings.filterwarnings(
163 "ignore", category=ResourceWarning)
164 self.km.start_kernel(extra_arguments=args)
165 except Exception as e: # pragma: no cover
166 raise NotebookKernelError(
167 f"Failure with args: {args}\nand error:\n{str(e)}") from e
169 if platform.system() == 'Darwin':
170 # see http://www.pypedia.com/index.php/notebook_runner
171 # There is sometimes a race condition where the first
172 # execute command hits the kernel before it's ready.
173 # It appears to happen only on Darwin (Mac OS) and an
174 # easy (but clumsy) way to mitigate it is to sleep
175 # for a second.
176 sleep(1) # pragma: no cover
178 if working_dir:
179 os.chdir(cwd)
181 self.kc = self.km.client()
182 self.kc.start_channels(stdin=False)
183 try:
184 self.kc.wait_for_ready(timeout=startup_timeout)
185 except RuntimeError as e: # pragma: no cover
186 # We wait for one second.
187 sleep(startup_timeout)
188 self.kc.stop_channels()
189 self.km.shutdown_kernel()
190 self.km = None
191 self.kc = None
192 self.nb = nb
193 self.comment = comment
194 raise NotebookKernelError(
195 f"Wait_for_ready fails (timeout={startup_timeout}).") from e
196 else:
197 self.km = None
198 self.kc = None
199 self.nb = nb
200 self.comment = comment
202 def __del__(self):
203 """
204 We close the kernel.
205 """
206 if self.km is not None:
207 del self.km
208 if self.kc is not None:
209 del self.kc
211 def to_json(self, filename=None, encoding="utf8"):
212 """
213 Converts the notebook into :epkg:`JSON`.
215 @param filename filename or stream
216 @param encoding encoding
217 @return Json string if filename is None, None otherwise
218 """
219 if isinstance(filename, str):
220 with open(filename, "w", encoding=encoding) as payload:
221 self.to_json(payload)
222 return None
224 if filename is None:
225 st = StringIO()
226 st.write(writes(self.nb))
227 return st.getvalue()
229 filename.write(writes(self.nb))
230 return None
232 def copy(self):
233 """
234 Copies the notebook (just the content).
236 @return instance of @see cl NotebookRunner
237 """
238 st = StringIO()
239 self.to_json(st)
240 args = self.init_args.copy()
241 for name in ["theNotebook", "filename"]:
242 if name in args:
243 del args[name]
244 nb = reads(st.getvalue())
245 return NotebookRunner(nb, **args)
247 def __add__(self, nb):
248 """
249 Merges two notebooks together, returns a new none.
251 @param nb notebook
252 @return new notebook
253 """
254 c = self.copy()
255 c.merge_notebook(nb)
256 return c
258 def shutdown_kernel(self):
259 """
260 Shuts down kernel.
261 """
262 self.fLOG('-- shutdown kernel')
263 if self.kc is None:
264 raise ValueError( # pragma: no cover
265 "No kernel was started, specify kernel=True when initializing the instance.")
266 self.kc.stop_channels()
267 self.km.shutdown_kernel(now=True)
269 def clean_code(self, code):
270 """
271 Cleans the code before running it, the function comment out
272 instruction such as ``show()``.
274 @param code code (string)
275 @return cleaned code
276 """
277 has_bokeh = "bokeh." in code or "from bokeh" in code or "import bokeh" in code
278 if code is None:
279 return code
281 lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
282 res = []
283 show_is_last = False
284 for line in lines:
285 if line.replace(" ", "") == "show()":
286 line = line.replace("show", "#show")
287 show_is_last = True
288 elif has_bokeh and line.replace(" ", "") == "output_notebook()":
289 line = line.replace("output_notebook", "#output_notebook")
290 else:
291 show_is_last = False
292 if self.replacements is not None:
293 for k, v in self.replacements.items():
294 line = line.replace(k, v)
295 res.append(line)
296 if show_is_last:
297 res.append('"nothing to show"')
298 return "\n".join(res)
300 @staticmethod
301 def get_cell_code(cell):
302 """
303 Returns the code of a cell.
305 @param cell a cell or a string
306 @return boolean (=iscell), string
307 """
308 if isinstance(cell, str):
309 iscell = False
310 return iscell, cell
312 iscell = True
313 try:
314 return iscell, cell.source
315 except AttributeError: # pragma: no cover
316 return iscell, cell.input
318 def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
319 '''
320 Runs a notebook cell and update the output of that cell inplace.
322 :param index_cell: index of the cell
323 :param cell: cell to execute
324 :param clean_function: cleaning function to apply to the code before running it
325 :param max_nbissue: number of times an issue can be raised before stopping
326 :return: output of the cell
327 '''
328 if self.detailed_log:
329 self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
330 index_cell, clean_function))
331 iscell, codei = NotebookRunner.get_cell_code(cell)
333 self.fLOG(f'-- running cell:\n{codei}\n')
334 if self.detailed_log:
335 self.detailed_log(
336 '[run_cell] code=\n {0}'.format(
337 "\n ".join(codei.split("\n"))))
339 code = self.clean_code(codei)
340 if clean_function is not None:
341 code = clean_function(code)
342 if self.detailed_log:
343 self.detailed_log(
344 ' cleaned code=\n {0}'.format(
345 "\n ".join(code.split("\n"))))
346 if len(code) == 0:
347 return ""
348 if self.kc is None:
349 raise ValueError( # pragma: no cover
350 "No kernel was started, specify kernel=True when initializing the instance.")
351 self.kc.execute(code)
353 reply = self.kc.get_shell_msg()
354 reason = None
355 try:
356 status = reply['content']['status']
357 except KeyError: # pragma: no cover
358 status = 'error'
359 reason = "no status key in reply['content']"
361 if status == 'error': # pragma: no cover
362 ansi_escape = re.compile(r'\x1b[^m]*m')
363 try:
364 tr = [ansi_escape.sub('', _)
365 for _ in reply['content']['traceback']]
366 except KeyError: # pragma: no cover
367 tr = (["No traceback, available keys in reply['content']"] +
368 list(reply['content']))
369 traceback_text = '\n'.join(tr)
370 if self.raise_exception:
371 raise NotebookError(traceback_text)
372 self.fLOG("[nberror]\n", traceback_text)
373 if self.detailed_log:
374 self.detailed_log( # pragma: no cover
375 '[run_cell] ERROR=\n {0}'.format(
376 "\n ".join(traceback_text.split("\n"))))
377 else:
378 traceback_text = ''
379 self.fLOG('-- cell returned')
381 outs = list()
382 nbissue = 0
383 statuses = [status]
384 while True:
385 try:
386 msg = self.kc.get_iopub_msg(timeout=1)
387 if msg['msg_type'] == 'status':
388 if msg['content']['execution_state'] == 'idle':
389 status = 'ok'
390 statuses.append(status)
391 break
392 statuses.append(status)
393 except Empty as e: # pragma: no cover
394 # execution state should return to idle before
395 # the queue becomes empty,
396 # if it doesn't, something bad has happened
397 status = "error"
398 statuses.append(status)
399 reason = f"exception Empty was raised ({e!r})"
400 nbissue += 1
401 if nbissue > max_nbissue:
402 # the notebook is empty
403 return ""
404 else:
405 continue
407 content = msg['content']
408 msg_type = msg['msg_type']
409 if self.detailed_log:
410 self.detailed_log(f' msg_type={msg_type}')
412 out = NotebookNode(output_type=msg_type, metadata=dict())
414 if 'execution_count' in content:
415 if iscell:
416 cell['execution_count'] = content['execution_count']
417 out.execution_count = content['execution_count']
419 if msg_type in ('status', 'pyin', 'execute_input'):
420 continue
422 if msg_type == 'stream':
423 out.name = content['name']
424 # in msgspec 5, this is name, text
425 # in msgspec 4, this is name, data
426 if 'text' in content:
427 out.text = content['text']
428 else:
429 out.data = content['data']
431 elif msg_type in ('display_data', 'pyout', 'execute_result'):
432 out.data = content['data']
434 elif msg_type in ('pyerr', 'error'):
435 out.ename = content['ename']
436 out.evalue = content['evalue']
437 out.traceback = content['traceback']
438 out.name = 'stderr'
440 elif msg_type == 'clear_output':
441 outs = list()
442 continue
444 elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
445 # widgets in a notebook
446 out.data = content["data"]
447 out.comm_id = content["comm_id"]
449 else:
450 dcontent = "\n".join(f"{k}={v}"
451 for k, v in sorted(content.items()))
452 raise NotImplementedError( # pragma: no cover
453 f"Unhandled iopub message: '{msg_type}'\n--CONTENT--\n{dcontent}")
455 outs.append(out)
456 if self.detailed_log:
457 self.detailed_log(f' out={type(out)}')
458 if hasattr(out, "data"):
459 self.detailed_log(f' out={out.data}')
461 if iscell:
462 cell['outputs'] = outs
464 raw = []
465 for _ in outs:
466 try:
467 t = _.data
468 except AttributeError:
469 continue
471 # see MIMEMAP to see the available output type
472 for k, v in t.items():
473 if k.startswith("text"):
474 raw.append(v)
476 sraw = "\n".join(raw)
477 self.fLOG(sraw)
478 if self.detailed_log:
479 self.detailed_log(' sraw=\n {0}'.format(
480 "\n ".join(sraw.split("\n"))))
482 def reply2string(reply):
483 sreply = []
484 for k, v in sorted(reply.items()):
485 if isinstance(v, dict):
486 temp = []
487 for _, __ in sorted(v.items()):
488 temp.append(f" [{_}]={str(__)}")
489 v_ = "\n".join(temp)
490 sreply.append(f"reply['{k}']=dict\n{v_}")
491 else:
492 sreply.append(f"reply['{k}']={str(v)}")
493 sreply = "\n".join(sreply)
494 return sreply
496 if status == 'error': # pragma: no cover
497 sreply = reply2string(reply)
498 if len(code) < 5:
499 scode = [code]
500 else:
501 scode = ""
502 mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
503 "length={5} -- {6}:\n-----------------\n"
504 "content={12}\nmsg_type: {13} nbissue={14}"
505 "\nstatuses={15}"
506 "\n-----------------\n{0}"
507 "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
508 raise NotebookError(mes.format(
509 code, traceback_text, sraw, sreply, index_cell, # 0-4
510 len(code), scode, self.comment, status, reason, # 5-9
511 self._filename, index_cell, content, msg_type, nbissue, # 10-14
512 statuses)) # 15
513 if self.detailed_log:
514 self.detailed_log(f'[run_cell] status={status}')
515 return outs
517 def to_python(self):
518 """
519 Converts the notebook into python.
521 @return string
522 """
523 rows = []
524 for cell in self.iter_cells():
525 if cell.cell_type == "code":
526 codei = NotebookRunner.get_cell_code(cell)[1]
527 rows.append(codei)
528 elif cell.cell_type in ("markdown", "raw"):
529 content = cell.source
530 lines = content.split("\n")
531 for line in lines:
532 if line.startswith("#"):
533 rows.append("###")
534 rows.append(line)
535 else:
536 rows.append("# " + line)
537 else:
538 # No text, no code.
539 rows.append(f"# cell.type = {cell.cell_type}")
540 rows.append("")
541 return "\n".join(rows)
543 def iter_code_cells(self):
544 '''
545 Iterates over the notebook cells containing code.
546 '''
547 for cell in self.iter_cells():
548 if cell.cell_type == 'code':
549 yield cell
551 def iter_cells(self):
552 '''
553 Iterates over the notebook cells.
554 '''
555 if hasattr(self.nb, "worksheets"): # pragma: no cover
556 for ws in self.nb.worksheets:
557 for cell in ws.cells:
558 yield cell
559 else:
560 for cell in self.nb.cells:
561 yield cell
563 def first_cell(self):
564 """
565 Returns the first cell.
566 """
567 for cell in self.iter_cells():
568 return cell
570 def _cell_container(self):
571 """
572 Returns a cells container, it may change according to the format.
574 @return cell container
575 """
576 if hasattr(self.nb, "worksheets"): # pragma: no cover
577 last = None
578 for ws in self.nb.worksheets:
579 last = ws
580 if last is None:
581 raise NotebookError("no cell container") # pragma: no cover
582 return last.cells
583 return self.nb.cells
585 def __len__(self):
586 """
587 Returns the number of cells, it iterates on cells
588 to get this information and does cache the information.
590 @return int
591 """
592 return sum(1 for _ in self.iter_cells())
594 def cell_type(self, cell):
595 """
596 Returns the cell type.
598 @param cell from @see me iter_cells
599 @return type
600 """
601 return cell.cell_type
603 def cell_metadata(self, cell):
604 """
605 Returns the cell metadata.
607 @param cell cell
608 @return metadata
609 """
610 return cell.metadata
612 def _check_thumbnail_tuple(self, b):
613 """
614 Checks types for a thumbnail.
616 @param b tuple image, format
617 @return b
619 The function raises an exception if the type is incorrect.
620 """
621 if not isinstance(b, tuple):
622 raise TypeError( # pragma: no cover
623 f"tuple expected, not {type(b)}")
624 if len(b) != 2:
625 raise TypeError( # pragma: no cover
626 f"tuple expected of lengh 2, not {len(b)}")
627 if b[1] == "svg":
628 if not isinstance(b[0], str):
629 raise TypeError( # pragma: no cover
630 f"str expected for svg, not {type(b[0])}")
631 elif b[1] in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
632 "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
633 # Don't know how to extract a snippet out of this.
634 pass
635 else:
636 if not isinstance(b[0], bytes):
637 raise TypeError( # pragma: no cover
638 f"bytes expected for images, not {type(b[0])}-'{b[1]}'\n{b}")
639 return b
641 def create_picture_from(self, text, format, asbytes=True, context=None):
642 """
643 Creates a picture from text.
645 @param text the text
646 @param format text, json, ...
647 @param context (str) indication on the content of text (error, ...)
648 @param asbytes results as bytes or as an image
649 @return tuple (picture, format) or PIL.Image (if asbytes is False)
651 The picture will be bytes, the format png, bmp...
652 The size of the picture will depend on the text.
653 The longer, the bigger. The method relies on matplotlib
654 and then convert the image into a PIL image.
656 HTML could be rendered with QWebPage from PyQt (not implemented).
657 """
658 if not isinstance(text, (str, bytes)):
659 text = str(text)
660 if "\n" not in text:
661 rows = []
662 for i in range(0, len(text), 20):
663 end = min(i + 20, len(text))
664 rows.append(text[i:end])
665 text = "\n".join(text)
666 if len(text) > 200:
667 text = text[:200]
668 size = len(text) // 10
669 figsize = (3 + size, 3 + size)
670 lines = text.replace("\t", " ").replace("\r", "").split("\n")
672 import matplotlib.pyplot as plt
673 from matplotlib.textpath import TextPath
674 from matplotlib.font_manager import FontProperties
675 fig = plt.figure(figsize=figsize)
676 ax = fig.add_subplot(111)
677 fp = FontProperties(size=200)
679 dx = 0
680 dy = 0
681 for i, line in enumerate(lines):
682 if len(line.strip()) > 0:
683 ax.text(0, -dy, line, fontproperties=fp, va='top')
684 tp = TextPath((0, -dy), line, prop=fp)
685 bb = tp.get_extents()
686 dy += bb.height
687 dx = max(dx, bb.width)
689 ratio = abs(dx) * 1. / max(abs(dy), 1)
690 ratio = max(min(ratio, 3), 1)
691 fig.set_size_inches(int((1 + size) * ratio), 1 + size)
692 try:
693 ax.set_xlim(numpy.array([0., dx]))
694 ax.set_ylim(numpy.array([-dy, 0.]))
695 except TypeError as e:
696 warnings.warn(f"[create_picture_from] {e}")
697 ax.set_axis_off()
698 sio = BytesIO()
699 fig.savefig(sio, format="png")
700 plt.close()
702 if asbytes:
703 b = sio.getvalue(), "png"
704 self._check_thumbnail_tuple(b)
705 return b
706 try:
707 from PIL import Image
708 except ImportError: # pragma: no cover
709 import Image
710 img = Image.open(sio)
711 return img
713 def cell_image(self, cell, image_from_text=False):
714 """
715 Returns the cell image or None if not found.
717 @param cell cell to examine
718 @param image_from_text produce an image even if it is not one
719 @return None for no image or a list of tuple (image as bytes, extension)
720 for each output of the cell
721 """
722 kind = self.cell_type(cell)
723 if kind != "code":
724 return None
725 results = []
726 for output in cell.outputs:
727 if output["output_type"] in {"execute_result", "display_data"}:
728 data = output["data"]
729 for k, v in data.items():
730 if k == "text/plain":
731 if image_from_text:
732 b = self.create_picture_from(
733 v, "text", context=output["output_type"])
734 results.append(b)
735 elif k == "application/javascript":
736 if image_from_text:
737 b = self.create_picture_from(v, "js")
738 results.append(b)
739 elif k == "application/json": # pragma: no cover
740 if image_from_text:
741 b = self.create_picture_from(v, "json")
742 results.append(b)
743 elif k == "image/svg+xml":
744 if not isinstance(v, str):
745 raise TypeError( # pragma: no cover
746 f"This should be str not '{type(v)}' (=SVG).")
747 results.append((v, "svg"))
748 elif k == "text/html":
749 if image_from_text:
750 b = self.create_picture_from(v, "html")
751 results.append(b)
752 elif k == "text/latex":
753 if image_from_text: # pragma: no cover
754 b = self.create_picture_from(v, "latex")
755 results.append(b)
756 elif k == "application/vnd.jupyter.widget-view+json":
757 # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
758 if "model_id" not in v:
759 raise KeyError( # pragma: no cover
760 f"model_id is missing from {v}")
761 model_id = v["model_id"]
762 self.fLOG(
763 "[application/vnd.jupyter.widget-view+json] not rendered", model_id)
764 elif k == "application/vnd.jupyter.widget-state+json":
765 # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
766 if "model_id" not in v:
767 raise KeyError( # pragma: no cover
768 f"model_id is missing from {v}")
769 model_id = v["model_id"]
770 self.fLOG(
771 "[application/vnd.jupyter.widget-state+json] not rendered", model_id)
772 elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
773 if not isinstance(v, bytes):
774 v = base64.b64decode(v)
775 if not isinstance(v, bytes):
776 raise TypeError( # pragma: no cover
777 f"This should be bytes not '{type(v)}' (=IMG:{k}).")
778 results.append((v, k.split("/")[-1]))
779 elif k in ("text/vnd.plotly.v1+html",
780 "application/vnd.plotly.v1+json",
781 "application/vnd.bokehjs_exec.v0+json",
782 "application/vnd.bokehjs_load.v0+json"):
783 results.append((v, k.split("/")[-1]))
784 else:
785 raise NotImplementedError( # pragma: no cover
786 f"cell type: {kind}\nk={k}\nv={v}\nCELL:\n{cell}")
787 elif output["output_type"] == "error":
788 vl = output["traceback"]
789 if image_from_text:
790 for v in vl:
791 b = self.create_picture_from(
792 v, "text", context="error")
793 results.append(b)
794 elif output["output_type"] == "stream":
795 v = output["text"]
796 if image_from_text:
797 b = self.create_picture_from(v, "text")
798 results.append(b)
799 else:
800 raise NotImplementedError( # pragma: no cover
801 "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
802 "".format(kind, output["output_type"], output, cell))
803 if len(results) > 0:
804 res = self._merge_images(results)
805 if res[0] is None:
806 return None
807 self._check_thumbnail_tuple(res)
808 return res
809 return None
811 def cell_height(self, cell):
812 """
813 Approximates the height of a cell by its number of lines it contains.
815 @param cell cell
816 @return number of cell
817 """
818 kind = self.cell_type(cell)
819 if kind == "markdown":
820 content = cell.source
821 lines = content.split("\n")
822 nbs = sum(1 + len(line) // 80 for line in lines)
823 return nbs
824 if kind == "raw":
825 content = cell.source
826 lines = content.split("\n")
827 nbs = sum(1 + len(line) // 80 for line in lines)
828 return nbs
829 if kind == "code":
830 content = cell.source
831 lines = content.split("\n")
832 nbl = len(lines)
834 for output in cell.outputs:
835 if output["output_type"] == "execute_result" or \
836 output["output_type"] == "display_data":
837 data = output["data"]
838 for k, v in data.items():
839 if k == "text/plain":
840 nbl += len(v.split("\n"))
841 elif k == "application/javascript":
842 # rough estimation
843 nbl += len(v.split("\n")) // 2
844 elif k == "application/json":
845 # rough estimation
846 try:
847 nbl += len(v.split("{"))
848 except AttributeError: # pragma: no cover
849 nbl += len(v) // 5 + 1
850 elif k == "image/svg+xml":
851 nbl += len(v) // 5
852 elif k == "text/html":
853 nbl += len(v.split("\n"))
854 elif k == "text/latex":
855 nbl += len(v.split("\\\\")) * 2
856 elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
857 nbl += len(v) // 50
858 elif k == "application/vnd.jupyter.widget-view+json":
859 nbl += 5
860 elif k == "application/vnd.jupyter.widget-state+json":
861 nbl += 5
862 elif k in ("text/vnd.plotly.v1+html",
863 "application/vnd.plotly.v1+json",
864 "application/vnd.bokehjs_load.v0+json",
865 "application/vnd.bokehjs_exec.v0+json"):
866 nbl += 10
867 else:
868 fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
869 raise NotImplementedError(
870 fmt.format(kind, k, v, cell))
871 elif output["output_type"] == "stream":
872 v = output["text"]
873 nbl += len(v.split("\n"))
874 elif output["output_type"] == "error":
875 v = output["traceback"]
876 nbl += len(v)
877 else:
878 raise NotImplementedError( # pragma: no cover
879 "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
880 .format(kind, output["output_type"], output, cell))
882 return nbl
884 raise NotImplementedError( # pragma: no cover
885 f"cell type: {kind}\nCELL:\n{cell}")
887 def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
888 """
889 Tries to add tags for a slide show when they are too few.
891 @param max_nb_cell maximum number of cells within a slide
892 @param max_nb_line maximum number of lines within a slide
893 @return list of modified cells { #slide: (kind, reason, cell) }
894 """
895 res = {}
896 nbline = 0
897 nbcell = 0
898 for i, cell in enumerate(self.iter_cells()):
899 meta = cell.metadata
900 if "slideshow" in meta:
901 st = meta["slideshow"]["slide_type"]
902 if st in ["slide", "subslide"]:
903 nbline = 0
904 nbcell = 0
905 else:
906 if cell.cell_type == "markdown":
907 content = cell.source
908 if content.startswith("# ") or \
909 content.startswith("## ") or \
910 content.startswith("### "):
911 meta["slideshow"] = {'slide_type': 'slide'}
912 nbline = 0
913 nbcell = 0
914 res[i] = ("slide", "section", cell)
916 dh = self.cell_height(cell)
917 dc = 1
918 new_nbline = nbline + dh
919 new_cell = dc + nbcell
920 if "slideshow" not in meta:
921 if new_cell > max_nb_cell or \
922 new_nbline > max_nb_line:
923 res[i] = (
924 "subslide", f"{nbcell}-{nbline} <-> {dc}-{dh}", cell)
925 nbline = 0
926 nbcell = 0
927 meta["slideshow"] = {'slide_type': 'subslide'}
929 nbline += dh
930 nbcell += dc
932 return res
934 def run_notebook(self, skip_exceptions=False, progress_callback=None,
935 additional_path=None, valid=None, clean_function=None,
936 context=None):
937 '''
938 Runs all the cells of a notebook in order and update
939 the outputs in-place.
941 If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
942 subsequent cells are run (by default, the notebook execution stops).
944 @param skip_exceptions skip exception
945 @param progress_callback call back function
946 @param additional_path additional paths (as a list or None if none)
947 @param valid if not None, valid is a function which returns whether
948 or not the cell should be executed or not, if the function
949 returns None, the execution of the notebooks and skip
950 the execution of the other cells
951 @param clean_function function which cleans a cell's code before executing
952 it (None for None)
953 @return dictionary with statistics
955 The function adds the local variable ``theNotebook`` with
956 the absolute file name of the notebook.
957 Function *valid* can return *None* to stop the execution of the notebook
958 before this cell.
959 '''
960 if self.detailed_log:
961 self.detailed_log(
962 f"[run_notebook] Starting execution of '{self._filename}'")
963 # additional path
964 if additional_path is not None:
965 if not isinstance(additional_path, list):
966 raise TypeError( # pragma: no cover
967 "Additional_path should be a list not: " + str(additional_path))
968 code = ["import sys"]
969 for p in additional_path:
970 code.append(f"sys.path.append(r'{p}')")
971 cell = "\n".join(code)
972 self.run_cell(-1, cell)
974 # we add local variable theNotebook
975 if self.theNotebook is not None:
976 cell = f"theNotebook = r'''{self.theNotebook}'''"
977 self.run_cell(-1, cell)
979 # initialisation with a code not inside the notebook
980 if self.code_init is not None:
981 self.run_cell(-1, self.code_init)
983 # execution of the notebook
984 nbcell = 0
985 nbrun = 0
986 nbnerr = 0
987 cl = time.perf_counter()
988 for i, cell in enumerate(self.iter_code_cells()):
989 nbcell += 1
990 codei = NotebookRunner.get_cell_code(cell)[1]
991 if valid is not None:
992 r = valid(codei)
993 if r is None:
994 break
995 if not r:
996 continue
997 try:
998 nbrun += 1
999 self.run_cell(i, cell, clean_function=clean_function)
1000 nbnerr += 1
1001 except Empty as er: # pragma: no cover
1002 raise RuntimeError(
1003 f"{self.comment}\nissue when executing:\n{codei}") from er
1004 except NotebookError as e: # pragma: no cover
1005 if not skip_exceptions:
1006 raise
1007 raise RuntimeError(
1008 f"Issue when executing:\n{codei}") from e
1009 if progress_callback:
1010 progress_callback(i)
1011 etime = time.perf_counter() - cl
1012 res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
1013 if self.detailed_log:
1014 self.detailed_log(
1015 f"[run_notebook] end execution of '{self._filename}'")
1016 self.detailed_log(
1017 f"[run_notebook] execution time: {etime}")
1018 self.detailed_log(f"[run_notebook] statistics : {res}")
1019 return res
1021 def count_code_cells(self):
1022 '''
1023 Returns the number of code cells in the notebook.
1024 '''
1025 return sum(1 for _ in self.iter_code_cells())
1027 def merge_notebook(self, nb):
1028 """
1029 Appends notebook *nb* to this one.
1031 @param nb notebook or list of notebook (@see cl NotebookRunner)
1032 @return number of added cells
1034 .. faqref::
1035 :title: How to merge notebook?
1037 The following code merges two notebooks into the first one
1038 and stores the result unto a file.
1040 ::
1042 from pyquickhelper.ipythonhelper import read_nb
1043 nb1 = read_nb("<file1>", kernel=False)
1044 nb2 = read_nb("<file2>", kernel=False)
1045 nb1.merge_notebook(nb2)
1046 nb1.to_json(outfile)
1047 """
1048 if isinstance(nb, list):
1049 s = 0
1050 for n in nb:
1051 s += self.merge_notebook(n)
1052 return s
1053 else:
1054 last = self._cell_container()
1055 s = 0
1056 for cell in nb.iter_cells():
1057 last.append(cell)
1058 s += 1
1059 return s
1061 def get_description(self):
1062 """
1063 Gets summary and description of this notebook.
1064 We expect the first cell to contain a title and a description
1065 of its content.
1067 @return header, description
1068 """
1069 def split_header(s, get_header=True):
1070 s = s.lstrip().rstrip()
1071 parts = s.splitlines()
1072 if parts[0].startswith('#'):
1073 if get_header:
1074 header = re.sub('#+\\s*', '', parts.pop(0))
1075 if not parts:
1076 return header, ''
1077 else:
1078 header = ''
1079 rest = '\n'.join(parts).lstrip().split('\n\n')
1080 desc = rest[0].replace('\n', ' ')
1081 return header, desc
1083 if get_header:
1084 if parts[0].startswith(('=', '-')):
1085 parts = parts[1:]
1086 header = parts.pop(0)
1087 if parts and parts[0].startswith(('=', '-')):
1088 parts.pop(0)
1089 if not parts:
1090 return header, ''
1091 else:
1092 header = ''
1093 rest = '\n'.join(parts).lstrip().split('\n\n')
1094 desc = rest[0].replace('\n', ' ')
1095 return header, desc
1097 first_cell = self.first_cell()
1099 if not first_cell['cell_type'] == 'markdown':
1100 raise ValueError( # pragma: no cover
1101 "The first cell is not in markdown but '{0}' filename='{1}'.".format(
1102 first_cell['cell_type'], self._filename))
1104 header, desc = split_header(first_cell['source'])
1105 if not desc and len(self.nb['cells']) > 1:
1106 second_cell = self.nb['cells'][1]
1107 if second_cell['cell_type'] == 'markdown':
1108 _, desc = split_header(second_cell['source'], False)
1110 reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
1111 reg = re.compile(reg_link)
1112 new_desc = reg.sub("\\2", desc)
1113 if "http://" in new_desc or "https://" in new_desc:
1114 raise ValueError( # pragma: no cover
1115 "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
1116 desc, new_desc, self._filename))
1117 return header, new_desc.replace('"', "")
1119 def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
1120 """
1121 Processes the notebook and creates one picture based on the outputs
1122 to illustrate a notebook.
1124 @param max_width maximum size of the thumbnail
1125 @param max_height maximum size of the thumbnail
1126 @param use_default force using a default image even if an even is present
1127 @return string (:epkg:`SVG`) or Image (:epkg:`PIL`)
1128 """
1129 images = []
1130 cells = list(self.iter_cells())
1131 cells.reverse()
1132 for cell in cells:
1133 c = self.cell_image(cell, False)
1134 if c is not None and len(c) > 0 and len(c[0]) > 0 and c[1] not in (
1135 "vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
1136 "vnd.bokehjs_load.v0+json"):
1137 self._check_thumbnail_tuple(c)
1138 images.append(c)
1139 if not use_default and len(images) == 0:
1140 for cell in cells:
1141 c = self.cell_image(cell, True)
1142 if c is not None and len(c) > 0 and len(c[0]) > 0:
1143 self._check_thumbnail_tuple(c)
1144 images.append(c)
1145 if len(c[0]) >= 1000:
1146 break
1147 if use_default:
1148 images = []
1149 if len(images) == 0:
1150 # no image, we need to consider the default one
1151 no_image = os.path.join(
1152 os.path.dirname(__file__), 'no_image_nb.png')
1153 with open(no_image, "rb") as f:
1154 c = (f.read(), "png")
1155 self._check_thumbnail_tuple(c)
1156 images.append(c)
1158 # select the image
1159 if len(images) == 0:
1160 raise ValueError( # pragma: no cover
1161 "There should be at least one image.")
1162 if len(images) == 1:
1163 image = images[0]
1164 else:
1165 # maybe later we'll implement a different logic
1166 # we pick the last one
1167 image = images[0]
1169 # zoom
1170 if image[1] in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json", "vnd.bokehjs_load.v0+json"):
1171 return None
1172 if image[1] == 'svg':
1173 try:
1174 img = svg2img(image[0])
1175 except PYQImageException: # pragma: no cover
1176 # Enable to convert SVG.
1177 return None
1178 return self._scale_image(img, image[1], max_width=max_width, max_height=max_height)
1179 img = self._scale_image(
1180 image[0], image[1], max_width=max_width, max_height=max_height)
1181 return img
1183 def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
1184 """
1185 Scales an image with the same aspect ratio centered in an
1186 image with a given max_width and max_height.
1188 @param in_bytes image as bytes
1189 @param format indication of the format (can be empty)
1190 @param max_width maximum size of the thumbnail
1191 @param max_height maximum size of the thumbnail
1192 @return Image (PIL)
1193 """
1194 # local import to avoid testing dependency on PIL:
1195 try:
1196 from PIL import Image
1197 except ImportError: # pragma: no cover
1198 import Image
1200 if isinstance(in_bytes, tuple):
1201 in_bytes = in_bytes[0]
1202 if isinstance(in_bytes, bytes):
1203 img = Image.open(BytesIO(in_bytes))
1204 elif isinstance(in_bytes, Image.Image):
1205 img = in_bytes
1206 else:
1207 raise TypeError( # pragma: no cover
1208 f"bytes expected, not {type(in_bytes)} - format={format}")
1209 width_in, height_in = img.size
1210 scale_w = max_width / float(width_in)
1211 scale_h = max_height / float(height_in)
1213 if height_in * scale_w <= max_height:
1214 scale = scale_w
1215 else:
1216 scale = scale_h
1218 if scale >= 1.0:
1219 return img
1221 width_sc = int(round(scale * width_in))
1222 height_sc = int(round(scale * height_in))
1224 # resize the image and center
1225 img.thumbnail((width_sc, height_sc), Image.ANTIALIAS)
1226 thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
1227 pos_insert = ((max_width - width_sc) // 2,
1228 (max_height - height_sc) // 2)
1229 thumb.paste(img, pos_insert)
1230 return thumb
1232 def _merge_images(self, results):
1233 """
1234 Merges images defined by (buffer, format).
1235 The method uses PIL to merge images when possible.
1237 @return ``[ (image, format) ]``
1238 """
1239 if len(results) == 1:
1240 results = results[0]
1241 self._check_thumbnail_tuple(results)
1242 return results
1243 if len(results) == 0:
1244 return None
1246 formats_counts = Counter(_[1] for _ in results)
1247 if len(formats_counts) == 1:
1248 format = results[0][1]
1249 else:
1250 items = sorted(((v, k)
1251 for k, v in formats_counts.items()), reverse=False)
1252 for it in items:
1253 format = it
1254 break
1256 results = [_ for _ in results if _[1] == format]
1257 if format == "svg":
1258 return ("\n".join(_[0] for _ in results), format)
1260 # local import to avoid testing dependency on PIL:
1261 try:
1262 from PIL import Image
1263 except ImportError: # pragma: no cover
1264 import Image
1266 dx = 0.
1267 dy = 0.
1268 over = 0.7
1269 imgs = []
1270 for in_bytes, _ in results:
1271 img = Image.open(BytesIO(in_bytes))
1272 imgs.append(img)
1273 dx = max(dx, img.size[0])
1274 dy += img.size[1] * over
1276 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
1277 for img in imgs:
1278 dy -= img.size[1] * over
1279 new_im.paste(img, (0, max(int(dy), 0)))
1281 if max(dx, dy) > 0:
1282 image_buffer = BytesIO()
1283 new_im.save(image_buffer, "PNG")
1284 b = image_buffer.getvalue(), "png"
1285 return b
1286 b = None, "png"
1287 return b