Coverage for pyquickhelper/helpgen/rst_converters.py: 84%
213 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1"""
2@file
3@brief Helpers to convert docstrings to various formats.
4"""
5import re
6import textwrap
7import os
8from io import StringIO
9from docutils import core, languages
10from docutils.io import StringInput, StringOutput
11from .utils_sphinx_doc import migrating_doxygen_doc
12from .helpgen_exceptions import HelpGenConvertError
13from ..texthelper.texts_language import TITLES
14from ..loghelper.flog import noLOG
def default_sphinx_options(fLOG=noLOG, **options):
    """
    Builds the dictionary of options given to :epkg:`Sphinx`.
    Every value in *options* overrides the corresponding default,
    the defaults are listed below.

    .. runpython::

        from pyquickhelper.helpgen.rst_converters import default_sphinx_options
        options = default_sphinx_options()
        for k, v in sorted(options.items()):
            print("{0} = {1}".format(k, v))

    :epkg:`latex` is not available on :epkg:`Windows`.

    @param      fLOG        logging function (not used by this function)
    @param      options     values overriding the defaults, any other key
                            is copied as is into the result
    @return                 dictionary of options
    """
    # delayed import to speed up time
    from .conf_path_tools import find_graphviz_dot, find_dvipng_path

    # (option name, default value), the order defines the order of the keys
    # in the result; the graphviz lookup is evaluated even if the caller
    # overrides 'graphviz_dot'
    overridable = (
        ('blog_background', False),
        ('sharepost', None),
        ('todoext_link_only', False),
        ('mathdef_link_only', True),
        ('blocref_link_only', False),
        ('faqref_link_only', False),
        ('nbref_link_only', False),
        ('todo_link_only', False),
        ('language', 'en'),
        ('math_number_all', False),
        # graphviz
        ('graphviz_output_format', 'png'),
        ('graphviz_dot', find_graphviz_dot(exc=False)),
        # latex
        ('imgmath_image_format', 'png'),
    )
    res = {name: options.get(name, default) for name, default in overridable}

    # containers, always created empty whatever *options* contains
    res['out_blogpostlist'] = []
    res['out_runpythonlist'] = []

    if res['imgmath_image_format'] == 'png':
        try:
            latex_path, dvipng_path, dvisvgm_path = find_dvipng_path(exc=False)
        except FileNotFoundError:
            # miktex is not available, the imgmath paths are left undefined
            pass
        else:
            res['imgmath_latex'] = latex_path
            res['imgmath_dvipng'] = dvipng_path
            res['imgmath_dvisvgm'] = dvisvgm_path

    # any option not handled above is forwarded unchanged
    for name, value in options.items():
        res.setdefault(name, value)

    return res
def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False,
             directives=None, language="en",
             layout='docutils', document_name="<<string>>",
             external_docnames=None, filter_nodes=None,
             new_extensions=None, update_builder=None,
             ret_doctree=False, destination=None, destination_path=None,
             **options):
    """
    Converts a string from :epkg:`RST`
    into :epkg:`HTML` format or transformed :epkg:`RST`.

    @param      s                   string to convert
    @param      fLOG                logging function (warnings will be logged)
    @param      writer              ``'html'`` for :epkg:`HTML` format,
                                    ``'rst'`` for :epkg:`RST` format,
                                    ``'md'`` for :epkg:`MD` format,
                                    ``'elatex'`` for :epkg:`latex` format,
                                    ``'doctree'`` to get the doctree, *writer* can also be a tuple
                                    for custom formats and must be like ``('builder_name', builder_class)``.
    @param      keep_warnings       keep_warnings in the final HTML
    @param      directives          new directives to add (see below)
    @param      language            language
    @param      layout              ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name       document name, not really important since the input is a string
    @param      external_docnames   if the string to parse makes references to other documents,
                                    if one is missing, an exception is raised.
    @param      filter_nodes        transforms the doctree before writing the results
                                    (layout must not be ``'docutils'`` unless *writer* is ``'doctree'``),
                                    the function takes a doctree as a single parameter
    @param      new_extensions      additional extension to setup
    @param      update_builder      update the builder after it is instantiated
    @param      ret_doctree         returns the doctree
    @param      destination         set a destination (requires for some extension)
    @param      destination_path    set a destination path (requires for some extension)
    @param      options             :epkg:`Sphinx` options see
                                    `Render math as images <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                    a subset of options is used, see @see fn default_sphinx_options.
                                    By default, the theme (option *html_theme*) will ``'basic'``.
    @return                         converted string, or the doctree if *ret_doctree* is True

    *directives* is None or a list of 2 or 5-uple:

    * a directive name (mandatory)
    * a directive class: see `Sphinx Directive
      <https://www.sphinx-doc.org/en/master/development/tutorials/helloworld.html>`_,
      see also @see cl RunPythonDirective as an example (mandatory)
    * a docutils node: see @see cl runpython_node as an example
    * two functions: see @see fn visit_runpython_node, @see fn depart_runpython_node as an example

    The parameter *layout* specify the kind of HTML you need.

    * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive
      directives are not processed (recursive means they modify the doctree).
      The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header).
    * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive
      as directives can modify the doctree.
    * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned.

    If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``.
    However, the builder class must contain an attribute ``_writer_class`` with
    the associated writer. The builder class must also implement a method
    ``iter_pages`` which enumerates all written pages as pairs
    ``(document name, content)``.

    The function raises ``ValueError`` if *writer* or *layout* is unexpected,
    if no environment was built or if no page was produced,
    ``AttributeError`` if the builder has no method ``iter_pages`` and
    the layout is not ``'docutils'``.

    .. exref::
        :title: How to test a Sphinx directive?

        The following code defines a simple directive
        defined based on an existing one.
        It also defined what to do if a new node
        is inserted in the documentation.

        ::

            from docutils import nodes
            from pyquickhelper.helpgen import rst2html

            class runpythonthis_node(nodes.Structural, nodes.Element):
                pass

            class RunPythonThisDirective (RunPythonDirective):
                runpython_class = runpythonthis_node

            def visit_node(self, node):
                self.body.append("<p><b>visit_node</b></p>")
            def depart_node(self, node):
                self.body.append("<p><b>depart_node</b></p>")

            content = '''
                        test a directive
                        ================

                        .. runpythonthis::

                            print("this code shoud appear" + "___")
                        '''.replace("                        ", "")
                        # to remove spaces at the beginning of the line

            tives = [ ("runpythonthis", RunPythonThisDirective,
                       runpythonthis_node, visit_node, depart_node) ]

            html = rst2html(content, writer="html", keep_warnings=True,
                            directives=tives)

        Unfortunately, this functionality is only tested on :epkg:`Python` 3.
        It might not work on :epkg:`Python` 2.7.
        The function produces files if the document contains latex
        converted into image.

    .. faqref::
        :title: How to get more about latex errors?
        :index: latex

        :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions.
        I did not find an easy way to read the error returned by latex about
        a missing bracket or an unknown command. I finally added a short piece
        of code in ``sphinx.ext.imgmath.py`` just after the call to
        the executable indicated by *imgmath_latex*

        ::

            if b'...' in stdout or b'LaTeX Error' in stdout:
                print(self.builder.config.imgmath_latex_preamble)
                print(p.returncode)
                print("################")
                print(latex)
                print("..........")
                print(stdout.decode("ascii").replace("\\r", ""))
                print("-----")
                print(stderr)

        It displays the output if an error happened.

    .. faqref::
        :title: How to hide command line window while compiling latex?
        :lid: command line window

        :epkg:`Sphinx` calls :epkg:`latex` through command line.
        On :epkg:`Windows`, a command line window
        can annoyingly show up anytime a formula is compiled.
        The following can be added to hide it:

        ::

            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW

        And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``.

    By default, the function now interprets :epkg:`Sphinx`
    directives and not only *docutils* ones.
    Parameter *directives* adds a directive
    before parsing the :epkg:`RST`.
    The function is more consistent.
    Format ``rst`` is available as well as
    custom builders.
    New nodes are optional in *directives*.
    Markdown format was added.
    """
    # delayed import to speed up time
    from .sphinxm_mock_app import MockSphinxApp

    options.setdefault('html_theme', 'basic')
    defopt = default_sphinx_options(**options)
    defopt.setdefault("master_doc", document_name)
    if writer in ('latex', 'elatex') and 'latex_documents' not in defopt:
        defopt['latex_documents'] = [(document_name, ) * 5]

    # Maps the requested writer to the builder to create and to the
    # writer name given to docutils.
    if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]:
        builder_key = "sphinx"
        writer_name = "HTMLWriterWithCustomDirectives"
    elif (writer in ("rst", "md", "latex", "elatex", 'text', 'doctree') or
            isinstance(writer, tuple)):
        # a tuple is expected to be like ("builder_name", builder_class)
        builder_key = writer
        writer_name = writer
    else:
        raise ValueError(
            f"Unexpected writer '{writer}', should be 'rst' or 'html' or 'md' or 'elatex' or 'text'.")

    # The created writer is kept in its own variable: *writer* and
    # *writer_name* still hold what the caller asked for.
    mockapp, writer_obj, title_names = MockSphinxApp.create(
        builder_key, directives, confoverrides=defopt,
        new_extensions=new_extensions,
        fLOG=fLOG, destination_path=destination_path)

    if writer_obj is None and directives is not None and len(directives) > 0:
        raise NotImplementedError(
            "The writer must not be null if custom directives will be added, check the documentation of the fucntion.")

    # delayed import to speed up time
    from sphinx.environment import default_settings
    settings_overrides = default_settings.copy()
    settings_overrides["warning_stream"] = StringIO()
    settings_overrides["master_doc"] = document_name
    settings_overrides["source"] = document_name
    settings_overrides["contentsname"] = document_name
    settings_overrides.update({k: v[0]
                               for k, v in mockapp.new_options.items()})

    # options given by the user (or their defaults) win over everything else
    settings_overrides.update(defopt)
    config = mockapp.config
    config.blog_background = True
    config.blog_background_page = False
    config.sharepost = None

    if hasattr(writer_obj, "add_configuration_options"):
        writer_obj.add_configuration_options(mockapp.new_options)
    for k in ('outdir', 'imagedir', 'confdir', 'doctreedir'):
        setattr(writer_obj.builder, k, settings_overrides.get(k, ''))
    if destination_path is not None:
        writer_obj.builder.outdir = destination_path
    if update_builder:
        update_builder(writer_obj.builder)

    env = mockapp.env
    if env is None:
        raise ValueError("No environment was built.")

    env.temp_data["docname"] = document_name
    env.temp_data["source"] = document_name
    if mockapp.builder.env is None:
        mockapp.builder.env = env
    else:
        mockapp.builder.env.temp_data["docname"] = document_name
        mockapp.builder.env.temp_data["source"] = document_name
    settings_overrides["env"] = env

    # adds the labels required by the new directives to the docutils language
    lang = languages.get_language(language)
    for name in title_names:
        if name not in lang.labels:
            lang.labels[name] = TITLES[language][name]

    ordered_settings = sorted(settings_overrides.items())
    for k, v in ordered_settings:
        fLOG(f"[rst2html] {k}={v}{' --- added' if hasattr(config, k) else ''}")
    for k, v in ordered_settings:
        if hasattr(writer_obj.builder.config, k) and writer_obj.builder.config[k] != v:
            writer_obj.builder.config[k] = v

    _, pub = core.publish_programmatically(
        source=s, source_path=None, destination_path=destination_path, writer=writer_obj,
        writer_name=writer_name, settings_overrides=settings_overrides,
        source_class=StringInput, destination_class=StringOutput,
        destination=destination, reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext', settings=None,
        settings_spec=None, config_section=None, enable_exit_status=False)

    doctree = pub.document

    if filter_nodes is not None:
        # The requested writer name is compared, not the writer object,
        # otherwise the 'doctree' exception could never apply.
        if layout == "docutils" and writer_name != "doctree":
            raise ValueError(
                "filter_nodes is not None, layout must not be 'docutils'")
        filter_nodes(doctree)

    mockapp.finalize(doctree, external_docnames=external_docnames)
    parts = pub.writer.parts

    whole = parts["whole"]
    if isinstance(whole, list):
        # Not html.
        exp = "".join(whole)
    elif keep_warnings:
        exp = whole
    else:
        # removes the system messages inserted by docutils
        exp = re.sub(
            '(<div class="system-message">(.|\\n)*?</div>)', "", whole)

    if ret_doctree:
        return doctree

    if layout == "docutils":
        return exp

    # sphinx layouts: the content comes from the pages written by the builder
    page = None
    pages = []
    main = (f"/{document_name}.m.html",
            f"/{document_name}.m.{writer_name}",
            document_name)
    if not hasattr(writer_obj.builder, "iter_pages"):
        raise AttributeError(
            f"Class '{writer_obj.builder}' must have a method 'iter_pages' which returns a dictionary.")
    contents = []
    for k, v in writer_obj.builder.iter_pages():
        pages.append(k)
        contents.append(v)
        if k in main:
            page = v
            break
    if page is None and len(contents) == 1:
        # only one page was produced, it must be the expected one
        page = contents[0]
    if page is None:
        raise ValueError(
            f"No page contents was produced, only '{pages}'.")

    if layout == "sphinx":
        if isinstance(page, str):
            return page
        return "\n".join(page)

    if layout == "sphinx_body":
        # keeps the lines strictly between <body> and </body>
        lines = page.replace('</head>', '</head>\n').split("\n")
        keep = []
        begin = False
        for line in lines:
            stripped = line.strip(" \n\r")
            if stripped == "</body>":
                begin = False
            if begin:
                keep.append(line)
            if stripped == "<body>":
                begin = True
        return "\n".join(keep)

    raise ValueError(
        f"Unexpected value for layout '{layout}'")
def correct_indentation(text):
    """
    Tries to improve the indentation before running :epkg:`docutils`.
    The function looks for title underlines (a line only made of one
    repeated character among ``-+=*^``), takes the smallest indentation
    they appear at and removes at most that many leading spaces
    from every line.

    @param      text        text to correct
    @return                 corrected text, *text* itself if no underline
                            is indented
    """
    markers = "-+=*^"
    rows = text.split("\n")

    # indentations at which a title underline was found
    levels = set()
    for raw in rows:
        expanded = raw.replace("\t", " ")
        body = expanded.lstrip()
        underline = body.strip("\r\n\t ")
        if not underline:
            continue
        first = underline[0]
        if first in markers and underline == first * len(underline):
            levels.add(len(expanded) - len(body))

    shift = min(levels) if levels else 0
    if shift <= 0:
        return text

    def _unindent(row):
        # removes up to *shift* leading spaces (spaces only, not tabs)
        leading = len(row) - len(row.lstrip(' '))
        return row[min(leading, shift):]

    return "\n".join(_unindent(row) for row in rows)
def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html",
                   keep_warnings=False, directives=None, language="en",
                   layout='docutils', document_name="<<string>>",
                   filter_nodes=None, **options):
    """
    Converts a docstring into a :epkg:`HTML` format.

    @param      function_or_string      function, class, method or docstring
    @param      format                  output format (``'html'``, ``'rawhtml'``,
                                        ``'rst'``, ``'md'``, ``'doctree'`` or ``'text'``)
    @param      fLOG                    logging function
    @param      writer                  ``'html'`` for :epkg:`HTML` format,
                                        ``'rst'`` for :epkg:`RST` format,
                                        ``'md'`` for :epkg:`MD` format
    @param      keep_warnings           keep_warnings in the final :epkg:`HTML`
    @param      directives              new directives to add (see below)
    @param      language                language
    @param      layout                  ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name           document_name for this string
    @param      filter_nodes            transform the doctree before writing the results
                                        (layout must be 'sphinx')
    @param      options                 Sphinx options see `Render math as images
                                        <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                        a subset of options is used, see @see fn default_sphinx_options.
                                        By default, the theme (option *html_theme*) will ``'basic'``.
    @return                             (str) :epkg:`HTML` format or (IPython.core.display.HTML),
                                        an empty string if there is no docstring

    .. exref::
        :title: Produce HTML documentation for a function or class

        The following code can display the docstring in :epkg:`HTML` format
        to display it in a :epkg:`notebook`.

        ::

            from pyquickhelper.helpgen import docstring2html
            import sklearn.linear_model
            docstring2html(sklearn.linear_model.LogisticRegression)

    The output format is defined by:

    * ``'html'``: IPython :epkg:`HTML` object
    * ``'rawhtml'``: :epkg:`HTML` as text + style
    * ``'rst'``: :epkg:`rst`
    * ``'text'``: raw text

    If the conversion fails, the function tries again after calling
    @see fn correct_indentation. If it fails a second time,
    it raises ``HelpGenConvertError`` with the numbered lines it could
    not process. An unexpected *format* raises ``ValueError``.
    """
    if isinstance(function_or_string, str):
        doc = function_or_string
    else:
        doc = function_or_string.__doc__

    # An object without docstring gives an empty string for every format,
    # 'text' included.
    if doc is None:
        return ""

    if format == "text":
        return doc

    javadoc = migrating_doxygen_doc(doc, "None", log=False)[1]
    rows = javadoc.split("\n")
    from .utils_sphinx_doc import _private_migrating_doxygen_doc
    rst = _private_migrating_doxygen_doc(
        rows, index_first_line=0, filename="None")
    ded = textwrap.dedent("\n".join(rst))

    def _convert(text):
        # single place where the conversion parameters are forwarded
        return rst2html(text, fLOG=fLOG, writer=writer,
                        keep_warnings=keep_warnings, directives=directives,
                        language=language, filter_nodes=filter_nodes,
                        document_name=document_name,
                        layout=layout, **options)

    try:
        html = _convert(ded)
    except Exception:
        # Best effort: a wrong indentation is a frequent cause of failure,
        # the conversion is tried again once the indentation is corrected.
        ded = correct_indentation(ded)
        try:
            html = _convert(ded)
        except Exception as e:
            numbered = []
            for i, line in enumerate(ded.split("\n"), start=1):
                stripped = line.strip("\n\r")
                numbered.append(f"{i:04d} {stripped}")
            content = "\n".join(numbered)
            raise HelpGenConvertError(
                f"Unable to process:\n{content}") from e

    # these writers do not produce HTML, the result is returned as is
    if writer in ('doctree', 'rst', 'md'):
        return html

    if format == "html":
        from IPython.core.display import HTML
        return HTML(html)
    if format in ("rawhtml", 'rst', 'md', 'doctree'):
        return html
    raise ValueError(
        f"Unexpected format: '{format}', should be html, rawhtml, text, rst, "
        "md, doctree.")
def rst2rst_folder(rststring, folder, document_name="index", **options):
    """
    Converts a :epkg:`RST` string into simplified :epkg:`RST`.

    @param      rststring       :epkg:`rst` string
    @param      folder          the builder needs to write the results in a
                                folder defined by this parameter,
                                it must exist
    @param      document_name   main document, forwarded to @see fn rst2html
    @param      options         additional options (same as *conf.py*)
    @return                     converted string

    The function raises ``FileNotFoundError`` if *folder* does not exist.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(folder)

    def update_builder(builder):
        # the builder writes its results into the requested folder
        builder.outdir = folder

    # document_name is forwarded instead of a hard-coded name so that
    # the documented parameter is honoured
    return rst2html(rststring, writer="rst", document_name=document_name,
                    update_builder=update_builder, layout="sphinx",
                    **options)