Coverage for pyquickhelper/helpgen/ 84%

213 statements  

« prev     ^ index     » next v7.2.7, created at 2023-06-03 02:21 +0200



3@brief Helpers to convert docstrings to various formats. 


5import re 

6import textwrap 

7import os 

8from io import StringIO 

9from docutils import core, languages 

10from import StringInput, StringOutput 

11from .utils_sphinx_doc import migrating_doxygen_doc 

12from .helpgen_exceptions import HelpGenConvertError 

13from ..texthelper.texts_language import TITLES 

14from ..loghelper.flog import noLOG 



def default_sphinx_options(fLOG=noLOG, **options):
    """
    Defines or overrides default options for :epkg:`Sphinx`, listed below.

    .. runpython::

        from pyquickhelper.helpgen.rst_converters import default_sphinx_options
        options = default_sphinx_options()
        for k, v in sorted(options.items()):
            print("{0} = {1}".format(k, v))

    :epkg:`latex` is not available on :epkg:`Windows`.

    @param      fLOG        logging function, kept in the signature but
                            not used by this function
    @param      options     values overriding the defaults; a key which has
                            no default is copied to the result unchanged
    @return                 dictionary of options

    The search for the *dot* executable only happens when
    *graphviz_dot* is not given. When *imgmath_image_format* is ``'png'``
    the paths returned by ``find_dvipng_path`` are used for
    *imgmath_latex*, *imgmath_dvipng*, *imgmath_dvisvgm* unless the caller
    gives a value for these keys, in which case the caller's value wins.
    """
    # delayed import to speed up time
    from .conf_path_tools import find_graphviz_dot, find_dvipng_path

    # Only look for the dot executable when the caller did not provide one.
    if 'graphviz_dot' in options:
        graphviz_dot = options['graphviz_dot']
    else:
        graphviz_dot = find_graphviz_dot(exc=False)

    # No default is set here for output_encoding, doctitle_xform,
    # initial_header_level, input_encoding, outdir, imagedir, confdir,
    # doctreedir, warning_stream; they are only copied if the caller
    # provides them (see the final loop).
    res = {
        'blog_background': options.get('blog_background', False),
        'sharepost': options.get('sharepost', None),
        'todoext_link_only': options.get('todoext_link_only', False),
        'mathdef_link_only': options.get('mathdef_link_only', True),
        'blocref_link_only': options.get('blocref_link_only', False),
        'faqref_link_only': options.get('faqref_link_only', False),
        'nbref_link_only': options.get('nbref_link_only', False),
        'todo_link_only': options.get('todo_link_only', False),
        'language': options.get('language', 'en'),
        'math_number_all': options.get('math_number_all', False),
        # graphviz
        'graphviz_output_format': options.get('graphviz_output_format', 'png'),
        'graphviz_dot': graphviz_dot,
        # latex
        'imgmath_image_format': options.get('imgmath_image_format', 'png'),
        # containers
        'out_blogpostlist': [],
        'out_runpythonlist': [],
    }

    if res['imgmath_image_format'] == 'png':
        try:
            imgmath_latex, imgmath_dvipng, imgmath_dvisvgm = find_dvipng_path(
                exc=False)
        except FileNotFoundError:
            # miktex is not available, the imgmath_* paths are not set
            pass
        else:
            detected = {
                'imgmath_latex': imgmath_latex,
                'imgmath_dvipng': imgmath_dvipng,
                'imgmath_dvisvgm': imgmath_dvisvgm,
            }
            for key, value in detected.items():
                # an explicit value from the caller overrides the detection
                res[key] = options.get(key, value)

    # every remaining option is forwarded unchanged
    for k, v in options.items():
        if k not in res:
            res[k] = v

    return res



def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False,
             directives=None, language="en",
             layout='docutils', document_name="<<string>>",
             external_docnames=None, filter_nodes=None,
             new_extensions=None, update_builder=None,
             ret_doctree=False, destination=None, destination_path=None,
             **options):
    """
    Converts a string from :epkg:`RST`
    into :epkg:`HTML` format or transformed :epkg:`RST`.

    @param      s                   string to convert
    @param      fLOG                logging function (warnings will be logged)
    @param      writer              ``'html'`` for :epkg:`HTML` format,
                                    ``'rst'`` for :epkg:`RST` format,
                                    ``'md'`` for :epkg:`MD` format,
                                    ``'elatex'`` for :epkg:`latex` format,
                                    ``'doctree'`` to get the doctree, *writer* can also be a tuple
                                    for custom formats and must be like ``('builder_name', builder_class)``.
    @param      keep_warnings       keep_warnings in the final HTML
    @param      directives          new directives to add (see below)
    @param      language            language
    @param      layout              ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name       document name, not really important since the input is a string
    @param      external_docnames   if the string to parse makes references to other documents,
                                    if one is missing, an exception is raised.
    @param      filter_nodes        transforms the doctree before writing the results (layout must be 'sphinx'),
                                    the function takes a doctree as a single parameter
    @param      new_extensions      additional extension to setup
    @param      update_builder      update the builder after it is instantiated
    @param      ret_doctree         returns the doctree
    @param      destination         set a destination (required by some extensions)
    @param      destination_path    set a destination path (required by some extensions)
    @param      options             :epkg:`Sphinx` options see
                                    `Render math as images <https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.imgmath>`_,
                                    a subset of options is used, see @see fn default_sphinx_options.
                                    By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                         the doctree if *ret_doctree* is True,
                                    otherwise the converted string (the full output of the writer
                                    for layout ``'docutils'``, the main page produced by the builder
                                    for layouts ``'sphinx'`` and ``'sphinx_body'``)

    The function raises *ValueError* if *writer* or *layout* is not
    supported, if no environment was built, if no page was produced
    or if *filter_nodes* is used with layout ``'docutils'`` and a writer
    different from ``'doctree'``. It raises *AttributeError* if the builder
    has no method ``iter_pages``.

    *directives* is None or a list of 2 or 5-uple:

    * a directive name (mandatory)
    * a directive class: see `Sphinx Directive
      <https://www.sphinx-doc.org/en/master/extdev/markupapi.html>`_,
      see also @see cl RunPythonDirective as an example (mandatory)
    * a docutils node: see @see cl runpython_node as an example
    * two functions: see @see fn visit_runpython_node, @see fn depart_runpython_node as an example

    The parameter *layout* specifies the kind of HTML you need.

    * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive
      directives are not processed (recursive means they modify the doctree).
      The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header).
    * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive
      as directives can modify the doctree.
    * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned.

    If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``.
    However, the builder class must contain an attribute ``_writer_class`` with
    the associated writer. The builder class must also implement a method
    ``iter_pages`` which enumerates all written pages as pairs
    *(document name, content)*.

    .. exref::
        :title: How to test a Sphinx directive?

        The following code defines a simple directive
        based on an existing one.
        It also defines what to do if a new node
        is inserted in the documentation.

        ::

            from docutils import nodes
            from pyquickhelper.helpgen import rst2html

            class runpythonthis_node(nodes.Structural, nodes.Element):
                pass

            class RunPythonThisDirective (RunPythonDirective):
                runpython_class = runpythonthis_node

            def visit_node(self, node):
                self.body.append("<p><b>visit_node</b></p>")
            def depart_node(self, node):
                self.body.append("<p><b>depart_node</b></p>")

            content = '''
                        test a directive
                        ================

                        .. runpythonthis::

                            print("this code should appear" + "___")
                        '''.replace("            ", "")
                        # to remove spaces at the beginning of the line

            tives = [ ("runpythonthis", RunPythonThisDirective,
                       runpythonthis_node, visit_node, depart_node) ]

            html = rst2html(content, writer="html", keep_warnings=True,
                            directives=tives)

    Unfortunately, this functionality is only tested on :epkg:`Python` 3.
    It might not work on :epkg:`Python` 2.7.
    The function produces files if the document contains latex
    converted into image.

    .. faqref::
        :title: How to get more about latex errors?
        :index: latex

        :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions.
        I did not find an easy way to read the error returned by latex about
        a missing bracket or an unknown command. I finally added a short piece
        of code in ``sphinx/ext/imgmath.py`` just after the call to
        the executable indicated by *imgmath_latex*

        ::

            if b'...' in stdout or b'LaTeX Error' in stdout:
                print(self.builder.config.imgmath_latex_preamble)
                print(p.returncode)
                print("################")
                print(latex)
                print("..........")
                print(stdout.decode("ascii").replace("\\r", ""))
                print("-----")
                print(stderr)

        It displays the output if an error happened.

    .. faqref::
        :title: How to hide command line window while compiling latex?
        :lid: command line window

        :epkg:`Sphinx` calls :epkg:`latex` through command line.
        On :epkg:`Windows`, a command line window
        can annoyingly show up anytime a formula is compiled.
        The following can be added to hide it:

        ::

            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW

        And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``.

    By default, the function now interprets :epkg:`Sphinx`
    directives and not only *docutils* ones.
    Parameter *directives* adds a directive
    before parsing the :epkg:`RST`.
    The function is more consistent.
    Format ``rst`` is available as well as
    custom builders.
    New nodes are optional in *directives*.
    Markdown format was added.
    """
    # delayed import to speed up time
    from .sphinxm_mock_app import MockSphinxApp

    options.setdefault('html_theme', 'basic')
    defopt = default_sphinx_options(**options)
    defopt.setdefault("master_doc", document_name)
    if writer in ('latex', 'elatex') and 'latex_documents' not in defopt:
        defopt['latex_documents'] = [(document_name, ) * 5]

    # *writer* is rebound below to the writer object created by the mock
    # application, *writer_name* keeps what the caller asked for.
    if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]:
        builder_key = "sphinx"
        writer_name = "HTMLWriterWithCustomDirectives"
    elif (writer in ("rst", "md", "latex", "elatex", 'text', 'doctree') or
            isinstance(writer, tuple)):
        # A tuple is expected to be like ("builder_name", builder_class).
        builder_key = writer
        writer_name = writer
    else:
        raise ValueError(
            f"Unexpected writer '{writer}', should be 'html', 'rst', 'md', "
            f"'latex', 'elatex', 'text', 'doctree' or a tuple "
            f"(builder_name, builder_class).")

    mockapp, writer, title_names = MockSphinxApp.create(
        builder_key, directives, confoverrides=defopt,
        new_extensions=new_extensions,
        fLOG=fLOG, destination_path=destination_path)

    if writer is None and directives is not None and len(directives) > 0:
        raise NotImplementedError(
            "The writer must not be null if custom directives will be added, check the documentation of the fucntion.")

    # delayed import to speed up time
    from sphinx.environment import default_settings
    settings_overrides = default_settings.copy()
    settings_overrides["warning_stream"] = StringIO()
    settings_overrides["master_doc"] = document_name
    settings_overrides["source"] = document_name
    settings_overrides["contentsname"] = document_name
    settings_overrides.update({k: v[0]
                               for k, v in mockapp.new_options.items()})

    # options given by the caller take precedence over the previous values
    settings_overrides.update(defopt)
    config = mockapp.config
    config.blog_background = True
    config.blog_background_page = False
    config.sharepost = None

    if hasattr(writer, "add_configuration_options"):
        writer.add_configuration_options(mockapp.new_options)
    for k in ('outdir', 'imagedir', 'confdir', 'doctreedir'):
        setattr(writer.builder, k, settings_overrides.get(k, ''))
    if destination_path is not None:
        writer.builder.outdir = destination_path
    if update_builder:
        update_builder(writer.builder)

    env = mockapp.env
    if env is None:
        raise ValueError("No environment was built.")

    env.temp_data["docname"] = document_name
    env.temp_data["source"] = document_name
    if mockapp.builder.env is None:
        mockapp.builder.env = env
    else:
        mockapp.builder.env.temp_data["docname"] = document_name
        mockapp.builder.env.temp_data["source"] = document_name
    settings_overrides["env"] = env

    # adds the titles the custom directives need and the language misses
    lang = languages.get_language(language)
    for name in title_names:
        if name not in lang.labels:
            lang.labels[name] = TITLES[language][name]

    for k, v in sorted(settings_overrides.items()):
        fLOG(f"[rst2html] {k}={v}{' --- added' if hasattr(config, k) else ''}")
    for k, v in sorted(settings_overrides.items()):
        if hasattr(writer.builder.config, k) and writer.builder.config[k] != v:
            writer.builder.config[k] = v

    _, pub = core.publish_programmatically(
        source=s, source_path=None, destination_path=destination_path, writer=writer,
        writer_name=writer_name, settings_overrides=settings_overrides,
        source_class=StringInput, destination_class=StringOutput,
        destination=destination, reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext', settings=None,
        settings_spec=None, config_section=None, enable_exit_status=False)

    doctree = pub.document

    if filter_nodes is not None:
        # *writer* is now the writer object, the requested format
        # is stored in *writer_name*.
        if layout == "docutils" and writer_name != "doctree":
            raise ValueError(
                "filter_nodes is not None, layout must not be 'docutils'")
        filter_nodes(doctree)

    mockapp.finalize(doctree, external_docnames=external_docnames)
    parts = pub.writer.parts

    whole = parts["whole"]
    if isinstance(whole, list):
        # Not html.
        exp = "".join(whole)
    elif keep_warnings:
        exp = whole
    else:
        # removes the system messages docutils inserts in the output
        exp = re.sub(
            '(<div class="system-message">(.|\\n)*?</div>)', "", whole)

    if ret_doctree:
        return doctree

    if layout == "docutils":
        return exp

    if not hasattr(writer.builder, "iter_pages"):
        raise AttributeError(
            f"Class '{writer.builder}' must have a method 'iter_pages' which returns a dictionary.")

    # names under which the main document may be registered
    main = (f"/{document_name}.m.html",
            f"/{document_name}.m.{writer_name}",
            document_name)
    page = None
    pages = []
    contents = []
    for k, v in writer.builder.iter_pages():
        pages.append(k)
        contents.append(v)
        if k in main:
            page = v
            break
    if page is None and len(contents) == 1:
        # a single page was produced, it is the one to return
        page = contents[0]
    if page is None:
        raise ValueError(
            f"No page contents was produced, only '{pages}'.")

    if layout == "sphinx":
        if isinstance(page, str):
            return page
        return "\n".join(page)
    if layout == "sphinx_body":
        return _rst2html_extract_body(page)
    raise ValueError(
        f"Unexpected value for layout '{layout}'")


def _rst2html_extract_body(page):
    """
    Returns the lines of an :epkg:`HTML` page placed between
    the line ``<body>`` and the line ``</body>``.

    @param      page    page as a string or a sequence of strings
                        (joined with end of lines)
    @return             body as a string
    """
    if not isinstance(page, str):
        page = "\n".join(page)
    keep = []
    inside = False
    for line in page.replace('</head>', '</head>\n').split("\n"):
        stripped = line.strip(" \n\r")
        if stripped == "</body>":
            inside = False
        if inside:
            keep.append(line)
        if stripped == "<body>":
            inside = True
    return "\n".join(keep)



def correct_indentation(text):
    """
    Tries to improve the indentation before running :epkg:`docutils`.

    The function looks for lines only made of one repeated character
    among ``-+=*^`` (title underlines) and removes from every line
    at most as many leading spaces as the smallest indentation
    found for such a line.

    @param      text        text to correct
    @return                 corrected text, *text* itself if no underline
                            is found or if one of them is not indented
    """
    lines = text.split("\n")

    underline_indents = set()
    for line in lines:
        # NOTE(review): the tab replacement is copied as shown, confirm it
        # was not a wider run of spaces in the original file.
        expanded = line.replace("\t", " ")
        stripped = expanded.lstrip()
        marker = stripped.strip("\r\n\t ")
        if marker and marker[0] in "-+=*^" and marker == marker[0] * len(marker):
            underline_indents.add(len(expanded) - len(stripped))

    shift = min(underline_indents, default=0)
    if shift <= 0:
        return text

    def _dedent(line):
        # only spaces are counted here, tabs are left untouched
        leading = len(line) - len(line.lstrip(' '))
        return line[min(leading, shift):]

    return "\n".join(_dedent(line) for line in lines)



def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html",
                   keep_warnings=False, directives=None, language="en",
                   layout='docutils', document_name="<<string>>",
                   filter_nodes=None, **options):
    """
    Converts a docstring into a :epkg:`HTML` format.

    @param      function_or_string      function, class, method or docstring
    @param      format                  output format, see below
    @param      fLOG                    logging function
    @param      writer                  ``'html'`` for :epkg:`HTML` format,
                                        ``'rst'`` for :epkg:`RST` format,
                                        ``'md'`` for :epkg:`MD` format
    @param      keep_warnings           keep_warnings in the final :epkg:`HTML`
    @param      directives              new directives to add, see @see fn rst2html
    @param      language                language
    @param      layout                  ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``,
                                        see @see fn rst2html
    @param      document_name           document_name for this string
    @param      filter_nodes            transform the doctree before writing the results
                                        (layout must be 'sphinx')
    @param      options                 Sphinx options forwarded to @see fn rst2html,
                                        a subset of options is used, see @see fn default_sphinx_options.
                                        By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                             (str) :epkg:`HTML` format or (IPython.core.display.HTML)

    .. exref::
        :title: Produce HTML documentation for a function or class

        The following code can display the docstring in :epkg:`HTML` format
        to display it in a :epkg:`notebook`.

        ::

            from pyquickhelper.helpgen import docstring2html
            import sklearn.linear_model
            docstring2html(sklearn.linear_model.LogisticRegression)

    The output format is defined by:

    * ``'html'``: IPython :epkg:`HTML` object
    * ``'rawhtml'``: :epkg:`HTML` as text + style
    * ``'rst'``: :epkg:`rst`
    * ``'text'``: raw text, the docstring is returned unchanged

    If the docstring is None, the function returns an empty string
    for every format but ``'text'``. The conversion is tried a second time
    after calling @see fn correct_indentation if the first attempt fails.
    An exception *HelpGenConvertError* is raised if the second attempt
    fails too. If *writer* is ``'doctree'``, ``'rst'`` or ``'md'``, the result
    of @see fn rst2html is returned whatever *format* is.
    """
    if isinstance(function_or_string, str):
        doc = function_or_string
    else:
        doc = function_or_string.__doc__

    if format == "text":
        return doc
    if doc is None:
        return ""

    from .utils_sphinx_doc import _private_migrating_doxygen_doc
    javadoc = migrating_doxygen_doc(doc, "None", log=False)[1]
    rst_rows = _private_migrating_doxygen_doc(
        javadoc.split("\n"), index_first_line=0, filename="None")
    ded = textwrap.dedent("\n".join(rst_rows))

    def _convert(content):
        # one place for the conversion, it may be called twice
        return rst2html(content, fLOG=fLOG, writer=writer,
                        keep_warnings=keep_warnings, directives=directives,
                        language=language, filter_nodes=filter_nodes,
                        document_name=document_name,
                        layout=layout, **options)

    try:
        html = _convert(ded)
    except Exception:
        # we check the indentation
        ded = correct_indentation(ded)
        try:
            html = _convert(ded)
        except Exception as e:
            numbered = "\n".join(
                "%04d %s" % (i + 1, line.strip("\n\r"))
                for i, line in enumerate(ded.split("\n")))
            raise HelpGenConvertError(
                "Unable to process:\n{0}".format(numbered)) from e

    # these writers do not produce HTML, the result is returned as is
    if writer in ('doctree', 'rst', 'md'):
        return html

    if format == "html":
        from IPython.core.display import HTML
        return HTML(html)
    if format in ("rawhtml", 'rst', 'md', 'doctree'):
        return html
    raise ValueError(
        "Unexpected format: '{}', should be html, rawhtml, text, rst, "
        "md, doctree.".format(format))



def rst2rst_folder(rststring, folder, document_name="index", **options):
    """
    Converts a :epkg:`RST` string into simplified :epkg:`RST`.

    @param      rststring       :epkg:`rst` string
    @param      folder          the builder needs to write the results in a
                                folder defined by this parameter,
                                it must exist
    @param      document_name   main document, this value is currently not
                                forwarded, the conversion always uses the
                                name ``'example'``
    @param      options         additional options forwarded to @see fn rst2html
    @return                     converted string

    The function raises *FileNotFoundError* if *folder* does not exist.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(folder)

    def _redirect_output(builder):
        # the builder writes its results into the requested folder
        builder.outdir = folder

    forwarded = dict(options)
    return rst2html(rststring, writer="rst", document_name="example",
                    update_builder=_redirect_output, layout="sphinx",
                    **forwarded)