Coverage for pyquickhelper/helpgen/rst_converters.py: 84%

213 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1""" 

2@file 

3@brief Helpers to convert docstrings to various formats. 

4""" 

5import re 

6import textwrap 

7import os 

8from io import StringIO 

9from docutils import core, languages 

10from docutils.io import StringInput, StringOutput 

11from .utils_sphinx_doc import migrating_doxygen_doc 

12from .helpgen_exceptions import HelpGenConvertError 

13from ..texthelper.texts_language import TITLES 

14from ..loghelper.flog import noLOG 

15 

16 

def default_sphinx_options(fLOG=noLOG, **options):
    """
    Defines or overrides default options for :epkg:`Sphinx`, listed below.

    .. runpython::

        from pyquickhelper.helpgen.rst_converters import default_sphinx_options
        options = default_sphinx_options()
        for k, v in sorted(options.items()):
            print("{0} = {1}".format(k, v))

    :epkg:`latex` is not available on :epkg:`Windows`.

    @param      fLOG        logging function, kept in the signature but
                            not used by this function
    @param      options     values replacing the defaults, any key which has
                            no default is copied as is into the result
    @return                 dictionary of options

    The keys ``out_blogpostlist`` and ``out_runpythonlist`` are always
    initialised with a new empty list, a value given in *options*
    for these two keys is ignored.
    """
    # delayed import to speed up time
    from .conf_path_tools import find_graphviz_dot, find_dvipng_path

    # dict.get evaluates its default eagerly: the executable is only
    # searched for when the caller did not provide a value.
    if 'graphviz_dot' in options:
        graphviz_dot = options['graphviz_dot']
    else:
        graphviz_dot = find_graphviz_dot(exc=False)

    # No default is defined for: output_encoding, doctitle_xform,
    # initial_header_level, input_encoding, outdir, imagedir, confdir,
    # doctreedir, warning_stream.
    res = {
        'blog_background': options.get('blog_background', False),
        'sharepost': options.get('sharepost', None),
        'todoext_link_only': options.get('todoext_link_only', False),
        'mathdef_link_only': options.get('mathdef_link_only', True),
        'blocref_link_only': options.get('blocref_link_only', False),
        'faqref_link_only': options.get('faqref_link_only', False),
        'nbref_link_only': options.get('nbref_link_only', False),
        'todo_link_only': options.get('todo_link_only', False),
        'language': options.get('language', 'en'),
        'math_number_all': options.get('math_number_all', False),
        # graphviz
        'graphviz_output_format': options.get('graphviz_output_format', 'png'),
        'graphviz_dot': graphviz_dot,
        # latex
        'imgmath_image_format': options.get('imgmath_image_format', 'png'),
        # containers
        'out_blogpostlist': [],
        'out_runpythonlist': [],
    }

    if res['imgmath_image_format'] == 'png':
        try:
            imgmath_latex, imgmath_dvipng, imgmath_dvisvgm = find_dvipng_path(
                exc=False)
        except FileNotFoundError:
            # latex (miktex) is not available, the imgmath_* paths
            # are left undefined
            pass
        else:
            # NOTE(review): the discovered paths take precedence over values
            # of the same keys given in *options* (see the merge below),
            # confirm this is intended.
            res['imgmath_latex'] = imgmath_latex
            res['imgmath_dvipng'] = imgmath_dvipng
            res['imgmath_dvisvgm'] = imgmath_dvisvgm

    # remaining options are added, existing keys are never replaced
    for k, v in options.items():
        if k not in res:
            res[k] = v

    return res

81 

82 

def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False,
             directives=None, language="en",
             layout='docutils', document_name="<<string>>",
             external_docnames=None, filter_nodes=None,
             new_extensions=None, update_builder=None,
             ret_doctree=False, destination=None, destination_path=None,
             **options):
    """
    Converts a string from :epkg:`RST`
    into :epkg:`HTML` format or transformed :epkg:`RST`.

    @param      s                   string to convert
    @param      fLOG                logging function (warnings will be logged)
    @param      writer              ``'html'`` for :epkg:`HTML` format,
                                    ``'rst'`` for :epkg:`RST` format,
                                    ``'md'`` for :epkg:`MD` format,
                                    ``'elatex'`` for :epkg:`latex` format,
                                    ``'text'`` for text format,
                                    ``'doctree'`` to get the doctree, *writer* can also be a tuple
                                    for custom formats and must be like ``('builder_name', builder_class)``.
    @param      keep_warnings       keep_warnings in the final HTML
    @param      directives          new directives to add (see below)
    @param      language            language
    @param      layout              ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name       document name, not really important since the input is a string
    @param      external_docnames   if the string to parse makes references to other documents,
                                    if one is missing, an exception is raised.
    @param      filter_nodes        transforms the doctree before writing the results (layout must be 'sphinx'),
                                    the function takes a doctree as a single parameter
    @param      new_extensions      additional extension to setup
    @param      update_builder      update the builder after it is instantiated
    @param      ret_doctree         returns the doctree
    @param      destination         set a destination (required for some extensions)
    @param      destination_path    set a destination path (required for some extensions)
    @param      options             :epkg:`Sphinx` options see
                                    `Render math as images <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                    a subset of options is used, see @see fn default_sphinx_options.
                                    By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                         HTML format

    The function raises *ValueError* if *writer* or *layout* is not one of
    the expected values, if no environment was built, if no page was produced
    or if *filter_nodes* is used with layout ``'docutils'`` and a writer
    different from ``'doctree'``. It raises *AttributeError* if the builder
    has no method ``iter_pages`` and a layout different from ``'docutils'``
    is requested.

    *directives* is None or a list of 2 or 5-uple:

    * a directive name (mandatory)
    * a directive class: see `Sphinx Directive
      <https://www.sphinx-doc.org/en/master/development/tutorials/helloworld.html>`_,
      see also @see cl RunPythonDirective as an example (mandatory)
    * a docutils node: see @see cl runpython_node as an example
    * two functions: see @see fn visit_runpython_node, @see fn depart_runpython_node as an example

    The parameter *layout* specifies the kind of HTML you need.

    * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive
      directives are not processed (recursive means they modify the doctree).
      The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header).
    * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive
      as directives can modify the doctree.
    * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned.

    If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``.
    However, the builder class must contain an attribute ``_writer_class`` with
    the associated writer. The builder class must also implement a method
    ``iter_pages`` which enumerates all written pages:
    ``def iter_pages(self) -> Dict[str,str]`` where the key is the document name
    and the value is its content.

    .. exref::
        :title: How to test a Sphinx directive?

        The following code defines a simple directive
        defined based on an existing one.
        It also defines what to do if a new node
        is inserted in the documentation.

        ::

            from docutils import nodes
            from pyquickhelper.helpgen import rst2html

            class runpythonthis_node(nodes.Structural, nodes.Element):
                pass

            class RunPythonThisDirective (RunPythonDirective):
                runpython_class = runpythonthis_node

            def visit_node(self, node):
                self.body.append("<p><b>visit_node</b></p>")
            def depart_node(self, node):
                self.body.append("<p><b>depart_node</b></p>")

            content = '''
                        test a directive
                        ================

                        .. runpythonthis::

                            print("this code should appear" + "___")
                        '''.replace("                        ", "")
                        # to remove spaces at the beginning of the line

            tives = [ ("runpythonthis", RunPythonThisDirective,
                       runpythonthis_node, visit_node, depart_node) ]

            html = rst2html(content, writer="html", keep_warnings=True,
                            directives=tives)

        Unfortunately, this functionality is only tested on :epkg:`Python` 3.
        It might not work on :epkg:`Python` 2.7.
        The function produces files if the document contains latex
        converted into image.

    .. faqref::
       :title: How to get more about latex errors?
       :index: latex

        :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions.
        I did not find an easy way to read the error returned by latex about
        a missing bracket or an unknown command. I finally added a short piece
        of code in ``sphinx.ext.imgmath.py`` just after the call to
        the executable indicated by *imgmath_latex*

        ::

            if b'...' in stdout or b'LaTeX Error' in stdout:
                print(self.builder.config.imgmath_latex_preamble)
                print(p.returncode)
                print("################")
                print(latex)
                print("..........")
                print(stdout.decode("ascii").replace("\\r", ""))
                print("-----")
                print(stderr)

        It displays the output if an error happened.

    .. faqref::
        :title: How to hide command line window while compiling latex?
        :lid: command line window

        :epkg:`Sphinx` calls :epkg:`latex` through command line.
        On :epkg:`Windows`, a command line window
        can annoyingly show up anytime a formula is compiled.
        The following can be added to hide it:

        ::

            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW

        And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``.

    By default, the function now interprets :epkg:`Sphinx`
    directives and not only *docutils* ones.
    Parameter *directives* adds a directive
    before parsing the :epkg:`RST`.
    The function is more consistent.
    Format ``rst`` is available as well as
    custom builders.
    New nodes are optional in *directives*.
    Markdown format was added.
    """
    # delayed import to speed up time
    from .sphinxm_mock_app import MockSphinxApp

    if 'html_theme' not in options:
        options['html_theme'] = 'basic'
    defopt = default_sphinx_options(**options)
    if "master_doc" not in defopt:
        defopt["master_doc"] = document_name
    if writer in ('latex', 'elatex') and 'latex_documents' not in defopt:
        latex_documents = [(document_name, ) * 5]
        defopt['latex_documents'] = latex_documents

    # *writer* is rebound to the writer object below, *writer_name*
    # keeps what the caller asked for.
    if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]:
        mockapp, writer, title_names = MockSphinxApp.create(
            "sphinx", directives, confoverrides=defopt,
            new_extensions=new_extensions,
            fLOG=fLOG, destination_path=destination_path)
        writer_name = "HTMLWriterWithCustomDirectives"
    elif (writer in ("rst", "md", "latex", "elatex", 'text', 'doctree') or
            isinstance(writer, tuple)):
        # known builder name or a tuple ("builder_name", builder_class)
        writer_name = writer
        mockapp, writer, title_names = MockSphinxApp.create(
            writer, directives, confoverrides=defopt,
            new_extensions=new_extensions,
            fLOG=fLOG, destination_path=destination_path)
    else:
        raise ValueError(
            f"Unexpected writer '{writer}', should be 'rst' or 'html' or 'md' or 'elatex' or 'text'.")

    if writer is None and directives is not None and len(directives) > 0:
        raise NotImplementedError(
            "The writer must not be null if custom directives will be added, check the documentation of the fucntion.")

    # delayed import to speed up time
    from sphinx.environment import default_settings
    settings_overrides = default_settings.copy()
    settings_overrides["warning_stream"] = StringIO()
    settings_overrides["master_doc"] = document_name
    settings_overrides["source"] = document_name
    settings_overrides["contentsname"] = document_name
    settings_overrides.update({k: v[0]
                               for k, v in mockapp.new_options.items()})

    # next
    settings_overrides.update(defopt)
    config = mockapp.config
    # these three values are set unconditionally, whatever *options* contains
    config.blog_background = True
    config.blog_background_page = False
    config.sharepost = None

    if hasattr(writer, "add_configuration_options"):
        writer.add_configuration_options(mockapp.new_options)
    for k in ('outdir', 'imagedir', 'confdir', 'doctreedir'):
        setattr(writer.builder, k, settings_overrides.get(k, ''))
    if destination_path is not None:
        writer.builder.outdir = destination_path
    if update_builder:
        update_builder(writer.builder)

    env = mockapp.env
    if env is None:
        raise ValueError("No environment was built.")

    env.temp_data["docname"] = document_name
    env.temp_data["source"] = document_name
    if mockapp.builder.env is None:
        mockapp.builder.env = env
    else:
        mockapp.builder.env.temp_data["docname"] = document_name
        mockapp.builder.env.temp_data["source"] = document_name
    settings_overrides["env"] = env

    # titles introduced by the extensions must exist in the language labels
    lang = languages.get_language(language)
    for name in title_names:
        if name not in lang.labels:
            lang.labels[name] = TITLES[language][name]

    for k, v in sorted(settings_overrides.items()):
        fLOG(f"[rst2html] {k}={v}{' --- added' if hasattr(config, k) else ''}")
    for k, v in sorted(settings_overrides.items()):
        if hasattr(writer.builder.config, k) and writer.builder.config[k] != v:
            writer.builder.config[k] = v

    _, pub = core.publish_programmatically(
        source=s, source_path=None, destination_path=destination_path, writer=writer,
        writer_name=writer_name, settings_overrides=settings_overrides,
        source_class=StringInput, destination_class=StringOutput,
        destination=destination, reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext', settings=None,
        settings_spec=None, config_section=None, enable_exit_status=False)

    doctree = pub.document

    if filter_nodes is not None:
        # The requested writer is compared through *writer_name*:
        # *writer* now holds the writer object and never equals "doctree".
        if layout == "docutils" and writer_name != "doctree":
            raise ValueError(
                "filter_nodes is not None, layout must not be 'docutils'")
        filter_nodes(doctree)

    mockapp.finalize(doctree, external_docnames=external_docnames)
    parts = pub.writer.parts

    whole = parts["whole"]
    if isinstance(whole, list):
        # Not html.
        exp = "".join(whole)
    elif keep_warnings:
        exp = whole
    else:
        # removes the system messages inserted by docutils
        exp = re.sub(
            '(<div class="system-message">(.|\\n)*?</div>)', "", whole)

    if ret_doctree:
        return doctree

    if layout == "docutils":
        return exp

    # sphinx layouts: the content is retrieved from the pages
    # written by the builder
    page = None
    pages = []
    main = (f"/{document_name}.m.html",
            f"/{document_name}.m.{writer_name}",
            document_name)
    if not hasattr(writer.builder, "iter_pages"):
        raise AttributeError(
            f"Class '{writer.builder}' must have a method 'iter_pages' which returns a dictionary.")
    contents = []
    for k, v in writer.builder.iter_pages():
        pages.append(k)
        contents.append(v)
        if k in main:
            page = v
            break
    if page is None and len(contents) == 1:
        page = contents[0]
    if page is None:
        raise ValueError(
            f"No page contents was produced, only '{pages}'.")

    if layout not in ("sphinx", "sphinx_body"):
        raise ValueError(
            f"Unexpected value for layout '{layout}'")

    # a page which is not a string is a sequence of lines,
    # this now applies to both layouts
    text = page if isinstance(page, str) else "\n".join(page)
    if layout == "sphinx":
        return text

    # layout == "sphinx_body": keeps the lines between <body> and </body>
    keep = []
    begin = False
    for line in text.replace('</head>', '</head>\n').split("\n"):
        stripped = line.strip(" \n\r")
        if stripped == "</body>":
            begin = False
        if begin:
            keep.append(line)
        if stripped == "<body>":
            begin = True
    return "\n".join(keep)

414 

415 

def correct_indentation(text):
    """
    Tries to improve the indentation before running :epkg:`docutils`.

    The function looks for title underlines (a line made of one
    repeated character among ``-+=*^``) and removes from every line
    at most as many leading spaces as the least indented underline has.
    The text is returned unchanged if no underline is found or
    if one of them is not indented.

    @param      text        text to correct
    @return                 corrected text
    """
    rows = text.split("\n")

    # indentation of every line which looks like a title underline
    underline_indents = set()
    for row in rows:
        # NOTE(review): the replacement literal is copied from this listing,
        # its width may have been collapsed by the report rendering,
        # confirm against the module.
        expanded = row.replace("\t", " ")
        body = expanded.lstrip()
        marker = body.strip("\r\n\t ")
        if marker and marker[0] in "-+=*^" and marker == marker[0] * len(marker):
            underline_indents.add(len(expanded) - len(body))

    shift = min(underline_indents, default=0)
    if shift <= 0:
        return text

    dedented = []
    for row in rows:
        # only spaces are removed and never more than the line starts with
        leading = len(row) - len(row.lstrip(' '))
        dedented.append(row[min(leading, shift):])
    return "\n".join(dedented)

454 

455 

def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html",
                   keep_warnings=False, directives=None, language="en",
                   layout='docutils', document_name="<<string>>",
                   filter_nodes=None, **options):
    """
    Converts a docstring into a :epkg:`HTML` format.

    @param      function_or_string      function, class, method or docstring
    @param      format                  output format (``'html'`` or ``'rawhtml'``),
                                        see below
    @param      fLOG                    logging function
    @param      writer                  ``'html'`` for :epkg:`HTML` format,
                                        ``'rst'`` for :epkg:`RST` format,
                                        ``'md'`` for :epkg:`MD` format
    @param      keep_warnings           keep_warnings in the final :epkg:`HTML`
    @param      directives              new directives to add (see below)
    @param      language                language
    @param      layout                  ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name           document_name for this string
    @param      filter_nodes            transform the doctree before writing the results
                                        (layout must be 'sphinx')
    @param      options                 Sphinx options see `Render math as images
                                        <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                        a subset of options is used, see @see fn default_sphinx_options.
                                        By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                             (str) :epkg:`HTML` format or (IPython.core.display.HTML)

    .. exref::
        :title: Produce HTML documentation for a function or class

        The following code can display the docstring in :epkg:`HTML` format
        to display it in a :epkg:`notebook`.

        ::

            from pyquickhelper.helpgen import docstring2html
            import sklearn.linear_model
            docstring2html(sklearn.linear_model.LogisticRegression)

    The output format is defined by:

    * ``'html'``: IPython :epkg:`HTML` object
    * ``'rawhtml'``: :epkg:`HTML` as text + style
    * ``'rst'``: :epkg:`rst`
    * ``'text'``: raw text

    An empty string is returned if there is no docstring, whatever
    the format is. If the first conversion fails, the function tries again
    after calling @see fn correct_indentation and raises
    *HelpGenConvertError* if the second attempt fails too.
    *ValueError* is raised if *format* is unknown.
    """
    if isinstance(function_or_string, str):
        doc = function_or_string
    else:
        doc = function_or_string.__doc__

    # the missing docstring is checked first so that format 'text'
    # returns a string as well
    if doc is None:
        return ""
    if format == "text":
        return doc

    # delayed import
    from .utils_sphinx_doc import _private_migrating_doxygen_doc
    javadoc = migrating_doxygen_doc(doc, "None", log=False)[1]
    rst = _private_migrating_doxygen_doc(
        javadoc.split("\n"), index_first_line=0, filename="None")
    ded = textwrap.dedent("\n".join(rst))

    def convert(text):
        # single place for the conversion, used by both attempts
        return rst2html(text, fLOG=fLOG, writer=writer,
                        keep_warnings=keep_warnings, directives=directives,
                        language=language, filter_nodes=filter_nodes,
                        document_name=document_name,
                        layout=layout, **options)

    try:
        html = convert(ded)
    except Exception:
        # we check the indentation and try a second time
        ded = correct_indentation(ded)
        try:
            html = convert(ded)
        except Exception as e:
            # the text is added to the message with line numbers
            stripped = (line.strip("\n\r") for line in ded.split("\n"))
            numbered = "\n".join(
                f"{num:04d} {line}" for num, line in enumerate(stripped, 1))
            raise HelpGenConvertError(
                f"Unable to process:\n{numbered}") from e

    # these writers do not produce HTML, the result is returned as is
    if writer in ('doctree', 'rst', 'md'):
        return html

    if format == "html":
        from IPython.core.display import HTML
        return HTML(html)
    if format in ("rawhtml", 'rst', 'md', 'doctree'):
        return html
    raise ValueError(
        f"Unexpected format: '{format}', should be html, rawhtml, text, rst, "
        "md, doctree.")

557 

558 

def rst2rst_folder(rststring, folder, document_name="index", **options):
    """
    Converts a :epkg:`RST` string into simplified :epkg:`RST`.
    The conversion goes through @see fn rst2html with writer ``'rst'``
    and layout ``'sphinx'``.

    @param      rststring       :epkg:`rst` string
    @param      folder          the builder needs to write the results in a
                                folder defined by this parameter, it must exist
                                otherwise *FileNotFoundError* is raised
    @param      document_name   main document
    @param      options         additional options (same as *conf.py*)
    @return                     converted string
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(folder)

    def _write_into_folder(builder):
        # the builder writes its results in the requested folder
        builder.outdir = folder

    # NOTE(review): *document_name* is not forwarded, the conversion always
    # uses "example" as the document name, confirm this is intended.
    return rst2html(rststring, writer="rst", document_name="example",
                    update_builder=_write_into_folder, layout="sphinx",
                    **options)