Coverage for pyquickhelper/helpgen/post_process.py: 91%

413 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Contains the main function to generate the documentation 

5for a module designed the same way as this one, @see fn generate_help_sphinx. 

6""" 

7import os 

8import re 

9import warnings 

10import glob 

11from .utils_sphinx_doc_helpers import HelpGenException 

12 

13 

# RST skeleton for a page listing programs. The single ``%s`` sits in the
# ``:toctree:`` option of the ``autosummary`` directive.
# NOTE(review): presumably filled with a folder name through %-formatting
# by a caller not visible in this file -- confirm.
template_examples = """

List of programs
++++++++++++++++

.. toctree::
 :maxdepth: 2

.. autosummary:: __init__.py
 :toctree: %s/
 :template: modules.rst

Another list
++++++++++++

"""

30 

31 

def update_notebook_link(text, format, nblinks, fLOG):
    """
    Replaces the ``find://`` links of a converted notebook by real links.

    A notebook can contain a link ``[anchor](find://...)``
    and it will be converted into: ``:ref:...`` in rst format.

    @param      text        text to look into
    @param      format      output format: ``rst``, ``html``, ``slides``,
                            ``slides2``, ``ipynb``, ``python``,
                            ``latex`` or ``elatex``
    @param      nblinks     dictionary *(reference: url)* or
                            *((reference, format): url)*, or a function
                            ``nblinks(url, format)`` returning the url,
                            or None (same as an empty dictionary)
    @param      fLOG        logging function, can be None
    @return                 modified text

    The function raises ``HelpGenException`` if a ``find://`` link cannot
    be resolved for formats ``ipynb`` and ``python`` and
    ``NotImplementedError`` if the format is not supported.
    """
    def sorted_links(links):
        # Only used to build error messages. Keys may mix strings and
        # (reference, format) tuples which cannot be compared with each
        # other: they are sorted on their string form. A function has
        # no content to enumerate.
        if isinstance(links, dict):
            return sorted(links.items(), key=lambda kv: str(kv[0]))
        return []

    def get_url_from_nblinks(nblinks, url, format):
        if isinstance(nblinks, dict):
            # The most specific key (reference, format) wins.
            if (url, format) in nblinks:
                url = nblinks[url, format]
            elif url in nblinks:
                url = nblinks[url]
            if url.startswith("find://"):
                # Second attempt without the prefix ``find://``.
                short = url[7:]
                if (short, format) in nblinks:
                    url = nblinks[short, format]
                elif short in nblinks:
                    url = nblinks[short]
        else:
            url = nblinks(url, format)
        if url.startswith("find://"):
            if format == 'python':
                url = url[7:]
            else:  # pragma: no cover
                snb = "\n".join(f"'{k}': '{v}'"
                                for k, v in sorted_links(nblinks))
                extension = (
                    "You should add links into variable 'nblinks' "
                    "into documentation configuration file.")
                extension += f"\nnblinks={nblinks}"
                raise HelpGenException(
                    "Unable to find a replacement for '{0}' format='{1}' in \n{2}\n{3}".format(
                        url, format, snb, extension))
        return url

    if nblinks is None:
        nblinks = {}
    if format == "rst":
        def reprst(le):
            anc, url = le.groups()
            url = get_url_from_nblinks(nblinks, url, format)
            # An absolute url becomes an external link,
            # anything else is considered as a sphinx label.
            if "://" in url:
                new_url = f"`{anc} <{url}>`_"
            else:
                new_url = f":ref:`{anc} <{url}>`"
            if fLOG:
                fLOG(" [update_notebook_link]1 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("`([^`]+?) <find://([^`<>]+?)>`_")
        new_text = reg.sub(reprst, text)
    elif format in ("html", "slides", "slides2"):
        def rephtml(le):
            # First group is the link target, second one the displayed text.
            target, label = le.groups()
            # NOTE(review): the lookup in nblinks is done on the displayed
            # text, not on the target; the target only receives the
            # extension ``.html``. Kept as is, confirm it is intended.
            label = get_url_from_nblinks(nblinks, label, format)
            new_url = f"<a href=\"{target}.html\">{label}</a>"
            if fLOG:
                fLOG(" [update_notebook_link]2 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("<a href=\\\"find://([^\\\"]+?)\\\">([^`<>]+?)</a>")
        new_text = reg.sub(rephtml, text)
    elif format in ("ipynb", "python"):
        def repipy(le):
            anc, url = le.groups()
            url = get_url_from_nblinks(nblinks, "find://" + url, format)
            if not url.startswith("http"):
                mes = "\n".join(f"{k}: '{v}'"
                                for k, v in sorted_links(nblinks))
                extension = "You should add this link into the documentation " \
                            "configuration file in variable 'nblinks'."
                raise HelpGenException(  # pragma: no cover
                    "A reference was not found: '{0}' - '{1}' "
                    "format={2}, nblinks=\n{3}\n{4}".format(
                        anc, url, format, mes, extension))
            new_url = f"[{anc}]({url})"
            if fLOG:
                fLOG(" [update_notebook_link]3 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("[\\[]([^[]+?)[\\]][(]find://([^ ]+)[)]")
        new_text = reg.sub(repipy, text)
    elif format in ("latex", "elatex"):
        def replat(le):
            url, anc = le.groups()
            url = get_url_from_nblinks(nblinks, url, format)
            if not url.endswith((".html", ".js", ".css")):
                url += ".html"
            new_url = f"\\href{{{url}}}{{{anc}}}"
            if fLOG:
                fLOG(" [update_notebook_link]4 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("\\\\href{find://([^{} ]+?)}{([^{}]+)}")
        new_text = reg.sub(replat, text)
        # {\hyperref[\detokenize{c_classes/classes:chap-classe}]
        # {\sphinxcrossref{\DUrole{std,std-ref}{Classes}}}}
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unsupported format '{format}'\n{text}")
    return new_text

138 

139 

140def _notebook_replacements(nbtext, notebook_replacements, fLOG=None): 

141 """ 

142 Makes some replacements in a notebook. 

143 

144 @param nbtext text to process 

145 @param notebook_replacements dictionary of replacements 

146 @param fLOG logging function 

147 @return text 

148 """ 

149 if notebook_replacements is None: 

150 return nbtext 

151 for k, v in notebook_replacements: 

152 if k in nbtext: 

153 fLOG( 

154 f"[_notebook_replacements] replace '{k}' -> '{v}'") 

155 nbtext = nbtext.replace(k, v) 

156 if '"nbformat": 4,' in nbtext: # pragma: no cover 

157 rep = ['"nbformat_minor": 0', '"nbformat_minor": 1', 

158 '"nbformat_minor": 2'] 

159 for r in rep: 

160 if r in nbtext: 

161 nbtext = nbtext.replace(r, '"nbformat_minor": 4') 

162 return nbtext 

163 

164 

def post_process_latex_output(root, doall, latex_book=False, exc=True,
                              custom_latex_processing=None, nblinks=None,
                              remove_unicode=True, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the latex file produced by :epkg:`sphinx`.

    @param      root                    root path or latex file to process
    @param      doall                   do all transformations
    @param      latex_book              customized for a book
    @param      exc                     raises an exception or a warning
    @param      custom_latex_processing function which does some post processing of the full latex file
    @param      nblinks                 dictionary ``{ ref : url }`` where to look for references
    @param      remove_unicode          remove unicode characters (fails with latex)
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None

    If *root* is a file, it is overwritten and its original content
    is saved in ``<file>.tex1~``. Otherwise every file ``*.tex``
    in ``<root>/_doc/sphinxdoc/build/latex`` is processed
    and the backup is ``<file>.tex2~``. The function raises
    ``FileNotFoundError`` if this folder does not exist.
    """
    if os.path.isfile(root):
        file = root
        if fLOG:
            fLOG(f"[post_process_latex_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as f:
            content = f.read()
        # backup of the original content
        with open(file + ".tex1~", "w", encoding="utf8") as f:
            f.write(content)
        content = post_process_latex(
            content, doall, latex_book=latex_book, exc=exc,
            custom_latex_processing=custom_latex_processing, nblinks=nblinks,
            file=file, remove_unicode=remove_unicode, fLOG=fLOG,
            notebook_replacements=notebook_replacements)
        with open(file, "w", encoding="utf8") as f:
            f.write(content)
    else:  # pragma: no cover
        build = os.path.join(root, "_doc", "sphinxdoc", "build", "latex")
        if not os.path.exists(build):
            raise FileNotFoundError(build)
        for tex in os.listdir(build):
            if tex.endswith(".tex"):
                file = os.path.join(build, tex)
                # fLOG is optional, it must be checked before any call.
                if fLOG:
                    fLOG("[post_process_latex_output] modify file", file)
                with open(file, "r", encoding="utf8") as f:
                    content = f.read()
                # backup of the original content
                with open(file + ".tex2~", "w", encoding="utf8") as f:
                    f.write(content)
                content = post_process_latex(
                    content, doall, info=file, latex_book=latex_book, exc=exc,
                    custom_latex_processing=custom_latex_processing, nblinks=nblinks,
                    file=file, remove_unicode=remove_unicode, fLOG=fLOG,
                    notebook_replacements=notebook_replacements)
                with open(file, "w", encoding="utf8") as f:
                    f.write(content)

215 

216 

def post_process_python_output(root, doall, exc=True, nblinks=None, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the python file produced by :epkg:`sphinx`.

    @param      root                    root path or python file to process
    @param      doall                   forwarded to @see fn post_process_python
    @param      exc                     raises ``FileNotFoundError`` if *root* is not
                                        a file and the build folder does not exist,
                                        otherwise the function silently returns
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None

    If *root* is a file, it is processed and overwritten. Otherwise
    the function processes the files of
    ``<root>/_doc/sphinxdoc/build/latex``.
    """
    if os.path.isfile(root):
        file = root
        if fLOG:
            fLOG(f"[post_process_python_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as f:
            content = f.read()
        content = post_process_python(
            content, doall, nblinks=nblinks, file=file, fLOG=fLOG,
            notebook_replacements=notebook_replacements)
        with open(file, "w", encoding="utf8") as f:
            f.write(content)
    else:  # pragma: no cover
        build = os.path.join(root, "_doc", "sphinxdoc", "build", "latex")
        if not os.path.exists(build):
            if exc:
                raise FileNotFoundError(build)
            # Nothing to process, os.listdir would fail on a missing folder.
            return
        # NOTE(review): this branch walks through the ``.tex`` files of the
        # latex build folder although it applies the python post processing,
        # it looks like a copy of post_process_latex_output -- confirm.
        for tex in os.listdir(build):
            if tex.endswith(".tex"):
                file = os.path.join(build, tex)
                # fLOG is optional, it must be checked before any call.
                if fLOG:
                    fLOG("[post_process_python_output] modify file", file)
                with open(file, "r", encoding="utf8") as f:
                    content = f.read()
                content = post_process_python(
                    content, doall, info=file, nblinks=nblinks, file=file, fLOG=fLOG,
                    notebook_replacements=notebook_replacements)
                with open(file, "w", encoding="utf8") as f:
                    f.write(content)

253 

254 

def post_process_latex_output_any(file, custom_latex_processing, nblinks=None,
                                  remove_unicode=False, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the latex file produced by :epkg:`sphinx`.

    @param      file                    latex filename
    @param      custom_latex_processing function which does some post processing of the full latex file,
                                        can be None
    @param      nblinks                 dictionary ``{url: link}``
    @param      remove_unicode          remove unicode characters
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None

    The file is overwritten, its original content is saved in
    ``<file>.tex3.u0~`` or ``<file>.tex3.u1~`` depending on
    *remove_unicode*. The function raises ``FileNotFoundError``
    if *file* does not exist.
    """
    if fLOG:
        fLOG("[post_process_latex_output_any] ** post_process_latex_output_any ", file)
    if not os.path.exists(file):
        # The content of the folder helps finding a misspelled name but
        # listing it must not hide the real error when the folder
        # itself is missing.
        folder = os.path.dirname(file) or "."
        siblings = os.listdir(folder) if os.path.isdir(folder) else []
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}', other files in the same folder\n{}".format(
                file, "\n".join(siblings)))
    with open(file, "r", encoding="utf8") as f:
        content = f.read()
    # backup of the original content
    with open(file + f".tex3.u{1 if remove_unicode else 0}~", "w", encoding="utf8") as f:
        f.write(content)
    # custom_latex_processing is forwarded, it used to be silently ignored.
    content = post_process_latex(content, True, info=file, nblinks=nblinks, file=file,
                                 custom_latex_processing=custom_latex_processing,
                                 remove_unicode=remove_unicode, fLOG=fLOG,
                                 notebook_replacements=notebook_replacements)
    with open(file, "w", encoding="utf8") as f:
        f.write(content)

282 

283 

def post_process_rst_output(file, html, pdf, python, slides, is_notebook=False,
                            exc=True, github=False, notebook=None, nblinks=None, fLOG=None,
                            notebook_replacements=None):
    """
    Processes a :epkg:`rst` file generated from the conversion of a notebook.

    @param      file                    filename
    @param      pdf                     if True, add a link to the :epkg:`pdf`,
                                        assuming it will exists at the same location
    @param      html                    if True, add a link to the :epkg:`html` conversion
    @param      python                  if True, add a link to the :epkg:`Python` conversion
    @param      slides                  if True, add a link to the slides conversion
    @param      is_notebook             does something more if the file is a notebook
    @param      exc                     raises an exception (True) or a warning (False)
    @param      github                  add a link to the notebook on :epkg:`github`
    @param      notebook                location of the notebook, file might be a copy
    @param      nblinks                 links added to a notebook, dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None

    The file is overwritten, its original content is saved in ``<file>~``.
    The function replaces ``\\mathbb{1}`` by ``\\mathbf{1\\!\\!1}``
    and raises ``HelpGenException`` if a link ``find://`` remains
    after the processing.
    """
    if fLOG:
        fLOG(f"[post_process_rst_output] clean {file!r}")

    name = os.path.split(file)[1]
    noext = os.path.splitext(name)[0]
    with open(file, "r", encoding="utf8") as f:
        lines = f.readlines()
    # backup of the original content
    with open(file + "~", "w", encoding="utf8") as f:
        f.write("".join(lines))

    # Probably not the best way to fix that.
    # For some reason, nbconvert adds None as the first row.
    if lines[0] == 'None\n':
        lines[0] = '\n'  # pragma: no cover

    if any(line == 'None\n' for line in lines):
        raise HelpGenException(  # pragma: no cover
            "One row unexpectedly contains only None in '{}'\n{}".format(
                file, "".join(lines[:20])))

    # Removes empty lines in inserted code, also adds line number.
    def startss(line):
        # Returns the first known prefix the line starts with, None otherwise.
        for b in ["::", ".. parsed-literal::", ".. code:: python",
                  ".. code-block:: python"]:
            if line.startswith(b):
                return b
        return None

    codeb = [".. code:: python", ".. code-block:: python"]
    inbloc = False
    for pos, line in enumerate(lines):
        if not inbloc:
            b = startss(line)
            if b is None:
                pass
            else:
                if b in codeb:
                    # we remove line number for the notebooks
                    if "notebook" not in file:
                        lines[pos] = f"{codeb[-1]}\n :linenos:\n\n"
                    else:
                        lines[pos] = f"{codeb[-1]}\n\n"
                inbloc = True
                # beginning of the current block
                memopos = pos
        else:
            if len(line.strip(" \r\n")) == 0 and pos < len(lines) - 1 and \
                    lines[pos + 1].startswith(" ") and len(lines[pos + 1].strip(" \r\n")) > 0:
                # empty line followed by an indented, non empty line
                lines[pos] = ""

            elif not line.startswith(" ") and line != "\n":
                # first unindented line: the block is over
                inbloc = False

                if lines[memopos].startswith("::"):
                    # Four characters are removed only if the line
                    # starts with four spaces.
                    code = "".join(
                        (_[4:] if _.startswith("    ") else _) for _ in lines[memopos + 1:pos])
                    if len(code) == 0:
                        if fLOG:
                            fLOG(  # pragma: no cover
                                "[post_process_rst_output] EMPTY-SECTION in ", file)
                    else:
                        # A literal block which compiles is considered
                        # as python code. This is a best effort, any
                        # failure leaves the block unchanged.
                        try:
                            cmp = compile(code, "", "exec")
                            if cmp is not None:
                                lines[memopos] = "{0}\n :linenos:\n".format(
                                    ".. code-block:: python")
                        except Exception:  # pragma: no cover
                            pass

                memopos = None

    # code and images
    imgreg = re.compile("[.][.] image:: (.*)")
    for pos in range(0, len(lines)):
        # lines[pos] = lines[pos].replace(".. code:: python","::")
        if lines[pos].strip().startswith(".. image::"):
            # we assume every image should be placed in the same folder as the
            # notebook itself
            img = imgreg.findall(lines[pos])
            if len(img) == 0:
                raise HelpGenException(  # pragma: no cover
                    f"Unable to extract image name in '{lines[pos]}'")
            nameimg = img[0]
            short = nameimg.replace("%5C", "/")
            short = os.path.split(short)[-1]
            lines[pos] = lines[pos].replace(nameimg, short)

    # title
    for pos, line in enumerate(lines):
        line = line.strip("\n\r")
        if len(line) > 0 and line == "=" * len(line):
            # lines[pos] = lines[pos].replace("=", "*")
            # The title is the closest previous line with the same length
            # as the underline, the underline is added above it as well.
            pos2 = pos - 1
            li = len(lines[pos])
            while len(lines[pos2]) != li:
                pos2 -= 1
            sep = "" if lines[pos2].endswith("\n") else "\n"
            lines[pos2] = f"{lines[pos]}{sep}{lines[pos2]}"
            for p in range(pos2 + 1, pos):
                if lines[p] == "\n":  # pragma: no cover
                    lines[p] = ""
            break

    # pos + 1 is the position of the underline once the label is inserted
    # at the beginning of the file (see below).
    pos += 1
    if pos >= len(lines):
        mes = f"Unable to find a title in notebook '{file}'"
        if exc:
            raise HelpGenException(mes)  # pragma: no cover
        warnings.warn(mes, UserWarning)

    # label
    labelname = name.replace(" ", "").replace("_", "").replace(
        ":", "").replace(".", "").replace(",", "")
    label = f"\n.. _{labelname}:\n\n"
    lines.insert(0, label)

    # links
    links = [f'**Links:** :download:`notebook <{noext}.ipynb>`']
    if html:
        links.append(f':downloadlink:`html <{noext}2html.html>`')
    if pdf:
        links.append(f':download:`PDF <{noext}.pdf>`')
    if python:
        links.append(f':download:`python <{noext}.py>`')
    if slides:
        links.append(f':downloadlink:`slides <{noext}.slides.html>`')

    if github:  # pragma: no cover
        if notebook is None:
            raise ValueError(
                f"Cannot add a link on github, notebook is None for file='{file}'")
        docname = notebook
        folder = docname
        git = os.path.join(folder, ".git")
        # Walks up the folders until one contains ``.git``.
        while len(folder) > 0 and not os.path.exists(git):
            parent = os.path.split(folder)[0]
            if parent == folder:
                # Root of the file system (os.path.split('/')[0] == '/'):
                # no repository was found, going on would loop forever.
                folder = ""
                break
            folder = parent
            git = os.path.join(folder, ".git")
        if len(folder) > 0:
            path = docname[len(folder):]
            tried = []
            if path.strip('/\\').startswith('build'):
                # The notebook may be in a build folder but is not
                # the original notebook. The function does something
                # if the path starts with `build`.
                subfolds = os.listdir(folder)
                for sub in subfolds:
                    fulls = os.path.join(folder, sub)
                    if not os.path.isdir(fulls):
                        continue
                    if not ('_doc' in sub or 'notebook' in sub or 'example' in sub):
                        continue
                    # Search for another version of the file.
                    last_name = os.path.split(docname)[-1]
                    tried.append((last_name, fulls))
                    selected = glob.glob(
                        fulls + "/**/" + last_name, recursive=True)
                    if len(selected) == 1:
                        docname = selected[0]
                        path = docname[len(folder):]
                        break
            if "blob/master/build" in path or "build/notebooks" in path:
                # raise RuntimeError(  # pragma: no cover
                warnings.warn(  # pragma: no cover
                    "Unexpected substring found in %r in folder %r\n"
                    "--TRIED--\n%r" % (path, folder, "\n".join(map(str, tried))))
            links.append(
                ":githublink:`GitHub|{0}|*`".format(path.replace("\\", "/").lstrip("/")))
    lines[pos] = f"{lines[pos]}\n\n.. only:: html\n\n {', '.join(links)}\n\n"

    # we remove the
    # <div
    # style="position:absolute;
    # ....
    # </div>
    reg = re.compile(
        "([.]{2} raw[:]{2} html[\\n ]+<div[\\n ]+style=.?position:absolute;(.|\\n)*?[.]{2} raw[:]{2} html[\\n ]+</div>)")
    merged = "".join(lines)
    r = reg.findall(merged)
    if len(r) > 0:
        # fLOG is optional, it must be checked before any call.
        if fLOG:
            fLOG("[post_process_rst_output] *** remove div absolute in ", file)
        for spa in r:
            rep = spa[0]
            # replaced by as many line breaks as the removed text has lines
            nbl = len(rep.split("\n"))
            merged = merged.replace(rep, "\n" * nbl)
        lines = [(_ + "\n") for _ in merged.split("\n")]

    # bullets
    for pos, line in enumerate(lines):
        if pos == 0:
            continue
        if len(line) > 0 and (line.startswith("- ") or line.startswith("* ")) \
                and pos < len(lines) - 1:
            following = lines[pos + 1]
            prev = lines[pos - 1]
            # A line break is added before the first item of a list.
            if (following.startswith("- ") or following.startswith("* ")) \
                    and not (prev.startswith("- ") or prev.startswith("* ")) \
                    and not prev.startswith(" "):
                lines[pos - 1] += "\n"
            elif line.startswith("- ") and following.startswith(" ") \
                    and not prev.startswith(" ") and not prev.startswith("- "):
                lines[pos - 1] += "\n"
            elif line.startswith("- "):
                pass

    # remove last ::
    i = len(lines)
    for i in range(len(lines), 1, -1):
        s = lines[i - 1].strip(" \n\r")
        if len(s) != 0 and s != "::":
            break

    if i < len(lines):
        del lines[i:]

    # specific treatment for notebooks
    if is_notebook:
        # change links <#Alink --> <#alink
        reg = re.compile("(<#[A-Z][a-zA-Z0-9_+-]+>)")
        for i, line in enumerate(lines):
            r = reg.search(line)
            if r:
                memo = r.groups()[0]
                new_memo = "<#" + memo[2].lower() + memo[3:]
                new_memo = new_memo.replace("+", "")
                line = line.replace(memo, new_memo)
                lines[i] = line

    # checking for find://
    content = "".join(lines)
    content = update_notebook_link(content, "rst", nblinks=nblinks, fLOG=fLOG)
    if "find://" in content:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should "
            "add or extend 'nblinks' in conf.py.".format(file))

    # notebooks replacements
    content = _notebook_replacements(content, notebook_replacements, fLOG)

    # replaces the function
    content = content.replace("\\mathbb{1}", "\\mathbf{1\\!\\!1}")

    with open(file, "w", encoding="utf8") as f:
        f.write(content)

549 

550 

def post_process_html_output(file, pdf, python, slides, exc=True,
                             nblinks=None, fLOG=None,
                             notebook_replacements=None):
    """
    Cleans a HTML file obtained from the conversion of a notebook.
    The file is modified inplace.

    @param      file                    filename
    @param      pdf                     not used by this function
    @param      python                  not used by this function
    @param      slides                  not used by this function
    @param      exc                     not used by this function
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None

    The function raises ``FileNotFoundError`` if *file* does not exist
    and ``HelpGenException`` if a link ``find://`` remains.
    """
    if not os.path.exists(file):
        raise FileNotFoundError(file)  # pragma: no cover
    if fLOG:
        fLOG(f"[post_process_html_output] clean {file!r}")
    with open(file, "r", encoding="utf8") as stream:
        html_text = stream.read()

    # mathjax: the old CDN is replaced by the new one
    old_mathjax = "https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"
    new_mathjax = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"
    html_text = html_text.replace(old_mathjax, new_mathjax)

    # notebook replacements
    if fLOG:
        fLOG("[post_process_html_output] nb:", notebook_replacements)
    html_text = _notebook_replacements(html_text, notebook_replacements, fLOG)

    # links find://
    html_text = update_notebook_link(
        html_text, "html", nblinks=nblinks, fLOG=fLOG)
    if "find://" in html_text:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should add "
            "or extend 'nblinks' in conf.py.".format(file))

    # js: every line loading require.js from the CDN is dropped
    if fLOG:
        fLOG("[post_process_html_output] js: replacements")
    kept = []
    for row in html_text.split('\n'):
        if "https://cdnjs.cloudflare.com/ajax/libs/require.js" not in row:
            kept.append(row)
        elif fLOG:
            fLOG(f"[post_process_html_output] js: skip {row!r}")
    html_text = "\n".join(kept)

    # js: local copies replace the remote scripts
    substitutions = (
        ('https://unpkg.com/@jupyter-widgets/html-manager@^0.20.0/dist/embed-amd.js',
         '../_static/embed-amd.js'),
        ('</head>', '<script src="../_static/require.js"></script>\n</head>'),
    )
    for old, new in substitutions:
        if old not in html_text:
            continue
        if fLOG:  # pragma: no cover
            fLOG(f"[post_process_html_output] js: replace {old!r} -> {new!r}")
        html_text = html_text.replace(old, new)

    with open(file, "w", encoding="utf8") as stream:
        stream.write(html_text)

614 

615 

def post_process_slides_output(file, pdf, python, slides, exc=True,
                               nblinks=None, fLOG=None,
                               notebook_replacements=None):
    """
    Cleans the slides (:epkg:`HTML`) obtained from the conversion of a notebook.

    @param      file                    filename or the HTML content itself
                                        (a string containing ``<html`` which is
                                        longer than 5000 characters or is not
                                        an existing file)
    @param      pdf                     not used by this function
    @param      python                  not used by this function
    @param      slides                  not used by this function
    @param      exc                     not used by this function
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None
    @return                             modified content

    The file is overwritten only if *file* is a filename.
    The function raises ``FileNotFoundError`` if *file* is neither
    a content nor an existing file and ``HelpGenException``
    if a link ``find://`` remains.
    """
    is_content = (len(file) > 5000 or not os.path.exists(file)) and "<html" in file
    if is_content:
        save = False  # pragma: no cover
        text = file  # pragma: no cover
    else:
        if not os.path.exists(file):
            raise FileNotFoundError(file)  # pragma: no cover
        if fLOG:
            fLOG(f"[post_process_slides_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as stream:
            text = stream.read()
        save = True

    # reveal.js: local layout instead of the folder dist
    require = "require(" in text
    for old, new in (
            ("reveal.js/dist/reveal.css", "reveal.js/css/reveal.css"),
            ("reveal.js/dist/reveal.js", "reveal.js/js/reveal.js"),
            ("reveal.js/dist/theme/simple.css", "reveal.js/css/theme/simple.css"),
            ("https://unpkg.com/@jupyter-widgets/html-manager@0.20.0/dist/embed-amd.js",
             "embed-amd.js")):
        text = text.replace(old, new)

    head_min = '<script src="reveal.js/lib/js/head.min.js"></script>'
    cdn_jquery = '<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>'
    cdn_require = '<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/'
    rows = text.split("\n")
    for idx, row in enumerate(rows):
        current = row
        if head_min in row:
            # local jquery is loaded before head.min.js
            current = '<script src="reveal.js/js/jquery.min.js"></script>\n' + row
        if cdn_jquery in row or cdn_require in row:
            # remote scripts are removed
            current = ""
        if require and current == "</script>":
            # require.js is added once, after the first closing tag
            current += '\n<script src="require.js"></script>'
            require = False
        rows[idx] = current
    text = "\n".join(rows)

    # mathjax
    text = text.replace("https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML",
                        "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML")
    text = update_notebook_link(text, "slides", nblinks=nblinks, fLOG=fLOG)
    if "find://" in text:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should add "
            "or extend 'nblinks' in conf.py.".format(file))

    # notebook replacements
    text = _notebook_replacements(text, notebook_replacements, fLOG)

    if save:
        with open(file, "w", encoding="utf8") as stream:
            stream.write(text)
    return text

685 

686 

def post_process_latex(st, doall, info=None, latex_book=False, exc=True,
                       custom_latex_processing=None, nblinks=None, file=None,
                       remove_unicode=False, fLOG=None, notebook_replacements=None):
    """
    Modifies a :epkg:`latex` file after its generation by :epkg:`sphinx`.

    @param      st                      string
    @param      doall                   do all transformations
    @param      info                    for more understandable error messages
    @param      latex_book              customized for a book
    @param      exc                     not used by this function
    @param      custom_latex_processing function which takes and returns a string,
                                        final post processing
    @param      nblinks                 dictionary ``{ref: url}``
    @param      file                    only used when an exception is raised
    @param      remove_unicode          remove unicode character (fails when converting into PDF)
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function, can be None
    @return                             string

    *SVG* included in a notebook (or in *RST* file) usually do not work.
    :epkg:`Inkscape` should be used to convert them into Latex.
    The function is less strict on the checking of `$`.
    The function replaces ``\\mathbb{1}`` by ``\\mathbf{1\\!\\!1}``.
    It raises ``HelpGenException`` if the text contains neither
    ``\\usepackage{multirow}`` nor ``\\usepackage{hyperref}``
    or if a link ``find://`` remains.

    .. index:: chinese characters, latex, unicode

    .. faqref::
        :title: Why a ¿ is showing the final PDF?

        Unicode, chinese characters are an issue because the latex compiler
        prompts on those if the necessary packages are not installed.
        `pdflatex <https://en.wikipedia.org/w/index.php?title=PdfTeX&redirect=no>`_
        does not accepts inline chinese
        characters, `xetex <https://en.wikipedia.org/wiki/XeTeX>`_
        should be used instead:
        see `How to input Traditional Chinese in pdfLaTeX
        <http://tex.stackexchange.com/questions/200449/how-to-input-traditional-chinese-in-pdflatex>`_.
        Until this is being implemented, the unicode will unfortunately be removed
        in this function.
    """
    if fLOG:
        fLOG("[post_process_latex] ** enter post_process_latex",
             doall, "%post_process_latex" in st)
    weird_character = set(chr(i) for i in range(1, 9))

    def clean_unicode(c):
        # Characters latex cannot handle are replaced
        # whatever the value of remove_unicode is.
        if c == "’":
            return "'"
        if c == "…":
            return "..."
        if ord(c) >= 255 or c in weird_character:
            return "\\textquestiondown "
        return c

    def clean_line(line):
        if line.startswith("\\documentclass"):
            line = line.replace("{None}", "{report}")
        return line

    lines = st.split("\n")
    lines = list(map(clean_line, lines))
    st = "\n".join("".join(map(clean_unicode, line)) for line in lines)

    # we count the number of times we have \$ (which is unexpected unless the
    # currency is used.
    # The pattern below contains \$, the search is useless without it.
    if "\\$" in st and (
            info is None or os.path.splitext(info)[-1] != ".html"):
        # it could be an issue, for the time being, we raise
        # a warning if a formula is too long
        exp = re.compile(r"(.{200}[\\]\$\$)")
        records = []
        for m in exp.finditer(st):
            p1, p2 = m.start(), m.end()
            sub = st[p1:p2].strip(" \r\n").replace(
                "\n", " ").replace("\r", "").replace("\t", " ")
            sub2 = sub[-10:]
            records.append((info, p1, p2, sub, sub2, ""))
        if len(records) > 0:  # pragma: no cover
            messages = [str(i) + ":" + ("unexpected \\$ in a latex file:\n {0}\n" +
                                        "at position: {1},{2}\n substring: {3}\n " +
                                        "around: {4}\n temp=[{5}]").format(*rec)
                        for i, rec in enumerate(records)]
            for mes in messages:
                warnings.warn(mes, UserWarning)

    st = st.replace("<br />", "\\\\")
    st = st.replace("»", '"')
    st = st.replace("\\mathbb{1}", "\\mathbf{1\\!\\!1}")
    st = st.replace(
        "\\documentclass[11pt]{article}", "\\documentclass[10pt]{article}")

    if not doall and not latex_book:
        st = st.replace(
            "\\maketitle", "\\maketitle\n\n\\newchapter{Introduction}")

    st = st.replace("%5C", "/") \
        .replace("%3A", ":") \
        .replace("\\includegraphics{notebooks\\", "\\includegraphics {")
    st = st.replace(
        "\\begin{document}", "\\setlength{\\parindent}{0cm}%s\\begin {document}" % "\n")
    st = st.replace("DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}",
                    "DefineVerbatimEnvironment{Highlighting}{Verbatim} {commandchars=\\\\\\{\\},fontsize=\\small}")
    st = st.replace("\\textquotesingle{}", "'")
    st = st.replace("\u0001", "\\u1")
    st = st.replace("\\begin{notice}{note}\\end{notice}", "")

    # hyperref
    if doall and "%post_process_latex" not in st:
        # The marker prevents the text from being processed twice.
        st = "%post_process_latex\n" + st
        reg = re.compile("hyperref[\\[]([a-zA-Z0-9]+)[\\]][\\{](.*?)[\\}]")
        allhyp = reg.findall(st)
        sections = []
        for ref_id, section in allhyp:
            sec = r"\subsection{%s} \label{%s}" % (section, ref_id)
            sections.append((ref_id, section, sec))
    elif not doall and not latex_book:
        sections = []
        # first section
        lines = st.split("\n")
        for i, line in enumerate(lines):
            if "\\section" in line:
                lines[i] = "\\newchapter{Documentation}\n" + lines[i]
                break
        st = "\n".join(lines)
    else:
        sections = []

    if len(sections) > 0:
        # A line only made of a referenced title becomes a subsection.
        lines = st.split("\n")
        for i, line in enumerate(lines):
            for _, section, sec in sections:
                if line.strip("\r\n ") == section:
                    # fLOG is optional, it must be checked before any call.
                    if fLOG:
                        fLOG(" **", section, " --> ", sec)
                    lines[i] = sec
        st = "\n".join(lines)

    if not latex_book:
        st = st.replace("\\chapter", "\\section")
        st = st.replace("\\newchapter", "\\chapter")

    comment_out = [
        '\\usepackage{parskip}',
        '\\usepackage{fontspec}',
        '\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}\n',
    ]
    for co in comment_out:
        if co in st:
            st = st.replace(co, "%" + co)
    if "\\usepackage{multirow}" in st:
        st = st.replace(
            "\\usepackage{svg}\\usepackage{multirow}",
            "\\usepackage{multirow}\\usepackage{amssymb}\\usepackage{latexsym}\\usepackage{amsfonts}\\usepackage{ulem}\\usepackage{textcomp}")
    elif "\\usepackage{hyperref}" in st:
        st = st.replace(
            "\\usepackage{svg}\\usepackage{hyperref}",
            "\\usepackage{hyperref}\\usepackage{amssymb}\\usepackage{latexsym}\\usepackage{amsfonts}\\usepackage{ulem}\\usepackage{textcomp}")
    else:
        raise HelpGenException(  # pragma: no cover
            f"unable to add new instructions usepackage in file {info}")

    # SVG does not work unless it is converted (nbconvert should handle that
    # case)
    reg = re.compile("([\\\\]includegraphics[{].*?[.]svg[}])")
    fall = reg.findall(st)
    for found in fall:
        st = st.replace(found, "%" + found)

    # fix references
    st = update_notebook_link(st, "latex", nblinks=nblinks, fLOG=fLOG)
    if "find://" in st:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'\nYou should add or extend "
            "'nblinks' in conf.py.\n{1}".format(file, st))

    # notebook replacements
    st = _notebook_replacements(st, notebook_replacements, fLOG)

    # end
    if custom_latex_processing is not None:
        st = custom_latex_processing(st)  # pragma: no cover

    # Characters the encoding cannot represent are replaced.
    if remove_unicode:
        encoding = 'ascii'
    else:
        encoding = 'utf-8'
    st0 = st
    bst = st.encode(encoding, errors='replace')
    st = bst.decode(encoding, errors='replace')
    if st0 != st and fLOG:
        fLOG("[post_process_latex] characters were removed for encoding", encoding)
    return st

882 

883 

def post_process_python(st, doall, info=None, nblinks=None, file=None, fLOG=None, notebook_replacements=None):
    """
    Cleans a python script after its generation by :epkg:`sphinx`.

    @param      st                      content of the script
    @param      doall                   not used by this function
    @param      info                    not used by this function
    @param      nblinks                 dictionary ``{ref: url}``
    @param      file                    used only when an exception is raised
    @param      fLOG                    logging function, can be None
    @param      notebook_replacements   string replacement in notebooks
    @return                             modified content

    The function raises ``HelpGenException`` if a link ``find://`` remains.
    """
    # surrounding spaces are removed, the encoding line is normalized
    script = st.strip("\n \r\t").replace(
        "# coding: utf-8", "# -*- coding: utf-8 -*-")
    script = update_notebook_link(
        script, "python", nblinks=nblinks, fLOG=fLOG)
    if "find://" not in script:
        # notebook replacements
        return _notebook_replacements(script, notebook_replacements, fLOG)
    raise HelpGenException(  # pragma: no cover
        "find:// was found in '{0}'.\nYou should add or extend "
        "'nblinks' in conf.py.".format(file))

909 

910 

def remove_character_under32(s):
    """
    Replaces :epkg:`ASCII` characters in *[0..31]* by a space.

    @param      s       string to process
    @return             filtered string, same length as *s*
    """
    # str.join builds the result in one pass,
    # concatenating with += in a loop may be quadratic.
    return "".join(" " if ord(c) < 32 else c for c in s)