Coverage for pyquickhelper/helpgen/post_process.py

1# -*- coding: utf-8 -*-

2"""

3@file

4@brief Contains the main function to generate the documentation

5for a module designed the same way as this one, @see fn generate_help_sphinx.

6"""

7import os

8import re

9import warnings

10import glob

11from .utils_sphinx_doc_helpers import HelpGenException

# reST skeleton with two titled sections; the single ``%s`` placeholder is
# the folder given to the ``:toctree:`` option of the ``autosummary`` directive.
# NOTE(review): not referenced by any function visible in this module section.
template_examples = """

List of programs
++++++++++++++++

.. toctree::
 :maxdepth: 2

.. autosummary:: __init__.py
 :toctree: %s/
 :template: modules.rst

Another list
++++++++++++

"""

def update_notebook_link(text, format, nblinks, fLOG):
    """
    A notebook can contain a link ``[anchor](find://...)``
    and it will be converted into: ``:ref:...`` in rst format.
    Every supported format has its own link syntax and its own rewriting rule.

    @param      text        text to look into
    @param      format      format, one of ``rst``, ``html``, ``slides``, ``slides2``,
                            ``ipynb``, ``python``, ``latex``, ``elatex``
    @param      nblinks     mappings *(reference: url)*, either a dictionary
                            whose keys are ``reference`` or ``(reference, format)``,
                            or a callable ``nblinks(url, format)`` returning the url,
                            *None* is replaced by an empty dictionary
    @param      fLOG        logging function (can be None)
    @return                 modified text

    The function raises ``HelpGenException`` when a reference cannot
    be resolved and ``NotImplementedError`` for an unknown format.
    """
    def describe_nblinks(nblinks, pattern):
        # Only used to build error messages. *nblinks* may be a callable
        # (nothing to enumerate) and a dictionary may mix ``str`` and
        # ``tuple`` keys, which cannot be sorted against each other.
        if isinstance(nblinks, dict):
            return "\n".join(
                pattern.format(k, v)
                for k, v in sorted(nblinks.items(), key=lambda kv: str(kv[0])))
        return repr(nblinks)

    def get_url_from_nblinks(nblinks, url, format):
        if isinstance(nblinks, dict):
            # a key specific to the format wins over the generic key
            if (url, format) in nblinks:
                url = nblinks[url, format]
            elif url in nblinks:
                url = nblinks[url]
            if url.startswith("find://"):
                short = url[7:]
                if (short, format) in nblinks:
                    url = nblinks[short, format]
                elif short in nblinks:
                    url = nblinks[short]
        else:
            url = nblinks(url, format)
        if url.startswith("find://"):
            # the reference was not resolved
            if format == 'python':
                url = url[7:]
            else:  # pragma: no cover
                snb = describe_nblinks(nblinks, "'{0}': '{1}'")
                extension = (
                    "You should add links into variable 'nblinks' "
                    "into documentation configuration file.")
                extension += f"\nnblinks={nblinks}"
                raise HelpGenException(
                    "Unable to find a replacement for '{0}' format='{1}' in \n{2}\n{3}".format(
                        url, format, snb, extension))
        return url

    if nblinks is None:
        nblinks = {}
    if format == "rst":
        def reprst(le):
            anc, url = le.groups()
            url = get_url_from_nblinks(nblinks, url, format)
            if "://" in url:
                # external link
                new_url = f"`{anc} <{url}>`_"
            else:
                # internal reference
                new_url = f":ref:`{anc} <{url}>`"
            if fLOG:
                fLOG(" [update_notebook_link]1 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("`([^`]+?) <find://([^`<>]+?)>`_")
        new_text = reg.sub(reprst, text)
    elif format in ("html", "slides", "slides2"):
        def rephtml(le):
            # NOTE(review): the pattern captures (target, anchor text) in that
            # order, so the nblinks lookup below is applied to the anchor text
            # and the raw target becomes the href. Kept as is, confirm intended.
            anc, url = le.groups()
            url = get_url_from_nblinks(nblinks, url, format)
            new_url = f"<a href=\"{anc}.html\">{url}</a>"
            if fLOG:
                fLOG(" [update_notebook_link]2 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("<a href=\\\"find://([^\\\"]+?)\\\">([^`<>]+?)</a>")
        new_text = reg.sub(rephtml, text)
    elif format in ("ipynb", "python"):
        def repipy(le):
            anc, url = le.groups()
            url = get_url_from_nblinks(nblinks, "find://" + url, format)
            if not url.startswith("http"):
                mes = describe_nblinks(nblinks, "{0}: '{1}'")
                extension = "You should add this link into the documentation " \
                            "configuration file in variable 'nblinks'."
                raise HelpGenException(  # pragma: no cover
                    "A reference was not found: '{0}' - '{1}' "
                    "format={2}, nblinks=\n{3}\n{4}".format(
                        anc, url, format, mes, extension))
            new_url = f"[{anc}]({url})"
            if fLOG:
                fLOG(" [update_notebook_link]3 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("[\\[]([^[]+?)[\\]][(]find://([^ ]+)[)]")
        new_text = reg.sub(repipy, text)
    elif format in ("latex", "elatex"):
        def replat(le):
            url, anc = le.groups()
            url = get_url_from_nblinks(nblinks, url, format)
            if not url.endswith(".html") and not url.endswith(".js") and not url.endswith(".css"):
                url += ".html"
            new_url = f"\\href{{{url}}}{{{anc}}}"
            if fLOG:
                fLOG(" [update_notebook_link]4 add in ",
                     format, ":", new_url)
            return new_url
        reg = re.compile("\\\\href{find://([^{} ]+?)}{([^{}]+)}")
        new_text = reg.sub(replat, text)
        # {\hyperref[\detokenize{c_classes/classes:chap-classe}]
        # {\sphinxcrossref{\DUrole{std,std-ref}{Classes}}}}
    else:
        raise NotImplementedError(  # pragma: no cover
            f"Unsupported format '{format}'\n{text}")
    return new_text

138

139

140def _notebook_replacements(nbtext, notebook_replacements, fLOG=None):

141 """

142 Makes some replacements in a notebook.

143

144 @param nbtext text to process

145 @param notebook_replacements dictionary of replacements

146 @param fLOG logging function

147 @return text

148 """

149 if notebook_replacements is None:

150 return nbtext

151 for k, v in notebook_replacements:

152 if k in nbtext:

153 fLOG(

154 f"[_notebook_replacements] replace '{k}' -> '{v}'")

155 nbtext = nbtext.replace(k, v)

156 if '"nbformat": 4,' in nbtext: # pragma: no cover

157 rep = ['"nbformat_minor": 0', '"nbformat_minor": 1',

158 '"nbformat_minor": 2']

159 for r in rep:

160 if r in nbtext:

161 nbtext = nbtext.replace(r, '"nbformat_minor": 4')

162 return nbtext

163

164

def post_process_latex_output(root, doall, latex_book=False, exc=True,
                              custom_latex_processing=None, nblinks=None,
                              remove_unicode=True, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the latex file produced by :epkg:`sphinx`.
    The file is overwritten, a copy of the original content is saved
    beside it (suffix ``.tex1~`` for a single file, ``.tex2~`` for the
    files found in the build folder).

    @param      root                        root path or latex file to process,
                                            if it is not a file, every ``.tex`` file in
                                            ``<root>/_doc/sphinxdoc/build/latex`` is processed
    @param      doall                       do all transformations
    @param      latex_book                  customized for a book
    @param      exc                         raises an exception or a warning
    @param      custom_latex_processing     function which does some post processing of the full latex file
    @param      nblinks                     dictionary ``{ ref : url }`` where to look for references
    @param      remove_unicode              remove unicode characters (fails with latex)
    @param      notebook_replacements       string replacement in notebooks
    @param      fLOG                        logging function (can be None)
    """
    if os.path.isfile(root):
        file = root
        if fLOG:
            fLOG(f"[post_process_latex_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as f:
            content = f.read()
        # backup of the original content
        with open(file + ".tex1~", "w", encoding="utf8") as f:
            f.write(content)
        content = post_process_latex(
            content, doall, latex_book=latex_book, exc=exc,
            custom_latex_processing=custom_latex_processing, nblinks=nblinks,
            file=file, remove_unicode=remove_unicode, fLOG=fLOG,
            notebook_replacements=notebook_replacements)
        with open(file, "w", encoding="utf8") as f:
            f.write(content)
    else:  # pragma: no cover
        build = os.path.join(root, "_doc", "sphinxdoc", "build", "latex")
        if not os.path.exists(build):
            raise FileNotFoundError(build)
        for tex in os.listdir(build):
            if not tex.endswith(".tex"):
                continue
            file = os.path.join(build, tex)
            # fLOG defaults to None, it cannot be called blindly
            if fLOG:
                fLOG("[post_process_latex_output] modify file", file)
            with open(file, "r", encoding="utf8") as f:
                content = f.read()
            # backup of the original content
            with open(file + ".tex2~", "w", encoding="utf8") as f:
                f.write(content)
            content = post_process_latex(
                content, doall, info=file, latex_book=latex_book, exc=exc,
                custom_latex_processing=custom_latex_processing, nblinks=nblinks,
                file=file, remove_unicode=remove_unicode, fLOG=fLOG,
                notebook_replacements=notebook_replacements)
            with open(file, "w", encoding="utf8") as f:
                f.write(content)

215

216

def post_process_python_output(root, doall, exc=True, nblinks=None, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the python file produced by :epkg:`sphinx`.
    The file is overwritten.

    @param      root                    root path or python file to process,
                                        if it is not a file, the function looks into
                                        ``<root>/_doc/sphinxdoc/build/latex``
    @param      doall                   unused
    @param      exc                     raise an exception if the build folder is missing,
                                        otherwise emits a warning and returns
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function (can be None)
    """
    if os.path.isfile(root):
        file = root
        if fLOG:
            fLOG(f"[post_process_python_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as f:
            content = f.read()
        content = post_process_python(
            content, doall, nblinks=nblinks, file=file, fLOG=fLOG,
            notebook_replacements=notebook_replacements)
        with open(file, "w", encoding="utf8") as f:
            f.write(content)
    else:  # pragma: no cover
        # NOTE(review): this branch walks the *latex* build folder and its
        # ``.tex`` files, it looks copied from post_process_latex_output,
        # confirm it is intended.
        build = os.path.join(root, "_doc", "sphinxdoc", "build", "latex")
        if not os.path.exists(build):
            if exc:
                raise FileNotFoundError(build)
            # exc=False: os.listdir would raise anyway, warn and stop instead
            warnings.warn(
                f"Unable to find folder {build!r}, nothing to process.",
                UserWarning)
            return
        for tex in os.listdir(build):
            if not tex.endswith(".tex"):
                continue
            file = os.path.join(build, tex)
            # fLOG defaults to None, it cannot be called blindly
            if fLOG:
                fLOG("[post_process_python_output] modify file", file)
            with open(file, "r", encoding="utf8") as f:
                content = f.read()
            # same arguments as the single file branch plus *info*
            content = post_process_python(
                content, doall, info=file, nblinks=nblinks, file=file, fLOG=fLOG,
                notebook_replacements=notebook_replacements)
            with open(file, "w", encoding="utf8") as f:
                f.write(content)

253

254

def post_process_latex_output_any(file, custom_latex_processing, nblinks=None,
                                  remove_unicode=False, fLOG=None, notebook_replacements=None):
    """
    Postprocesses the latex file produced by :epkg:`sphinx`.
    The file is overwritten, a copy of the original content is saved
    beside it with suffix ``.tex3.u0~`` or ``.tex3.u1~``
    (1 if *remove_unicode* is True).

    @param      file                        latex filename
    @param      custom_latex_processing     function which does some post processing of the full latex file
                                            (can be None)
    @param      nblinks                     dictionary ``{url: link}``
    @param      remove_unicode              remove unicode characters
    @param      notebook_replacements       string replacement in notebooks
    @param      fLOG                        logging function (can be None)
    """
    if fLOG:
        fLOG("[post_process_latex_output_any] ** post_process_latex_output_any ", file)
    if not os.path.exists(file):
        # The listing only helps debugging, it must not hide the real error
        # when the folder itself is missing.
        folder = os.path.dirname(file) or "."
        try:
            others = "\n".join(os.listdir(folder))
        except OSError:
            others = f"(unable to list folder {folder!r})"
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}', other files in the same folder\n{}".format(
                file, others))
    with open(file, "r", encoding="utf8") as f:
        content = f.read()
    # backup of the original content
    with open(file + f".tex3.u{1 if remove_unicode else 0}~", "w", encoding="utf8") as f:
        f.write(content)
    # custom_latex_processing is forwarded, it was previously ignored
    content = post_process_latex(content, True, info=file,
                                 custom_latex_processing=custom_latex_processing,
                                 nblinks=nblinks, file=file,
                                 remove_unicode=remove_unicode, fLOG=fLOG,
                                 notebook_replacements=notebook_replacements)
    with open(file, "w", encoding="utf8") as f:
        f.write(content)

282

283

def post_process_rst_output(file, html, pdf, python, slides, is_notebook=False,
                            exc=True, github=False, notebook=None, nblinks=None, fLOG=None,
                            notebook_replacements=None):
    """
    Processes a :epkg:`rst` file generated from the conversion of a notebook.
    The file is overwritten, a copy of the original content is saved
    in ``<file>~``.

    @param      file                    filename
    @param      pdf                     if True, add a link to the :epkg:`pdf`,
                                        assuming it will exists at the same location
    @param      html                    if True, add a link to the :epkg:`html` conversion
    @param      python                  if True, add a link to the :epkg:`Python` conversion
    @param      slides                  if True, add a link to the slides conversion
    @param      is_notebook             does something more if the file is a notebook
    @param      exc                     raises an exception (True) or a warning (False)
    @param      github                  add a link to the notebook on :epkg:`github`
    @param      notebook                location of the notebook, file might be a copy
    @param      nblinks                 links added to a notebook, dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function (can be None)

    The function adds the following replacement
    ``st = st.replace("\\\\mathbb{1}", "\\\\mathbf{1\\\\!\\\\!1}")``.
    and checks that audio is only included in :epkg:`HTML`.
    """
    if fLOG:
        fLOG(f"[post_process_rst_output] clean {file!r}")

    name = os.path.split(file)[1]
    noext = os.path.splitext(name)[0]
    with open(file, "r", encoding="utf8") as f:
        lines = f.readlines()
    # backup of the original content
    with open(file + "~", "w", encoding="utf8") as f:
        f.write("".join(lines))

    # Probably not the best way to fix that.
    # For some reason, nbconvert adds None as the first row.
    # (the file may be empty)
    if lines and lines[0] == 'None\n':
        lines[0] = '\n'  # pragma: no cover

    if any(line == 'None\n' for line in lines):
        raise HelpGenException(  # pragma: no cover
            "One row unexpectedly contains only None in '{}'\n{}".format(
                file, "".join(lines[:20])))

    # Removes empty lines in inserted code, also adds line number.
    def startss(line):
        for b in ["::", ".. parsed-literal::", ".. code:: python",
                  ".. code-block:: python"]:
            if line.startswith(b):
                return b
        return None

    codeb = [".. code:: python", ".. code-block:: python"]
    inbloc = False
    memopos = None
    for pos, line in enumerate(lines):
        if not inbloc:
            b = startss(line)
            if b is not None:
                if b in codeb:
                    # we remove line number for the notebooks
                    if "notebook" not in file:
                        lines[pos] = f"{codeb[-1]}\n :linenos:\n\n"
                    else:
                        lines[pos] = f"{codeb[-1]}\n\n"
                inbloc = True
                memopos = pos
        else:
            if len(line.strip(" \r\n")) == 0 and pos < len(lines) - 1 and \
                    lines[pos + 1].startswith(" ") and len(lines[pos + 1].strip(" \r\n")) > 0:
                # empty line inside a block followed by indented content
                lines[pos] = ""

            elif not line.startswith(" ") and line != "\n":
                # first unindented line: the block is over
                inbloc = False

                if lines[memopos].startswith("::"):
                    code = "".join(
                        (_[4:] if _.startswith(" ") else _) for _ in lines[memopos + 1:pos])
                    if len(code) == 0:
                        if fLOG:
                            fLOG(  # pragma: no cover
                                "[post_process_rst_output] EMPTY-SECTION in ", file)
                    else:
                        # The literal block becomes a python block only if
                        # its content compiles (the code is not executed).
                        try:
                            cmp = compile(code, "", "exec")
                            if cmp is not None:
                                lines[memopos] = "{0}\n :linenos:\n".format(
                                    ".. code-block:: python")
                        except Exception:  # pragma: no cover
                            # not python, the block is left as it is
                            pass

                memopos = None

    # code and images
    imgreg = re.compile("[.][.] image:: (.*)")
    for pos in range(0, len(lines)):
        # lines[pos] = lines[pos].replace(".. code:: python","::")
        if lines[pos].strip().startswith(".. image::"):
            # we assume every image should be placed in the same folder as the
            # notebook itself
            img = imgreg.findall(lines[pos])
            if len(img) == 0:
                raise HelpGenException(  # pragma: no cover
                    f"Unable to extract image name in '{lines[pos]}'")
            nameimg = img[0]
            short = nameimg.replace("%5C", "/")
            short = os.path.split(short)[-1]
            lines[pos] = lines[pos].replace(nameimg, short)

    # title
    # pos stays at -1 for an empty file, it is then reported as "no title"
    pos = -1
    for pos, line in enumerate(lines):
        line = line.strip("\n\r")
        if len(line) > 0 and line == "=" * len(line):
            # lines[pos] = lines[pos].replace("=", "*")
            # looks backward for the title (same length as the underline)
            pos2 = pos - 1
            li = len(lines[pos])
            while len(lines[pos2]) != li:
                pos2 -= 1
            sep = "" if lines[pos2].endswith("\n") else "\n"
            # overline added before the title
            lines[pos2] = f"{lines[pos]}{sep}{lines[pos2]}"
            for p in range(pos2 + 1, pos):
                if lines[p] == "\n":  # pragma: no cover
                    lines[p] = ""
            break

    # pos + 1 is the position of the underline once the label is inserted
    pos += 1
    if pos >= len(lines):
        mes = f"Unable to find a title in notebook '{file}'"
        if exc:
            raise HelpGenException(mes)  # pragma: no cover
        warnings.warn(mes, UserWarning)

    # label
    labelname = name.replace(" ", "").replace("_", "").replace(
        ":", "").replace(".", "").replace(",", "")
    label = f"\n.. _{labelname}:\n\n"
    lines.insert(0, label)

    # links
    links = [f'**Links:** :download:`notebook <{noext}.ipynb>`']
    if html:
        links.append(f':downloadlink:`html <{noext}2html.html>`')
    if pdf:
        links.append(f':download:`PDF <{noext}.pdf>`')
    if python:
        links.append(f':download:`python <{noext}.py>`')
    if slides:
        links.append(f':downloadlink:`slides <{noext}.slides.html>`')

    if github:  # pragma: no cover
        if notebook is None:
            raise ValueError(
                f"Cannot add a link on github, notebook is None for file='{file}'")
        docname = notebook
        folder = docname
        git = os.path.join(folder, ".git")
        while len(folder) > 0 and not os.path.exists(git):
            parent = os.path.split(folder)[0]
            if parent == folder:
                # The filesystem root is its own parent: without this
                # test the loop never ends outside a git repository.
                folder = ""
                break
            folder = parent
            git = os.path.join(folder, ".git")
        if len(folder) > 0:
            path = docname[len(folder):]
            tried = []
            if path.strip('/\\').startswith('build'):
                # The notebook may be in a build folder but is not
                # the original notebook. The function does something
                # if the path starts with `build`.
                subfolds = os.listdir(folder)
                for sub in subfolds:
                    fulls = os.path.join(folder, sub)
                    if not os.path.isdir(fulls):
                        continue
                    if not ('_doc' in sub or 'notebook' in sub or 'example' in sub):
                        continue
                    # Search for another version of the file.
                    last_name = os.path.split(docname)[-1]
                    tried.append((last_name, fulls))
                    selected = glob.glob(
                        fulls + "/**/" + last_name, recursive=True)
                    if len(selected) == 1:
                        docname = selected[0]
                        path = docname[len(folder):]
                        break
            if "blob/master/build" in path or "build/notebooks" in path:
                # raise RuntimeError(  # pragma: no cover
                warnings.warn(  # pragma: no cover
                    "Unexpected substring found in %r in folder %r\n"
                    "--TRIED--\n%r" % (path, folder, "\n".join(map(str, tried))))
            links.append(
                ":githublink:`GitHub|{0}|*`".format(path.replace("\\", "/").lstrip("/")))
    lines[pos] = f"{lines[pos]}\n\n.. only:: html\n\n {', '.join(links)}\n\n"

    # we remove the
    # <div
    # style="position:absolute;
    # ....
    # </div>
    reg = re.compile(
        "([.]{2} raw[:]{2} html[\\n ]+<div[\\n ]+style=.?position:absolute;(.|\\n)*?[.]{2} raw[:]{2} html[\\n ]+</div>)")
    merged = "".join(lines)
    r = reg.findall(merged)
    if len(r) > 0:
        if fLOG:
            fLOG("[post_process_rst_output] *** remove div absolute in ", file)
        for spa in r:
            rep = spa[0]
            # replaced by as many line breaks as the removed text has rows
            nbl = len(rep.split("\n"))
            merged = merged.replace(rep, "\n" * nbl)
        lines = [(_ + "\n") for _ in merged.split("\n")]

    # bullets: adds an empty line before the first item of a list
    for pos, line in enumerate(lines):
        if pos == 0:
            continue
        if len(line) > 0 and (line.startswith("- ") or line.startswith("* ")) \
                and pos < len(lines) - 1:
            nxt = lines[pos + 1]
            prev = lines[pos - 1]
            if (nxt.startswith("- ") or nxt.startswith("* ")) \
                    and not (prev.startswith("- ") or prev.startswith("* ")) \
                    and not prev.startswith(" "):
                lines[pos - 1] += "\n"
            elif line.startswith("- ") and nxt.startswith(" ") \
                    and not prev.startswith(" ") and not prev.startswith("- "):
                lines[pos - 1] += "\n"

    # remove last ::
    i = len(lines)
    for i in range(len(lines), 1, -1):
        s = lines[i - 1].strip(" \n\r")
        if len(s) != 0 and s != "::":
            break

    if i < len(lines):
        del lines[i:]

    # specific treatment for notebooks
    if is_notebook:
        # change links <#Alink --> <#alink
        reg = re.compile("(<#[A-Z][a-zA-Z0-9_+-]+>)")
        for i, line in enumerate(lines):
            r = reg.search(line)
            if r:
                memo = r.groups()[0]
                new_memo = "<#" + memo[2].lower() + memo[3:]
                new_memo = new_memo.replace("+", "")
                line = line.replace(memo, new_memo)
                lines[i] = line

    # checking for find://
    content = "".join(lines)
    content = update_notebook_link(content, "rst", nblinks=nblinks, fLOG=fLOG)
    if "find://" in content:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should "
            "add or extend 'nblinks' in conf.py.".format(file))

    # notebooks replacements
    content = _notebook_replacements(content, notebook_replacements, fLOG)

    # replaces the function
    content = content.replace("\\mathbb{1}", "\\mathbf{1\\!\\!1}")

    with open(file, "w", encoding="utf8") as f:
        f.write(content)

549

550

def post_process_html_output(file, pdf, python, slides, exc=True,
                             nblinks=None, fLOG=None,
                             notebook_replacements=None):
    """
    Processes a HTML file generated from the conversion of a notebook.
    The file is overwritten. The function fixes the :epkg:`mathjax` url,
    applies the notebook replacements, resolves the links ``find://``
    and points the javascript dependencies to local copies in ``../_static``.

    @param      file                    filename
    @param      pdf                     if True, add a link to the PDF, assuming it will exists
                                        at the same location (not used by the function body)
    @param      python                  if True, add a link to the Python conversion
                                        (not used by the function body)
    @param      slides                  if True, add a link to the slides conversion
                                        (not used by the function body)
    @param      exc                     raises an exception (True) or a warning (False)
                                        (not used by the function body)
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function (can be None)
    """
    if not os.path.exists(file):
        raise FileNotFoundError(file)  # pragma: no cover
    if fLOG:
        fLOG(f"[post_process_html_output] clean {file!r}")
    with open(file, "r", encoding="utf8") as stream:
        page = stream.read()

    # mathjax
    old_mathjax = "https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"
    new_mathjax = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"
    page = page.replace(old_mathjax, new_mathjax)

    # notebook replacements
    if fLOG:
        fLOG("[post_process_html_output] nb:", notebook_replacements)
    page = _notebook_replacements(page, notebook_replacements, fLOG)

    # links find://
    page = update_notebook_link(page, "html", nblinks=nblinks, fLOG=fLOG)
    if "find://" in page:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should add "
            "or extend 'nblinks' in conf.py.".format(file))

    # js
    if fLOG:
        fLOG("[post_process_html_output] js: replacements")

    # rows loading require.js from the CDN are dropped
    kept = []
    for row in page.split('\n'):
        if "https://cdnjs.cloudflare.com/ajax/libs/require.js" not in row:
            kept.append(row)
        elif fLOG:
            fLOG(f"[post_process_html_output] js: skip {row!r}")
    page = "\n".join(kept)

    # local copies replace the remote scripts, require.js is added to the header
    substitutions = [
        ('https://unpkg.com/@jupyter-widgets/html-manager@^0.20.0/dist/embed-amd.js',
         '../_static/embed-amd.js'),
        ('</head>', '<script src="../_static/require.js"></script>\n</head>'),
    ]
    for old, new in substitutions:
        if old not in page:
            continue
        if fLOG:  # pragma: no cover
            fLOG(f"[post_process_html_output] js: replace {old!r} -> {new!r}")
        page = page.replace(old, new)

    with open(file, "w", encoding="utf8") as stream:
        stream.write(page)

614

615

def post_process_slides_output(file, pdf, python, slides, exc=True,
                               nblinks=None, fLOG=None,
                               notebook_replacements=None):
    """
    Processes a :epkg:`HTML` file generated from the conversion of a notebook
    into slides. *file* can also be the HTML content itself, in that case
    nothing is written on disk.

    @param      file                    filename, or the content if it contains ``<html``
                                        and is longer than 5000 characters or is not
                                        an existing path
    @param      pdf                     if True, add a link to the PDF, assuming it will
                                        exists at the same location (not used by the function body)
    @param      python                  if True, add a link to the Python conversion
                                        (not used by the function body)
    @param      slides                  if True, add a link to the slides conversion
                                        (not used by the function body)
    @param      exc                     raises an exception (True) or a warning (False)
                                        (not used by the function body)
    @param      nblinks                 dictionary ``{ref: url}``
    @param      notebook_replacements   string replacement in notebooks
    @param      fLOG                    logging function (can be None)
    @return                             modified content
    """
    is_content = "<html" in file and (
        len(file) > 5000 or not os.path.exists(file))
    if is_content:
        text = file  # pragma: no cover
    else:
        if not os.path.exists(file):
            raise FileNotFoundError(file)  # pragma: no cover
        if fLOG:
            fLOG(f"[post_process_slides_output] clean {file!r}")
        with open(file, "r", encoding="utf8") as stream:
            text = stream.read()
    save = not is_content

    # reveal.js
    require = "require(" in text
    renamed = [
        ("reveal.js/dist/reveal.css", "reveal.js/css/reveal.css"),
        ("reveal.js/dist/reveal.js", "reveal.js/js/reveal.js"),
        ("reveal.js/dist/theme/simple.css", "reveal.js/css/theme/simple.css"),
        ("https://unpkg.com/@jupyter-widgets/html-manager@0.20.0/dist/embed-amd.js",
         "embed-amd.js"),
    ]
    for old, new in renamed:
        text = text.replace(old, new)

    rows = text.split("\n")
    for idx, original in enumerate(rows):
        current = original
        if '<script src="reveal.js/lib/js/head.min.js"></script>' in original:
            # jquery is loaded from a local copy before head.min.js
            current = '<script src="reveal.js/js/jquery.min.js"></script>\n' + current
        if '<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>' in original:
            current = ""
        if '<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/' in original:
            current = ""
        if current == "</script>" and require:
            # require.js is added once, after the first closed script
            current += '\n<script src="require.js"></script>'
            require = False
        rows[idx] = current
    text = "\n".join(rows)

    # mathjax
    text = text.replace(
        "https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML",
        "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML")

    # links find://
    text = update_notebook_link(text, "slides", nblinks=nblinks, fLOG=fLOG)
    if "find://" in text:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should add "
            "or extend 'nblinks' in conf.py.".format(file))

    # notebook replacements
    text = _notebook_replacements(text, notebook_replacements, fLOG)

    if save:
        with open(file, "w", encoding="utf8") as stream:
            stream.write(text)
    return text

685

686

def post_process_latex(st, doall, info=None, latex_book=False, exc=True,
                       custom_latex_processing=None, nblinks=None, file=None,
                       remove_unicode=False, fLOG=None, notebook_replacements=None):
    """
    Modifies a :epkg:`latex` file after its generation by :epkg:`sphinx`.

    @param      st                          string
    @param      doall                       do all transformations
    @param      info                        for more understandable error messages
    @param      latex_book                  customized for a book
    @param      exc                         raises an exception or a warning
                                            (not used by the function body)
    @param      custom_latex_processing     function which takes and returns a string,
                                            final post processing
    @param      nblinks                     dictionary ``{ref: url}``
    @param      file                        only used when an exception is raised
    @param      remove_unicode              remove unicode character (fails when converting into PDF)
    @param      notebook_replacements       string replacement in notebooks
    @param      fLOG                        logging function (can be None)
    @return                                 string

    *SVG* included in a notebook (or in *RST* file) usually do not word.
    :epkg:`Inkscape` should be used to convert them into Latex.
    The function is less strict on the checking of `$`.
    The function replaces ``\\mathbb{1}`` by ``\\mathbf{1\\!\\!1}``.

    .. index:: chinese characters, latex, unicode

    .. faqref::
        :title: Why a ¿ is showing the final PDF?

        Unicode, chinese characters are an issue because the latex compiler
        prompts on those if the necessary packages are not installed.
        `pdflatex <https://en.wikipedia.org/w/index.php?title=PdfTeX&redirect=no>`_
        does not accepts inline chinese
        characters, `xetex <https://en.wikipedia.org/wiki/XeTeX>`_
        should be used instead:
        see `How to input Traditional Chinese in pdfLaTeX
        <http://tex.stackexchange.com/questions/200449/how-to-input-traditional-chinese-in-pdflatex>`_.
        Until this is being implemented, the unicode will unfortunately be removed
        in this function.
    """
    if fLOG:
        fLOG("[post_process_latex] ** enter post_process_latex",
             doall, "%post_process_latex" in st)
    weird_character = set(chr(i) for i in range(1, 9))

    def clean_unicode(c):
        if c == "’":
            return "'"
        if c == "…":
            return "..."
        if ord(c) >= 255 or c in weird_character:
            return "\\textquestiondown "
        return c

    def clean_line(line):
        if line.startswith("\\documentclass"):
            line = line.replace("{None}", "{report}")
        return line

    lines = st.split("\n")
    lines = list(map(clean_line, lines))
    # NOTE(review): characters are cleaned whatever *remove_unicode* is,
    # the parameter only selects the encoding used at the end, confirm intended.
    st = "\n".join("".join(map(clean_unicode, line)) for line in lines)

    # we look for \$$ preceded by 200 characters on the same line
    # (which is unexpected unless the currency is used).
    if info is None or os.path.splitext(info)[-1] != ".html":
        # it could be an issue, for the time being, we only emit
        # a warning if a formula is too long
        exp = re.compile(r"(.{200}[\\]\$\$)")
        records = []
        for m in exp.finditer(st):
            p1, p2 = m.start(), m.end()
            sub = st[p1:p2].strip(" \r\n").replace(
                "\n", " ").replace("\r", "").replace("\t", " ")
            sub2 = sub[-10:]
            records.append((info, p1, p2, sub, sub2, ""))
        if len(records) > 0:  # pragma: no cover
            messages = [str(i) + ":" + ("unexpected \\$ in a latex file:\n {0}\n" +
                                        "at position: {1},{2}\n substring: {3}\n " +
                                        "around: {4}\n temp=[{5}]").format(*rec)
                        for i, rec in enumerate(records)]
            for mes in messages:
                warnings.warn(mes, UserWarning)

    st = st.replace("<br />", "\\\\")
    st = st.replace("»", '"')
    st = st.replace("\\mathbb{1}", "\\mathbf{1\\!\\!1}")
    st = st.replace(
        "\\documentclass[11pt]{article}", "\\documentclass[10pt]{article}")

    if not doall and not latex_book:
        # \newchapter is a temporary marker, it becomes \chapter below
        st = st.replace(
            "\\maketitle", "\\maketitle\n\n\\newchapter{Introduction}")

    st = st.replace("%5C", "/") \
           .replace("%3A", ":") \
           .replace("\\includegraphics{notebooks\\", "\\includegraphics {")
    st = st.replace(
        "\\begin{document}", "\\setlength{\\parindent}{0cm}%s\\begin {document}" % "\n")
    st = st.replace("DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}",
                    "DefineVerbatimEnvironment{Highlighting}{Verbatim} {commandchars=\\\\\\{\\},fontsize=\\small}")
    st = st.replace("\\textquotesingle{}", "'")
    st = st.replace("\u0001", "\\u1")
    st = st.replace("\\begin{notice}{note}\\end{notice}", "")

    # hyperref
    if doall and "%post_process_latex" not in st:
        # the first line marks the text as already processed
        st = "%post_process_latex\n" + st
        reg = re.compile("hyperref[\\[]([a-zA-Z0-9]+)[\\]][\\{](.*?)[\\}]")
        allhyp = reg.findall(st)
        sections = []
        for label, section in allhyp:
            sec = r"\subsection{%s} \label{%s}" % (section, label)
            sections.append((label, section, sec))
    elif not doall and not latex_book:
        sections = []
        # first section
        lines = st.split("\n")
        for i, line in enumerate(lines):
            if "\\section" in line:
                lines[i] = "\\newchapter{Documentation}\n" + lines[i]
                break
        st = "\n".join(lines)
    else:
        sections = []

    if len(sections) > 0:
        # a line only made of a referenced title becomes a subsection
        lines = st.split("\n")
        for i, line in enumerate(lines):
            for _, section, sec in sections:
                if line.strip("\r\n ") == section:
                    # fLOG defaults to None, it cannot be called blindly
                    if fLOG:
                        fLOG(" **", section, " --> ", sec)
                    lines[i] = sec
        st = "\n".join(lines)

    if not latex_book:
        st = st.replace("\\chapter", "\\section")
        st = st.replace("\\newchapter", "\\chapter")

    comment_out = [
        '\\usepackage{parskip}',
        '\\usepackage{fontspec}',
        '\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}\n',
    ]
    for co in comment_out:
        if co in st:
            st = st.replace(co, "%" + co)
    if "\\usepackage{multirow}" in st:
        st = st.replace(
            "\\usepackage{svg}\\usepackage{multirow}",
            "\\usepackage{multirow}\\usepackage{amssymb}\\usepackage{latexsym}\\usepackage{amsfonts}\\usepackage{ulem}\\usepackage{textcomp}")
    elif "\\usepackage{hyperref}" in st:
        st = st.replace(
            "\\usepackage{svg}\\usepackage{hyperref}",
            "\\usepackage{hyperref}\\usepackage{amssymb}\\usepackage{latexsym}\\usepackage{amsfonts}\\usepackage{ulem}\\usepackage{textcomp}")
    else:
        raise HelpGenException(  # pragma: no cover
            f"unable to add new instructions usepackage in file {info}")

    # SVG does not work unless it is converted (nbconvert should handle that
    # case), the instructions including them are commented out
    reg = re.compile("([\\\\]includegraphics[{].*?[.]svg[}])")
    fall = reg.findall(st)
    for found in fall:
        st = st.replace(found, "%" + found)

    # fix references
    st = update_notebook_link(st, "latex", nblinks=nblinks, fLOG=fLOG)
    if "find://" in st:
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'\nYou should add or extend "
            "'nblinks' in conf.py.\n{1}".format(file, st))

    # notebook replacements
    st = _notebook_replacements(st, notebook_replacements, fLOG)

    # end
    if custom_latex_processing is not None:
        st = custom_latex_processing(st)  # pragma: no cover

    # characters the encoding cannot represent are replaced by '?'
    if remove_unicode:
        encoding = 'ascii'
    else:
        encoding = 'utf-8'
    st0 = st
    bst = st.encode(encoding, errors='replace')
    st = bst.decode(encoding, errors='replace')
    if st0 != st and fLOG:
        fLOG("[post_process_latex] characters were removed for encoding", encoding)
    return st

882

883

def post_process_python(st, doall, info=None, nblinks=None, file=None, fLOG=None, notebook_replacements=None):
    """
    Modifies a python file after its generation by :epkg:`sphinx`.
    The function strips the text, normalizes the encoding header,
    resolves the links ``find://`` and applies the notebook replacements.

    @param      st                      string
    @param      doall                   do all transformations (not used by the function body)
    @param      info                    for more understandable error messages
                                        (not used by the function body)
    @param      nblinks                 dictionary ``{ref: url}``
    @param      file                    used only when an exception is raised
    @param      fLOG                    logging function (can be None)
    @param      notebook_replacements   string replacement in notebooks
    @return                             string
    """
    cleaned = st.strip("\n \r\t").replace(
        "# coding: utf-8", "# -*- coding: utf-8 -*-")
    cleaned = update_notebook_link(
        cleaned, "python", nblinks=nblinks, fLOG=fLOG)
    if "find://" in cleaned:
        # a link remains unresolved
        raise HelpGenException(  # pragma: no cover
            "find:// was found in '{0}'.\nYou should add or extend "
            "'nblinks' in conf.py.".format(file))

    # notebook replacements
    return _notebook_replacements(cleaned, notebook_replacements, fLOG)

909

910

def remove_character_under32(s):
    """
    Replaces every :epkg:`ASCII` character in *[0..31]* by a space.
    The length of the string is preserved.

    @param      s       string to process
    @return             filtered string
    """
    # a single join instead of repeated string concatenations
    return "".join(" " if ord(c) < 32 else c for c in s)

Coverage for pyquickhelper/helpgen/post_process.py: 91%

413 statements