Coverage for src/ensae_teaching_cs/homeblog/py2html.py: 63%

180 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-04-28 06:23 +0200

"""
@file
@brief Mark-up Python code file using HTML for syntax highlighting.
Syntax highlighting rules are in the spirit of IDLE.

Unless the -r 0 option is used it will also format the code by
applying some of the PEP8 spacing guidelines to expressions and
assignments.

For those that want a GUI you can try py2htmTk.pyw -
(it's minimal but functional).

::

    USAGE in command line mode:
        py2html [options] [-i filename | -I]

    OPTIONS:
        -h              Print this command line summary
        --help          Print more detailed help on styles and
                        revision info.
        -o filename     Output file (default is "py2html.html")
        -i filename     Source file. See -I.
        -p filename     HTML page template (must include a %s for inserting
                        the code). If not specified then a default is used.
        -s filename     Use a style-file otherwise use built in styles
                        (see --help for details)
        -r 0|1|2        Reformat expressions and definitions.
                        -r 0 No formatting
                        -r 1 Format as a = 3+4; b = [1, 2, 3] (default)
                        -r 2 Format as a = 3 + 4; b = [1 , 2 , 3]
        -R              Replace newlines with <br>, tabs and multi-spaces
                        with &nbsp;
        -B              Just make a block (ignores -p)
        -O              Print to sys.stdout (ignores -o, no file created)
        -I              Use stdin as source file (ignore -i option)
        -E 0|1|2|3|4    0 - Don't do entity substitution.
                        1 - Substitute < > and & (default)
                        2 - Substitute < > & and "
                        3 - Substitute <> & " and '
                        4 - Substitute all non-ASCII characters
"""

42 

43 

import getopt
import os
import sys
import tokenize

46 

# Style dictionary currently in effect: assigned by readStyleFile() (and by
# cmdLine()), read by makeBlock(). It stays None until a style is loaded.
appliedstyle = None

# Extended help text printed by the ``--help`` command line option.
help_styles = """
The default styles applied are as follows:
    <style type="text/css">
        h1 { color: green;
        position: center;
        }
        .python_code { font-family: monospace;
        font-size: 10pt;
        }
        .py_key {}
        .py_num {}
        .py_str { color: #00AA00;}
        .py_op {}
        .py_com { color: red;}
        .py_res { color: #FF7700;}
        .py_def { color: blue;}
    </style>

Where:
    .python_code is the style applied to the whole block
    .py_key is used for words that are not reserved words
    .py_num for numeric values
    .py_str for strings
    .py_op for operators
    .py_res for reserved words
    .py_com for comments
    .py_def used for words that are names in function and class definitions.

Optionally you can define a style-file, a text file with the following format:

    #User editable style substitutions
    # This style-file assumes a css is used.
    # Format:
    # styleName | start | end
    # examples:
    # block | <pre> | </pre>
    # key | <span class="key"> | </span>
    # str | <span style = "color: #00AA00"> | </span>

    block | <pre class="python_code" id="pycode"> | </pre>
    key | <span class="py_key"> | </span>
    num | <span class="py_num"> | </span>
    str | <span class="py_str"> | </span>
    op | <span class="py_op"> | </span>
    com | <span class="py_com"> | </span>
    res | <span class="py_res"> | </span>
    def | <span class="py_def"> | </span>
    brk | <span class="py_brk"> | </span>
    #End

This example does not use css:

    # User editable HTML style substitutions
    # This file uses font tags
    # Undefined tags will use the default character colour
    block | <pre> | </pre>
    key | |
    num | |
    str | <font color="green"> | </font>
    op | |
    com | <font color="red"> | </font>
    res | <font color="orange"> | </font>
    def | <font color="blue"> | </font>
    brk | |


Revisions:
0.51    First release.
0.6     7 Mar '04
        Fixed - now supports \\ character properly
        Changes - The styles now use a py_ prefix
        Added - Extended and added to formatting options
              - External style-file support
0.61    Added - support for disabling entity translator
0.62    Added - additional entity replacement options
"""

125 

# Module metadata; __version__ is interpolated into the generated page
# footer and into the command line help banner.
__author__ = "Paul Hardwick <paul@peck.org.uk>"
__date__ = "07 March 2004"
__version__ = "0.62"

129 

# Named HTML entities keyed by code point (161-255). Code points missing
# from this table are emitted as numeric references by substituteEntities.
entities = {
    # 34: '&quot;', 38: '&amp;', 60: '&lt;', 62: '&gt;',
    # 160: '&nbsp;',
    161: '&iexcl;', 162: '&cent;', 163: '&pound;',
    164: '&curren;', 165: '&yen;', 166: '&brvbar;',
    167: '&sect;', 168: '&uml;', 169: '&copy;',
    170: '&ordf;', 171: '&laquo;', 172: '&not;',
    173: '&shy;', 174: '&reg;', 175: '&macr;',
    176: '&deg;', 177: '&plusmn;', 178: '&sup2;',
    179: '&sup3;', 180: '&acute;', 181: '&micro;',
    182: '&para;', 183: '&middot;', 184: '&cedil;',
    185: '&sup1;', 186: '&ordm;', 187: '&raquo;',
    188: '&frac14;', 189: '&frac12;', 190: '&frac34;',
    191: '&iquest;', 192: '&Agrave;', 193: '&Aacute;',
    194: '&Acirc;', 195: '&Atilde;', 196: '&Auml;',
    197: '&Aring;', 198: '&AElig;', 199: '&Ccedil;',
    200: '&Egrave;', 201: '&Eacute;', 202: '&Ecirc;',
    203: '&Euml;', 204: '&Igrave;', 205: '&Iacute;',
    206: '&Icirc;', 207: '&Iuml;', 208: '&ETH;',
    209: '&Ntilde;', 210: '&Ograve;', 211: '&Oacute;',
    212: '&Ocirc;', 213: '&Otilde;', 214: '&Ouml;',
    215: '&times;', 216: '&Oslash;', 217: '&Ugrave;',
    218: '&Uacute;', 219: '&Ucirc;', 220: '&Uuml;',
    221: '&Yacute;', 222: '&THORN;', 223: '&szlig;',
    224: '&agrave;', 225: '&aacute;', 226: '&acirc;',
    227: '&atilde;', 228: '&auml;', 229: '&aring;',
    230: '&aelig;', 231: '&ccedil;', 232: '&egrave;',
    233: '&eacute;', 234: '&ecirc;', 235: '&euml;',
    236: '&igrave;', 237: '&iacute;', 238: '&icirc;',
    239: '&iuml;', 240: '&eth;', 241: '&ntilde;',
    242: '&ograve;', 243: '&oacute;', 244: '&ocirc;',
    245: '&otilde;', 246: '&ouml;', 247: '&divide;',
    248: '&oslash;', 249: '&ugrave;', 250: '&uacute;',
    251: '&ucirc;', 252: '&uuml;', 253: '&yacute;',
    254: '&thorn;', 255: '&yuml;',
}

BSLASH = chr(92)

# Token categories. The numeric ones are compared against the token type
# produced by ``tokenize.generate_tokens`` and are therefore taken from the
# ``tokenize`` module: the previous hard-coded 50 / 52 for operators and
# comments do not match the values used by current Python releases.
WORD = tokenize.NAME
NUMBER = tokenize.NUMBER
STRING = tokenize.STRING
OPERATOR = tokenize.OP
COMMENT = tokenize.COMMENT
# Categories decided by apply_style / makeBlock rather than by tokenize.
RESERVED = "reserved"
DEFINING = "defining"
BRACKETS = "brackets"
BLOCK = "block"

# Maps every category to the key used in style dictionaries and style files.
skeys = {WORD: "key", NUMBER: "num", STRING: "str", COMMENT: "com",
         OPERATOR: "op", RESERVED: "res", DEFINING: "def", BRACKETS: "brk",
         BLOCK: "block"}

178 

#######################################
# The styles wrapping each token using css
#######################################
# Built-in style dictionary: style key -> [opening markup, closing markup].
py_style = {
    skeys[WORD]: ['<span class="py_key">', "</span>"],  # name
    skeys[NUMBER]: ['<span class="py_num">', "</span>"],  # number
    skeys[STRING]: ['<span class="py_str">', "</span>"],  # string
    skeys[OPERATOR]: ['<span class="py_op">', "</span>"],  # operator
    skeys[COMMENT]: ['<span class="py_com">', "</span>"],  # comment
    # reserved word
    skeys[RESERVED]: ['<span class="py_res">', "</span>"],
    # class and def names
    skeys[DEFINING]: ['<span class="py_def">', "</span>"],
    # all enclosing ops
    skeys[BRACKETS]: ['<span class="py_brk">', "</span>"],
    # wraps the block
    skeys[BLOCK]: ['<pre class="python_code" id="pycode">', "</pre>"],
}

196 

######################################
# The default web page
######################################

# Page template used when no -p template is given. It takes four %s
# substitutions, in order: title, heading name, code block, version.
py_page = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" >
<title>%s</title>
<style type="text/css">
    h1 { color: green;
    position: center;
    }
    .python_code { font-family: monospace;
    font-size: 10pt;
    }
    .py_key {color: black;}
    .py_num {color: black;}
    .py_str { color: #00AA00;}
    .py_op {color: black; }
    .py_com { color: red;}
    .py_res { color: #FF7700;}
    .py_def { color: blue;}
    .py_brk { color: black;}
</style>
</head>
<body>
<h1>#### Source code for %s ####</h1>
<hr>
%s
<hr>
[Created with py2html Ver:%s]
<p>
    <a href="http://validator.w3.org/check/referer"><img border="0"
    src="http://www.w3.org/Icons/valid-html401"
    alt="Valid HTML 4.01!" height="31" width="88"></a>
</p></body>
</html>"""
######################################

237 

# Module-level state shared between successive apply_style calls.
command = False
prev = 0  # category of the previous token (0 before the first token)
prevres = False  # True when the previous token was a reserved word
prevtok = "#"  # previous token
defining = False  # True when the previous token was def or class

# Are we somewhere between def/class and ':' (for default assignment
# formatting)
definingmode = False

248 

# Operator formatting tables consulted by apply_style.

# Tokens surrounded by one space on each side. Every augmented assignment
# is listed so that ``a *= 2`` is treated the same way as ``a += 2``.
spaced_tokens = ["=", "==", ">", "<", ">=",
                 "<=", "+=", "-=", "<>", "!=", "&&", "||",
                 "*=", "/=", "//=", "%=", "**=", "@=",
                 "<<=", ">>=", "&=", "^=", "|="]
# Tokens followed by a single space.
monospaced_tokens = [":", ";", ","]
# Binary operators, spaced only by the "-r 2" format.
binops = ["+", "-", "*", "/", "//", "**", "%", "<<", ">>", "&", "^", "|"]
# Tokens styled with the bracket style instead of the operator style.
brackets = ["[", "]", "{", "}", "(", ")"]
# Everything the "-r 2" format surrounds with spaces.
spaced_fmt2 = spaced_tokens + binops

258 

# Reserved words: names in this list are wrapped with the "res" style by
# apply_style instead of the plain "key" style.
reserved = kwlist = [
    # --start keywords--
    'and', 'assert', 'break',
    'class', 'continue', 'def',
    'del', 'elif', 'else',
    'except', 'exec', 'finally',
    'for', 'from', 'global',
    'if', 'import', 'in',
    'is', 'lambda', 'not',
    'or', 'pass', 'print',
    'raise', 'return', 'try',
    'while', 'yield',
    # keywords of Python 3 that the historical list above does not contain
    'as', 'async', 'await',
    'nonlocal', 'with',
    'False', 'None', 'True',
    # others languages
    "void", "double", "int", "throw", "template", "catch",
    "public", "protected", "float", "unsigned", "__int32", "short",
    # --end keywords--
]

277 

278 

def substituteEntities(token, level="1", char_set=None):
    """
    Replace characters of *token* by HTML entities.

    :param token: text to escape
    :param level: amount of substitution, given as a string or an integer:

        - "0" : Don't do entity substitution.
        - "1" : Substitute ``< > and & (default)``
        - "2" : Substitute ``< > & and "``
        - "3" : Substitute ``<> & " and '``
        - "4" : as "3", plus every character above code point 127, using
          the named entity from ``entities`` when there is one and a
          numeric reference otherwise

        Any other value leaves *token* unchanged.
    :param char_set: not implemented yet, ignored
    :return: the escaped text
    """
    # Compare whole values: ``level in "1234"`` was a substring test, which
    # accepted "" or "12" and raised TypeError for an integer level.
    level = str(level)
    if level not in ("1", "2", "3", "4"):
        return token

    # '&' must be handled first, otherwise the entities produced below
    # would be escaped a second time.
    token = token.replace('&', "&amp;")
    token = token.replace('<', "&lt;")
    token = token.replace('>', "&gt;")
    if level in ("2", "3", "4"):
        token = token.replace('"', "&quot;")
    if level in ("3", "4"):
        token = token.replace("'", "&#039;")

    if level == "4":
        token = ''.join(
            char if ord(char) <= 127
            else entities.get(ord(char), "&#%d;" % ord(char))
            for char in token)
    return token

308 

309 

def apply_style(index, token, start, src, format, style, entity="1"):
    """
    Wrap one token with the markup of its style and return the result.

    :param index: token category (``WORD``, ``NUMBER``, ``STRING``,
        ``OPERATOR``, ``COMMENT`` or any other tokenize token type)
    :param token: text of the token
    :param start: not used yet (meant for intelligent line breaks)
    :param src: not used yet (meant for intelligent line breaks)
    :param format: "0" keeps the token as is, "1" and "2" add spaces
        around it according to the operator tables
    :param style: style dictionary, style key -> [start markup, end markup];
        a missing key means the token is emitted without markup
    :param entity: entity substitution level, see ``substituteEntities``
    :return: escaped, optionally spaced token enclosed in its style markup

    The function updates the module-level state ``prev``, ``prevres``,
    ``prevtok``, ``defining`` and ``definingmode``.
    """
    global prev, prevres, defining, definingmode, prevtok
    no_style = ['', '']

    # keyword handling
    isres = False
    if index == WORD:
        if token not in reserved:
            # the name right after def/class gets the "defining" style
            if defining:
                fmt = style.get(skeys[DEFINING], no_style)
            else:
                fmt = style.get(skeys.get(index, 99), no_style)
            defining = False
        else:
            fmt = style.get(skeys[RESERVED], no_style)
            isres = True
            if token in ['def', 'class']:
                defining = True
                definingmode = True
    elif index == OPERATOR and token in brackets:
        fmt = style.get(skeys[BRACKETS], no_style)
    else:
        fmt = style.get(skeys.get(index, 99), no_style)

    if (token == ":") and definingmode:
        definingmode = False

    # The spacing rules look at the raw token: once escaped, "<" reads
    # "&lt;" and no longer matches any entry of the operator tables.
    raw = token
    token = substituteEntities(raw, entity, char_set=None)
    if format == "1":
        # no spaces around '=' between def/class and ':' (default values)
        if raw in spaced_tokens and not definingmode:
            token = f" {token} "
        elif raw in monospaced_tokens:
            token = f"{token} "
        elif (isres or index == COMMENT) and prev in [WORD, NUMBER, STRING, OPERATOR]:
            token = " " + token
        elif index in [WORD, NUMBER, STRING] and prev == WORD:
            token = " " + token
        elif raw in ["[", "("] and prevres:
            token = " " + token
    elif format == "2":
        if raw in spaced_fmt2:
            # a sign following another operator only gets a leading space
            if prev == OPERATOR and raw in ("+", "-", "~"):
                token = f" {token}"
            else:
                token = f" {token} "
        elif raw in monospaced_tokens:
            token = f"{token} "
        elif isres and prev in [WORD, NUMBER, STRING, OPERATOR]:
            token = " " + token
        elif index in [WORD, NUMBER, STRING] and prev == WORD:
            token = " " + token
        elif raw in ["[", "("] and prevres:
            token = " " + token

    text = fmt[0] + token + fmt[1]
    prev = index
    prevres = isres
    prevtok = token
    return text

373 

374 

def readStyleFile(filename):
    """Read a style file and return a style dictionary.

    The returned dictionary is also stored in the module-level
    ``appliedstyle``. The built-in ``py_style`` is used when *filename* is
    empty or when the file cannot be read. A line which is neither empty,
    nor a comment, nor of the form ``name | start | end`` is reported on
    stdout and stops the program with ``sys.exit(1)``.

    The file format is::

        #User editable style substitutions
        # This style-file assumes a css is used.
        # Format:
        # styleName | start | end
        # examples:
        # block | <pre> | </pre>
        # key | <span class="key"> | </span>
        # str | <span style = "color: #00AA00"> | </span>

        block | <pre class="python_code" id="pycode"> | </pre>
        key | <span class="py_key"> | </span>
        num | <span class="py_num"> | </span>
        str | <span class="py_str"> | </span>
        op | <span class="py_op"> | </span>
        com | <span class="py_com"> | </span>
        res | <span class="py_res"> | </span>
        def | <span class="py_def"> | </span>
        brk | <span class="py_brk"> | </span>

    :param filename: path of the style file, or a false value for the
        built-in styles
    :return: dictionary style key -> [start markup, end markup]
    """
    global appliedstyle
    if not filename:
        appliedstyle = py_style
        return py_style

    # Only a failure to read the file falls back to the built-in styles;
    # programming errors are no longer hidden by a blanket handler.
    try:
        with open(filename, 'r') as ff:
            lines = ff.readlines()
    except (OSError, UnicodeError):  # pragma: no cover
        appliedstyle = py_style
        return py_style

    # Build the dictionary locally so the global never holds a partial style.
    loaded = {}
    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        parts = line.split('|')
        if len(parts) == 3 and parts[0].strip():
            loaded[parts[0].strip()] = [parts[1].strip(), parts[2].strip()]
        else:
            print("Error in style file:\n", line)
            sys.exit(1)
    appliedstyle = loaded
    return appliedstyle

421 

422 

def replaceCodes(text=""):
    """
    Helper function that does the ``\\n`` and space substitution
    and returns the changed text.

    Newlines become ``<br>``, tabs are expanded to four spaces, and runs
    of four, three and two spaces are rewritten with ``&nbsp;``.

    :param text: text to convert
    :return: converted text
    """
    text = text.replace('\n', '<br>')
    text = text.replace('\t', ' ' * 4)
    # longest runs first, so a shorter pattern cannot split a longer run
    text = text.replace(" " * 4, "&nbsp; &nbsp; ")
    text = text.replace(" " * 3, "&nbsp; &nbsp;")
    text = text.replace(" " * 2, "&nbsp; ")
    text = text.replace("<br><br>", "<br> <br>")
    return text

435 

436 

def file2HTML(file_name, format, style, Replace, entity="1", encoding="utf-8"):
    """
    Reads a file and returns the contents as a string,
    highlighted with :epkg:`HTML` styles. This function uses the
    output of the tokenize module to decide what to colour.
    It calls @see fn apply_style with the token index.

    :param file_name: ``"<stdin>"`` to read the standard input, the path of
        an existing file, or otherwise the source code itself (it is then
        written to a temporary file in the current directory)
    :param format: reformatting mode:

        - '0': the code will display as the author expected it to
        - '1': spaces are added and removed around expressions to
          standardise the format
        - '2': as '1' but different rules
    :param style: style dictionary
    :param Replace: if True, replace ``\\n`` with ``<br>`` and multiple
        spaces with ``&nbsp;`` combinations (see ``replaceCodes``)
    :param entity: entity substitution level, see ``substituteEntities``
    :param encoding: encoding used to read and write the files
    :return: highlighted code, or the message
        ``"File cannot be tokenized by tokenize"`` when tokenize fails
    """
    removeFile = None
    if file_name == "<stdin>":
        file_name = "temp_stdin.py2html.tmp"
        lines = sys.stdin.readlines()
        with open(file_name, "w", encoding=encoding) as f:
            f.writelines(lines)
        removeFile = file_name
    elif len(file_name) < 1000 and os.path.exists(file_name):
        try:
            with open(file_name, 'r', encoding=encoding) as f:
                lines = f.readlines()
        except UnicodeDecodeError:  # pragma: no cover
            print("issue with file ", file_name)
            raise
    else:
        # file_name holds the code itself: tokenize needs a readline
        # callable, so the code goes through a temporary file
        lines = file_name.split("\n")
        file_name = "temp_py2html.tmp"
        with open(file_name, "w", encoding=encoding) as f:
            f.writelines("\n".join(lines))
        removeFile = file_name

    # token positions are 1-based: pad so that lines[n] is line number n
    lines = ['', ] + lines

    page = []
    old_line = 1
    old_column = 0
    try:
        # ``with`` / ``finally`` close the file and remove the temporary
        # copy on every exit path, including the early return below
        with open(file_name, 'r', encoding=encoding) as tempPointer:
            try:
                # use tokenize to iterate through tokens
                for tupe in tokenize.generate_tokens(tempPointer.readline):
                    # first collect packing between previous token and
                    # this one
                    if old_line == tupe[2][0]:
                        # another token on same line as last token
                        if tupe[2][1] and (old_column < tupe[2][1]):
                            # handle when no reformat and when it is the
                            # first line processed
                            if (format == "0") or (prev == 0):
                                txt = lines[old_line][old_column:tupe[2][1]]
                                txt = txt.replace(BSLASH, BSLASH + '\n')
                                page.append(txt)
                    else:
                        # collect remains of old line
                        subpage = lines[old_line][old_column:-1]
                        subpage = subpage.replace(BSLASH, BSLASH + '\n')
                        page.append(subpage)
                        old_line += 1
                        # now collect all the lines between last and current
                        while old_line != tupe[2][0]:
                            txt = lines[old_line].replace(
                                BSLASH, BSLASH + '\n')
                            page.append(txt)
                            old_line += 1
                        # now get beginning of line up to current column
                        txt = lines[old_line][0:tupe[2][1]].replace(
                            BSLASH, BSLASH + '\n')
                        page.append(txt)
                    # now add formatted token
                    page.append(apply_style(tupe[0], tupe[1], tupe[2],
                                            tupe[4], format, style, entity))

                    # now update pointers
                    old_line = tupe[3][0]
                    old_column = tupe[3][1]
            except tokenize.TokenError:  # pragma: no cover
                return "File cannot be tokenized by tokenize"
            except IndexError:  # pragma: no cover
                # deliberate best effort: keep what was converted so far
                pass
    finally:
        if removeFile is not None and os.path.exists(removeFile):
            os.remove(removeFile)

    text = ''.join(page)
    if Replace:
        text = replaceCodes(text)
    return text

527 

528 

def makeBlock(data):
    """Applies the block tags to text.

    The tags come from the ``'block'`` entry of the module-level
    ``appliedstyle``. The built-in ``py_style`` is used when no style has
    been loaded yet, and a style without a ``'block'`` entry leaves the
    text unwrapped, like any other undefined style.

    :param data: text to enclose
    :return: *data* between the opening and closing block markup
    """
    current = py_style if appliedstyle is None else appliedstyle
    wrapper = current.get('block', ['', ''])
    return f"{wrapper[0]}{data}{wrapper[1]}"

534 

535 

def cmdLine():  # pragma: no cover
    '''This is the function that handles command line mode.

    It parses ``sys.argv``, converts the input with ``file2HTML`` and
    either writes the resulting page to the output file or prints it
    (option ``-O``). The help is printed and the process exits when the
    options are invalid or when no input is given.
    '''
    global appliedstyle
    help_ = ' PY2HTML - convert python code to HTML (version %s)\n\n%s' % (
        __version__, __doc__)

    try:
        # long options which take a value must end with '=' for getopt
        opts, _ = getopt.getopt(
            sys.argv[1:], "ho:i:p:s:r:RBOIE:",
            ["help", "output=", "page=", "input=", "style=", "reformat=",
             "Replace", "Block", "STDOUT", "STDIN", "entities="])
    except getopt.GetoptError:
        # print help information and exit:
        print(help_)
        sys.exit(2)

    outfile = "py2html.html"
    page = None
    infile = ""
    reformat = "1"
    justdiv = False
    stylefile = None
    emit = False
    Replace = False
    stdin = False
    entity = "1"
    for o, a in opts:
        if o == "-h":
            print(help_)
            sys.exit()
        elif o == "--help":
            print(help_styles)
            sys.exit()
        elif o in ("-o", "--output"):
            outfile = a
        elif o in ("-i", "--input"):
            infile = a
        elif o in ("-p", "--page"):
            page = a
        elif o in ("-s", "--style"):
            stylefile = a
        elif o in ("-r", "--reformat") and a in ("0", "1", "2"):
            reformat = str(a)
        elif o in ("-B", "--Block"):
            justdiv = True
        elif o in ("-O", "--STDOUT"):
            emit = True
        elif o in ("-R", "--Replace"):
            Replace = True
        elif o in ("-I", "--STDIN"):
            stdin = True
        elif o in ("-E", "--entities") and a in ("0", "1", "2", "3", "4"):
            entity = str(a)
        else:
            # unknown option or invalid value for -r / -E
            print(help_)
            sys.exit(1)

    if infile == "" and not stdin:
        print(help_)
        sys.exit()
    elif stdin:
        infile = "<stdin>"

    appliedstyle = readStyleFile(stylefile)
    data = file2HTML(infile, reformat, appliedstyle, Replace, entity)
    block = makeBlock(data)
    if infile == "<stdin>":
        infile = "stdin"
    if justdiv:
        html = block
    elif page:
        with open(page, "r", encoding="utf-8") as f:
            html = f.read()
        html = html % (block,)
    else:
        html = py_page % (infile, infile, block, __version__)
    if not emit:
        # the default page declares charset=utf-8, write it accordingly
        with open(outfile, "w", encoding="utf-8") as f:
            f.write(html)
    else:
        print(html)

616 

617 

if __name__ == "__main__":  # pragma: no cover
    # Script entry point: run the command line interface.
    cmdLine()