Coverage for src/ensae_teaching_cs/homeblog/py2html.py: 63%
180 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
"""
@file
@brief Mark-up Python code file using HTML for syntax highlighting.
Syntax highlighting rules are in the spirit of IDLE.

Unless the -r 0 option is used it will also format the code by
applying some of the PEP8 spacing guidelines to expressions and
assignments.

For those that want a GUI you can try py2htmTk.pyw -
(it's minimal but functional).

::
    USAGE in command line mode:
        py2html [options] [-i filename | -I]

    OPTIONS:
        -h              Print this command line summary
        --help          Print more detailed help on styles and
                        revision info.
        -o filename     Output file (default is "py2html.html")
        -i filename     Source file. See -I.
        -p filename     HTML page template (must include a %s for inserting
                        the code). If not specified then a default is used.
        -s filename     Use a style-file otherwise use built in styles
                        (see --help for details)
        -r 0|1|2        Reformat expressions and definitions.
                        -r 0 No formatting
                        -r 1 Format as a = 3+4; b = [1, 2, 3] (default)
                        -r 2 Format as a = 3 + 4; b = [1 , 2 , 3]
        -R              Replace newlines with <br>, tabs and multi-spaces
                        with &nbsp;
        -B              Just make a block (ignores -p)
        -O              Print to sys.stdout (ignores -o, no file created)
        -I              Use stdin as source file (ignore -i option)
        -E 0|1|2|3|4    0 - Don't do entity substitution.
                        1 - Substitute < > and & (default)
                        2 - Substitute < > & and "
                        3 - Substitute <> & " and '
                        4 - Substitute all non-ASCII alphanumeric
"""
import getopt
import io
import keyword
import os
import sys
import tokenize
# Style dictionary currently in effect. It stays None until readStyleFile()
# (or cmdLine()) assigns one; makeBlock() reads it.
appliedstyle = None

# Extended help text printed by the ``--help`` command line option.
help_styles = """
The default styles applied are as follows:
    <style type="text/css">
        h1 { color: green;
             position: center;
           }
        .python_code { font-family: monospace;
                       font-size: 10pt;
                     }
        .py_key {}
        .py_num {}
        .py_str { color: #00AA00;}
        .py_op {}
        .py_com { color: red;}
        .py_res { color: #FF7700;}
        .py_def { color: blue;}
    </style>

Where:
    .python_code is the style applied to the whole block
    .py_key is used for words that are not reserved words
    .py_num for numeric values
    .py_str for strings
    .py_op for operators
    .py_res for reserved words
    .py_com for comments
    .py_def used for words that are names in function and class definitions.

Optionally you can define a style-file, a text file with the following format:

    #User editable style substitutions
    # This style-file assumes a css is used.
    # Format:
    # styleName | start | end
    # examples:
    # block | <pre> | </pre>
    # key | <span class="key">'| </span>
    # str | <span style = "color: #00AA00"> | </span>

    block | <pre class="python_code" id="pycode"> | </pre>
    key | <span class="py_key"> | </span>
    num | <span class="py_num"> | </span>
    str | <span class="py_str"> | </span>
    op | <span class="py_op"> | </span>
    com | <span class="py_com"> | </span>
    res | <span class="py_res"> | </span>
    def | <span class="py_def"> | </span>
    brk | <span class="py_brk"> | </span>
    #End

This example does not use css:

    # User editable HTML style substitutions
    # This file uses font tags
    # Undefined tags will use the default character colour
    block | <pre> | </pre>
    key | |
    num | |
    str | <font color="green"> | </font>
    op | |
    com | <font color="red"> | </font>
    res | <font color="orange"> | </font>
    def | <font color="blue"> | </font>
    brk | |


Revisions:
0.51 First release.
0.6 7 Mar '04
    Fixed - now supports \\ character properly
    Changes - The styles now use a py_ prefix
    Added - Extended and added to formatting options
          - External style-file support
0.61 Added - support for disabling entity translator
0.62 Added - additional entity replacement options
"""

__author__ = "Paul Hardwick <paul@peck.org.uk>"
__date__ = "07 March 2004"
__version__ = "0.62"
# Latin-1 code point -> named HTML entity, used by substituteEntities() at
# level "4". The markup-significant characters and the non-breaking space
# are deliberately left out (see the commented entries): they are handled
# by the lower substitution levels.
entities = {  # 34: '&quot;', 38: '&amp;', 60: '&lt;', 62: '&gt;',
    # 160: '&nbsp;',
    161: '&iexcl;', 162: '&cent;', 163: '&pound;',
    164: '&curren;', 165: '&yen;', 166: '&brvbar;',
    167: '&sect;', 168: '&uml;', 169: '&copy;',
    170: '&ordf;', 171: '&laquo;', 172: '&not;',
    173: '&shy;', 174: '&reg;', 175: '&macr;',
    176: '&deg;', 177: '&plusmn;', 178: '&sup2;',
    179: '&sup3;', 180: '&acute;', 181: '&micro;',
    182: '&para;', 183: '&middot;', 184: '&cedil;',
    185: '&sup1;', 186: '&ordm;', 187: '&raquo;',
    188: '&frac14;', 189: '&frac12;', 190: '&frac34;',
    191: '&iquest;', 192: '&Agrave;', 193: '&Aacute;',
    194: '&Acirc;', 195: '&Atilde;', 196: '&Auml;',
    197: '&Aring;', 198: '&AElig;', 199: '&Ccedil;',
    200: '&Egrave;', 201: '&Eacute;', 202: '&Ecirc;',
    203: '&Euml;', 204: '&Igrave;', 205: '&Iacute;',
    206: '&Icirc;', 207: '&Iuml;', 208: '&ETH;',
    209: '&Ntilde;', 210: '&Ograve;', 211: '&Oacute;',
    212: '&Ocirc;', 213: '&Otilde;', 214: '&Ouml;',
    215: '&times;', 216: '&Oslash;', 217: '&Ugrave;',
    218: '&Uacute;', 219: '&Ucirc;', 220: '&Uuml;',
    221: '&Yacute;', 222: '&THORN;', 223: '&szlig;',
    224: '&agrave;', 225: '&aacute;', 226: '&acirc;',
    227: '&atilde;', 228: '&auml;', 229: '&aring;',
    230: '&aelig;', 231: '&ccedil;', 232: '&egrave;',
    233: '&eacute;', 234: '&ecirc;', 235: '&euml;',
    236: '&igrave;', 237: '&iacute;', 238: '&icirc;',
    239: '&iuml;', 240: '&eth;', 241: '&ntilde;',
    242: '&ograve;', 243: '&oacute;', 244: '&ocirc;',
    245: '&otilde;', 246: '&ouml;', 247: '&divide;',
    248: '&oslash;', 249: '&ugrave;', 250: '&uacute;',
    251: '&ucirc;', 252: '&uuml;', 253: '&yacute;',
    254: '&thorn;', 255: '&yuml;',
}
BSLASH = chr(92)

# Token categories passed to apply_style() as ``index``.
# The first five are the token types produced by the ``tokenize`` module.
# They are taken from ``tokenize`` itself rather than hard-coded: the former
# literals 50 (operator) and 52 (comment) do not match the values emitted by
# current Python versions, so operators, brackets and comments were never
# recognised.
WORD = tokenize.NAME
NUMBER = tokenize.NUMBER
STRING = tokenize.STRING
OPERATOR = tokenize.OP
COMMENT = tokenize.COMMENT
# The remaining categories are synthetic: tokenize never yields them, they
# are chosen by apply_style().
RESERVED = "reserved"
DEFINING = "defining"
BRACKETS = "brackets"
BLOCK = "block"

# Token category -> style name (the keys used in style dictionaries/files).
skeys = {WORD: "key", NUMBER: "num", STRING: "str", COMMENT: "com",
         OPERATOR: "op", RESERVED: "res", DEFINING: "def", BRACKETS: "brk",
         BLOCK: "block"}

#######################################
# The styles wrapping each token using css
#######################################
# style name -> [opening markup, closing markup]
py_style = {
    skeys[WORD]: ['<span class="py_key">', "</span>"],  # name
    skeys[NUMBER]: ['<span class="py_num">', "</span>"],  # number
    skeys[STRING]: ['<span class="py_str">', "</span>"],  # string
    skeys[OPERATOR]: ['<span class="py_op">', "</span>"],  # operator
    skeys[COMMENT]: ['<span class="py_com">', "</span>"],  # comment
    # reserved word
    skeys[RESERVED]: ['<span class="py_res">', "</span>"],
    # class and def names
    skeys[DEFINING]: ['<span class="py_def">', "</span>"],
    # all enclosing ops
    skeys[BRACKETS]: ['<span class="py_brk">', "</span>"],
    # wraps the block
    skeys[BLOCK]: ['<pre class="python_code" id="pycode">', "</pre>"],
}
######################################
# The default web page
######################################
# Template filled with ``%``-formatting; it takes exactly four values, in
# this order: page title, heading name, highlighted code block, version.
py_page = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" >
<title>%s</title>
<style type="text/css">
    h1 { color: green;
         position: center;
       }
    .python_code { font-family: monospace;
                   font-size: 10pt;
                 }
    .py_key {color: black;}
    .py_num {color: black;}
    .py_str { color: #00AA00;}
    .py_op {color: black; }
    .py_com { color: red;}
    .py_res { color: #FF7700;}
    .py_def { color: blue;}
    .py_brk { color: black;}
</style>
</head>
<body>
<h1>#### Source code for %s ####</h1>
<hr>
%s
<hr>
[Created with py2html Ver:%s]
<p>
    <a href="http://validator.w3.org/check/referer"><img border="0"
        src="http://www.w3.org/Icons/valid-html401"
        alt="Valid HTML 4.01!" height="31" width="88"></a>
</p></body>
</html>"""
######################################
# Some variables used for tracking states.
# They are module-level because apply_style() is called once per token and
# needs to remember what the previous token was.
command = False
prev = 0  # category of the previous token (0 = nothing processed yet)
prevres = False  # was the previous token a reserved word?
prevtok = "#"  # previous token, as it was emitted
defining = False  # check if previous token was def or class
# Are we somewhere between def/class and : (for default assignment formatting)
definingmode = False

# Operator formatting
# NOTE(review): apply_style() compares tokens against these lists *after*
# entity substitution, so with substitution enabled "<", ">" and "&" will
# not match the plain entries below - confirm whether that is intended.
spaced_tokens = ["=", "==", ">", "<", ">=",
                 "<=", "+=", "-=", "<>", "!=", "&&", "||"]
monospaced_tokens = [":", ";", ","]
binops = ["+", "-", "*", "/", "//", "**", "%", "<<", ">>", "&", "^", "|"]
brackets = ["[", "]", "{", "}", "(", ")"]
spaced_fmt2 = spaced_tokens + binops

# reserved words
reserved = kwlist = [
    # --start keywords--
    'and', 'assert', 'break',
    'class', 'continue', 'def',
    'del', 'elif', 'else',
    'except', 'exec', 'finally',
    'for', 'from', 'global',
    'if', 'import', 'in',
    'is', 'lambda', 'not',
    'or', 'pass', 'print',
    'raise', 'return', 'try',
    'while', 'yield',
    # others languages
    "void", "double", "int", "throw", "template", "catch",
    "public", "protected", "float", "unsigned", "__int32", "short",
    # --end keywords--
]
# The list above predates Python 3: add the keywords of the running
# interpreter that it lacks ("with", "as", "nonlocal", "None", ...).
# ``+=`` extends in place, so ``kwlist`` keeps referring to the same list.
reserved += [kw for kw in keyword.kwlist if kw not in reserved]
def substituteEntities(token, level="1", char_set=None):
    """
    Replaces characters of *token* by HTML entities.

    @param      token       text to escape
    @param      level       how much is substituted (a string, an integer
                            is accepted as well):

                            - "0" : Don't do entity substitution.
                            - "1" : Substitute ``< > and & (default)``
                            - "2" : Substitute ``< > & and "``
                            - "3" : Substitute ``<> & " and '``
                            - "4" : as "3", and every character above
                              code point 127 is replaced as well
    @param      char_set    not implemented yet, ignored
    @return                 the escaped text, *token* itself is returned
                            for level "0" and for any unknown level
    """
    level = str(level)
    # Exact membership: a substring test such as ``level in "1234"`` is
    # also true for "" or "12".
    if level not in ("1", "2", "3", "4"):
        return token

    # '&' must be escaped first, otherwise the '&' introduced by the
    # following replacements would be escaped a second time.
    token = token.replace('&', "&amp;")
    token = token.replace('<', "&lt;")
    token = token.replace('>', "&gt;")
    if level in ("2", "3", "4"):
        token = token.replace('"', "&quot;")
    if level in ("3", "4"):
        token = token.replace("'", "&#39;")
    if level == "4":
        # named entity when the table has one, numeric reference otherwise
        token = ''.join(
            char if ord(char) <= 127
            else entities.get(ord(char), "&#%d;" % ord(char))
            for char in token)
    return token
def apply_style(index, token, start, src, format, style, entity="1"):
    """
    Wraps one token into the markup of its style and, depending on
    *format*, adds spaces around it.

    The function is stateful: it reads and updates the module-level
    variables ``prev``, ``prevres``, ``prevtok``, ``defining`` and
    ``definingmode`` to know what the previous token was.

    @param      index       token category (``WORD``, ``NUMBER``, ``STRING``,
                            ``OPERATOR``, ``COMMENT`` or any other token type)
    @param      token       token text
    @param      start       not used yet (meant for intelligent line breaks)
    @param      src         not used yet (meant for intelligent line breaks)
    @param      format      "0" leaves the token alone, "1" and "2" apply
                            the two spacing conventions
    @param      style       style dictionary, ``name -> [start, end]``
    @param      entity      entity substitution level,
                            see @see fn substituteEntities
    @return                 the formatted token
    """
    global prev, prevres, defining, definingmode, prevtok
    no_style = ['', '']

    # pick the style: keyword handling first
    isres = False
    if index == WORD:
        if token in reserved:
            fmt = style.get(skeys[RESERVED], no_style)
            isres = True
            if token in ('def', 'class'):
                defining = True
                definingmode = True
        else:
            # the name right after def/class gets the "defining" style
            if defining:
                fmt = style.get(skeys[DEFINING], no_style)
            else:
                fmt = style.get(skeys.get(index, 99), no_style)
            defining = False
    elif index == OPERATOR and token in brackets:
        fmt = style.get(skeys[BRACKETS], no_style)
    else:
        # unknown categories map to the key 99, which no style defines
        fmt = style.get(skeys.get(index, 99), no_style)

    # the colon closes the def/class header
    if token == ":" and definingmode:
        definingmode = False

    # NOTE(review): the comparisons below use the token *after* entity
    # substitution, so "<" or ">" no longer equal the entries of
    # spaced_tokens once they are escaped - confirm this is intended.
    token = substituteEntities(token, entity, char_set=None)

    # What the previously emitted text ends with; used to avoid a doubled
    # space, or a space right after an opening bracket.
    after_space = prevtok.endswith(" ")
    after_opener = prevtok.endswith(("(", "[", "{"))
    after_value = prev in (WORD, NUMBER, STRING, OPERATOR)
    is_value = index in (WORD, NUMBER, STRING)

    if format == "1":
        if token in spaced_tokens and not definingmode:
            token = f" {token} "
        elif token in monospaced_tokens:
            token = f"{token} "
        elif (isres or index == COMMENT) and after_value:
            if not after_space and not (isres and after_opener):
                token = " " + token
        elif is_value and prev == WORD:
            token = " " + token
        elif token in ("[", "(") and prevres:
            token = " " + token
    elif format == "2":
        if token in spaced_fmt2:
            if prev == OPERATOR and token in ("+", "-", "~"):
                # unary operator: no space between it and its operand
                if not after_space and not after_opener:
                    token = f" {token}"
            else:
                token = f" {token} "
        elif token in monospaced_tokens:
            token = f"{token} "
        elif isres and after_value:
            if not after_space and not after_opener:
                token = " " + token
        elif is_value and prev == WORD:
            token = " " + token
        elif token in ("[", "(") and prevres:
            token = " " + token

    text = fmt[0] + token + fmt[1]
    # remember this token for the next call
    prev = index
    prevres = isres
    prevtok = token
    return text
def readStyleFile(filename):
    """
    Reads a style file and returns a style dictionary
    ``{name: [start, end]}``. The result is also stored in the
    module-level variable ``appliedstyle``.

    The file format is::

        #User editable style substitutions
        # This style-file assumes a css is used.
        # Format:
        # styleName | start | end
        # examples:
        # block | <pre> | </pre>
        # key | <span class="key">'| </span>
        # str | <span style = "color: #00AA00"> | </span>

        block | <pre class="python_code" id="pycode"> | </pre>
        key | <span class="py_key"> | </span>
        num | <span class="py_num"> | </span>
        str | <span class="py_str"> | </span>
        op | <span class="py_op"> | </span>
        com | <span class="py_com"> | </span>
        res | <span class="py_res"> | </span>
        def | <span class="py_def"> | </span>
        brk | <span class="py_brk"> | </span>

    Empty lines and lines starting with ``#`` are ignored.

    @param      filename    style file, the default styles are returned
                            if it is empty or None or cannot be read
    @return                 style dictionary

    A line which is not made of three ``|`` separated fields with a
    non-empty name is reported on stdout and ends the program with
    ``sys.exit(1)``.
    """
    global appliedstyle
    if not filename:
        appliedstyle = py_style
        return py_style

    try:
        with open(filename, 'r') as ff:
            lines = ff.readlines()
    except (OSError, UnicodeDecodeError):  # pragma: no cover
        # best effort: an unreadable style file falls back to the defaults
        appliedstyle = py_style
        return py_style

    appliedstyle = {}
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        parts = [part.strip() for part in line.split('|')]
        if len(parts) == 3 and parts[0]:
            appliedstyle[parts[0]] = parts[1:]
        else:
            print("Error in style file:\n", line)
            sys.exit(1)
    return appliedstyle
def replaceCodes(text=""):
    """
    Helper function that does the ``\\n`` and space substitution
    returning the changed text: newlines become ``<br>``, tabs and runs of
    spaces become ``&nbsp;`` combinations so that the layout survives
    outside of a ``<pre>`` block.

    @param      text        text to modify
    @return                 modified text
    """
    # NOTE(review): the exact ``&nbsp;`` patterns were reconstructed, the
    # entities were missing from the replacement strings - confirm them.
    text = text.replace('\n', '<br>')
    text = text.replace('\t', '&nbsp;' * 4)
    # Longest runs first. Non-breaking and plain spaces alternate so that
    # no two plain spaces remain adjacent (a browser would collapse them).
    text = text.replace(" " * 4, "&nbsp; &nbsp; ")
    text = text.replace(" " * 3, "&nbsp; &nbsp;")
    text = text.replace(" " * 2, "&nbsp; ")
    # give empty lines some content
    text = text.replace("<br><br>", "<br>&nbsp;<br>")
    return text
def file2HTML(file_name, format, style, Replace, entity="1", encoding="utf-8"):
    """
    Reads a file and returns the contents as a string,
    highlighted with :epkg:`HTML` styles. This function uses the
    output of the tokenize module to decide what to colour.
    It calls @see fn apply_style with the token index.

    @param      file_name   ``"<stdin>"`` to read the standard input,
                            the name of an existing file, or the source
                            code itself (anything else)
    @param      format      - '0': the code will display as the author
                              expected it to,
                            - '1': spaces are added and removed around
                              expressions to standardise the format,
                            - '2': as '1' but different rules
    @param      style       style dictionary
    @param      Replace     if True, replace ``\\n`` with ``<br>`` and
                            tabs and multiple spaces with ``&nbsp;``
                            combinations (see @see fn replaceCodes)
    @param      entity      entity substitution level,
                            see @see fn substituteEntities
    @param      encoding    encoding used to read *file_name*
    @return                 the highlighted code, or the message
                            ``"File cannot be tokenized by tokenize"``

    The source is tokenized from memory, no temporary file is written.
    """
    global prev, prevres, prevtok, defining, definingmode
    # Start from a clean formatter state, otherwise the result depends on
    # whatever was converted before this call.
    prev, prevres, prevtok = 0, False, "#"
    defining = definingmode = False

    if file_name == "<stdin>":
        source = sys.stdin.read()
    elif len(file_name) < 1000 and os.path.exists(file_name):
        try:
            with open(file_name, 'r', encoding=encoding) as f:
                source = f.read()
        except UnicodeDecodeError:  # pragma: no cover
            print("issue with file ", file_name)
            raise
    else:
        # not a file: the argument is the code itself
        source = file_name
    # same line ends as a file read in text mode
    source = source.replace("\r\n", "\n").replace("\r", "\n")

    # ``lines`` is 1-based like the rows reported by tokenize, and every
    # line keeps its end of line.
    lines = [''] + io.StringIO(source).readlines()
    # use tokenize to iterate through tokens
    tok = tokenize.generate_tokens(io.StringIO(source).readline)

    page = []
    old_line = 1
    old_column = 0
    try:
        for tupe in tok:
            row, column = tupe[2]
            # first collect packing between previous token and this one
            if old_line == row:  # Another token on same line as last token
                if column and old_column < column:
                    # handle when no reformat and when it is the first line
                    # processed
                    if format == "0" or prev == 0:
                        txt = lines[old_line][old_column:column]
                        page.append(txt.replace(BSLASH, BSLASH + '\n'))
            else:
                # collect remains of old line; its end of line is dropped,
                # a trailing backslash (line continuation) gets one back
                subpage = lines[old_line][old_column:].rstrip('\n')
                page.append(subpage.replace(BSLASH, BSLASH + '\n'))
                old_line += 1
                # now collect all the lines between last and current
                while old_line != row:
                    page.append(lines[old_line].replace(BSLASH, BSLASH + '\n'))
                    old_line += 1
                # now get beginning of line up to current column
                txt = lines[old_line][0:column]
                page.append(txt.replace(BSLASH, BSLASH + '\n'))
            # now add formatted token
            page.append(apply_style(tupe[0], tupe[1], tupe[2], tupe[4],
                                    format, style, entity))

            # now update pointers
            old_line, old_column = tupe[3]
    except tokenize.TokenError:  # pragma: no cover
        return "File cannot be tokenized by tokenize"
    except IndexError:  # pragma: no cover
        # The final tokens can point one row past the last line of
        # ``lines``; everything worth keeping is already in ``page``.
        pass

    text = ''.join(page)
    if Replace:
        text = replaceCodes(text)
    return text
def makeBlock(data):
    """
    Applies the block tags to text.

    @param      data        highlighted code
    @return                 *data* wrapped into the ``block`` markup of the
                            style in effect (``appliedstyle``), of the
                            default style if none was loaded yet
    """
    # appliedstyle stays None until readStyleFile() has been called
    style = py_style if appliedstyle is None else appliedstyle
    block = style['block']
    return f"{block[0]}{data}{block[1]}"
def cmdLine():  # pragma: no cover
    '''
    This is the function that handles command line mode.

    It parses ``sys.argv``, converts the source (a file or the standard
    input) and writes the result to the output file or to the standard
    output. The help is printed and the program exits when the options
    are invalid or when no source is given.
    '''
    global appliedstyle
    help_ = ' PY2HTML - convert python code to HTML (version %s)\n\n%s' % (
        __version__, __doc__)

    try:
        # long options which take a value need a trailing '='
        opts, _ = getopt.getopt(
            sys.argv[1:], "ho:i:p:s:r:RBOIE:",
            ["help", "output=", "page=", "input=", "style=", "reformat=",
             "Replace", "Block", "STDOUT", "STDIN", "entities="])
    except getopt.GetoptError:
        # print help information and exit:
        print(help_)
        sys.exit(2)

    outfile = "py2html.html"
    page = None
    infile = ""
    reformat = "1"
    justdiv = False
    stylefile = None
    emit = False
    Replace = False
    stdin = False
    entity = "1"
    for o, a in opts:
        if o == "-h":
            print(help_)
            sys.exit()
        elif o == "--help":
            print(help_styles)
            sys.exit()
        elif o in ("-o", "--output"):
            outfile = a
        elif o in ("-i", "--input"):
            infile = a
        elif o in ("-p", "--page"):
            page = a
        elif o in ("-s", "--style"):
            stylefile = a
        # exact values only: ``a in "012"`` also accepts "" and "01"
        elif o in ("-r", "--reformat") and a in ("0", "1", "2"):
            reformat = a
        elif o in ("-B", "--Block"):
            justdiv = True
        elif o in ("-O", "--STDOUT"):
            emit = True
        elif o in ("-R", "--Replace"):
            Replace = True
        elif o in ("-I", "--STDIN"):
            stdin = True
        elif o in ("-E", "--entities") and a in ("0", "1", "2", "3", "4"):
            entity = a
        else:
            # unknown option or invalid value
            print(help_)
            sys.exit(1)

    if stdin:
        infile = "<stdin>"
    elif infile == "":
        print(help_)
        sys.exit()

    appliedstyle = readStyleFile(stylefile)
    data = file2HTML(infile, reformat, appliedstyle, Replace, entity)
    block = makeBlock(data)
    if infile == "<stdin>":
        infile = "stdin"

    if justdiv:
        html = block
    elif page:
        # the template must contain a single %s receiving the code
        with open(page, "r") as f:
            template = f.read()
        html = template % (block,)
    else:
        html = py_page % (infile, infile, block, __version__)

    if emit:
        print(html)
    else:
        with open(outfile, "w") as f:
            f.write(html)
# Script entry point. getopt and sys are imported at the top of the module
# so that cmdLine() also works when it is called after an import.
if __name__ == "__main__":  # pragma: no cover
    cmdLine()