Coverage for src/pyrsslocal/simple_server/html_script_parser.py: 62%

93 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2023-02-02 02:59 +0100

1""" 

2@file 

3@brief This module contains a class which implements a simple server. 

4""" 

5 

6import sys 

7from html.parser import HTMLParser 

8from html import escape 

9from io import StringIO 

10from ..helper.python_run import run_python_script 

11 

12 

class HTMLScriptParser(HTMLParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to intercept sections such as
    the following and to run them.

    ::

        <script type="text/python">
        from pandas import DataFrame
        from pyquickhelper.pandashelper.tblformat import df2html
        pars = [ { "key":k, "value":v } for k,v in params ]
        tbl = DataFrame (pars)
        print ( df2html(tbl,class_table="myclasstable") )
        </script>

    Every other tag is written back to the output stream, text outside
    of scripts is escaped again with ``html.escape`` and the content of
    scripts which are not ``text/python`` is copied as is.
    """

    def __init__(self, outStream=sys.stdout,
                 context=None, catch_exception=False):
        """
        @param      outStream           instance of a class which should have a method ``write``
        @param      context             context for the script execution (dictionary with local variables),
                                        an empty dictionary is used if None
        @param      catch_exception     if True, the parser prints out the exception instead of raising when it happens.

        The context is not modified unless it contains container.
        In that case, it could be.
        """
        if context is None:
            context = {}
        HTMLParser.__init__(self, convert_charrefs=True)
        self.outStream = outStream
        # buffer receiving the source of the current text/python script,
        # None when the parser is not inside such a script
        self.script_stack = None
        self.context = context
        self.catch_exception = catch_exception
        # True while the parser is inside a script which is not text/python
        self.in_script = False

        # for some reason it is missing
        self.outStream.write('<?xml version="1.0" encoding="utf-8"?>\n')

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.

        @param      attrs       attributes, list of pairs ``(name, value)``
        @return                 string, empty if there is no attribute,
                                otherwise it starts with a space

        An attribute without any value (``<input disabled>``) is reported
        by ``HTMLParser`` with value None, it is written back as a bare name
        instead of ``name="None"``.
        """
        # NOTE(review): values are written back without being escaped again,
        # a value containing a double quote produces malformed markup.
        # Confirm whether ``escape(value)`` is wanted here.
        rendered = []
        for name, value in attrs:
            if value is None:
                rendered.append(name)
            else:
                rendered.append('%s="%s"' % (name, value))
        return "".join(" " + item for item in rendered)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        @param      tag         tag
        @param      attrs       attributes

        A tag ``<script type="text/python">`` (with this single attribute)
        starts the collection of a script and is not written.
        Every other tag is written to the output stream.
        """
        is_script = tag.lower() == "script"
        # the value is None for an attribute without value (<script type>)
        is_python = (is_script and len(attrs) == 1 and
                     attrs[0][0].lower() == "type" and
                     (attrs[0][1] or "").lower() == "text/python")
        if is_python:
            self.script_stack = StringIO()
            return

        if is_script:
            self.in_script = True
        self.script_stack = None
        self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def _script_print(self, *args, sep=" ", end="\n", file=None):
        """
        Replacement for ``print`` given to the scripts,
        it writes into the output stream of the parser.

        @param      args        values to print, converted with ``str``
        @param      sep         separator between values
        @param      end         string added after the last value
        @param      file        stream to write to, the parser stream if None
        """
        target = self.outStream if file is None else file
        sep = " " if sep is None else sep
        end = "\n" if end is None else end
        target.write(sep.join(str(a) for a in args) + end)

    def _run_script(self, script):
        """
        Runs a script collected between two tags ``script``.

        @param      script      source of the script

        The script is given to ``run_python_script`` with a dictionary
        which holds the ``print`` replacement updated with the context,
        a key ``print`` in the context takes precedence.
        """
        pars = {"print": self._script_print}
        pars.update(self.context)

        if not self.catch_exception:
            run_python_script(script, pars)
            return

        try:
            run_python_script(script, pars)
        except Exception:  # pylint: disable=W0703
            # the script and the traceback are written in the page
            import traceback  # pylint: disable=C0415
            ht = '<pre class="prettyprint linenums:4">\n%s\n</pre>\nException:<pre class="prettyprint">\n' % script
            self.outStream.write(ht)
            traceback.print_exc(file=self.outStream)
            self.outStream.write("\n</pre>")

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        @param      tag         tag

        The end of a ``text/python`` script triggers its execution,
        every other closing tag is written to the output stream.
        """
        is_script = tag.lower() == "script"
        if is_script and self.script_stack is not None:
            self._run_script(self.script_stack.getvalue())
            self.script_stack = None
            return

        if is_script:
            self.in_script = False
        self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        @param      data        data

        The data is added to the current python script if there is one,
        copied as is inside another script, escaped otherwise.
        """
        if self.script_stack is not None:
            self.script_stack.write(data)
        elif self.in_script:
            self.outStream.write(data)
        else:
            self.outStream.write(escape(data))

132 

133 

class HTMLScriptParserRemove(HTMLScriptParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to remove the :epkg:`HTML` code and the header:
    tags ``script``, ``head``, ``meta``, ``link``, ``style``, ``title``
    are not written, scripts are never executed and the text read
    while one of these sections is open is dropped.
    Method ``str_attr`` is inherited from the parent class.
    """

    def __init__(self, strict=False,
                 outStream=sys.stdout,
                 catch_exception=False):
        """
        @param      strict              not used, kept for compatibility with existing callers
        @param      outStream           instance of a class which should have a method ``write``
        @param      catch_exception     if True, the parser prints out the exception instead of raising when it happens.

        The parser is created with an empty context.
        """
        HTMLScriptParser.__init__(self,
                                  outStream=outStream,
                                  catch_exception=catch_exception,
                                  context={})
        # one flag per removed section, True while the parser is inside it
        self.in_ = dict.fromkeys(
            ("head", "meta", "link", "style", "title"), False)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        @param      tag         tag
        @param      attrs       attributes
        """
        ltag = tag.lower()

        # any new tag closes a pending link, meta or title section
        for name in ("link", "meta", "title"):
            self.in_[name] = False

        if ltag == "script":
            # the content of every script is collected, then discarded
            self.script_stack = StringIO()
        elif ltag in self.in_:
            self.in_[ltag] = True
        else:
            self.script_stack = None
            self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        @param      tag         tag
        """
        ltag = tag.lower()
        if ltag == "script" and self.script_stack is not None:
            # the script is dropped without being executed
            self.script_stack = None
        elif ltag in self.in_:
            self.in_[ltag] = False
        else:
            self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        @param      data        data

        Nothing is written while a removed section is open.
        """
        if any(self.in_.values()):
            return
        if self.script_stack is not None:
            self.script_stack.write(data)
        else:
            self.outStream.write(escape(data))