Coverage for src/ensae_teaching_cs/homeblog/modifypost.py: 82%

159 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-01-27 05:44 +0100

1""" 

2@file 

3@brief Helpers which modify a post. 

4""" 

5import xml.dom.minidom 

6import os 

7from pyquickhelper.loghelper import fLOG 

8from .filefunction import find_all_blogs_function 

9from .postclassification import privateKeyClassificationMandatory, classify_post 

10 

11 

12def _write_data(writer, data, section): 

13 """ 

14 Writes datachars to writer and deals with < > 

15 @param writer stream or file 

16 @param data string to write 

17 @param section depending of this name, 

18 some special characters are processed or not (pre or !pre) 

19 """ 

20 if data: 

21 if section == "pre": 

22 data = data.replace("&", "&amp;") \ 

23 .replace("<", "&lt;") \ 

24 .replace(">", "&gt;") 

25 elif section == "script": 

26 pass 

27 else: 

28 if section is None: 

29 raise ValueError("section name is empty") 

30 data = data.replace("&", "&amp;") \ 

31 .replace("<", "&lt;") \ 

32 .replace("\"", "&quot;") \ 

33 .replace(">", "&gt;") 

34 writer.write(data) 

35 

36 

def Text_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Replacement for ``xml.dom.minidom.Text.writexml``: writes the text
    with an escaping which depends on the section the text belongs to.

    @param      self        text node
    @param      writer      stream or file
    @param      indent      current indentation
    @param      addindent   indentation to add to higher levels (unused here)
    @param      newl        newline string
    @raise      ValueError  if no section name can be determined

    The section name is looked up in this order: the node's own
    ``localName``, the attribute ``sectionNameSpecial`` (set by
    *Element_writexml* on its children), the ``localName`` of the parent node.
    """
    section = self.localName
    if section is None:
        # the attribute only exists once the parent element tagged the node,
        # a node serialized on its own must not fail with AttributeError
        section = getattr(self, "sectionNameSpecial", None)
    if section is None and self.parentNode is not None:
        section = self.parentNode.localName
    if section is None:
        raise ValueError("section name is empty")
    _write_data(writer, f"{indent}{self.data}{newl}", section)

44 

45 

def Element_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Replacement for ``xml.dom.minidom.Element.writexml``.

    @param      self        element to serialize
    @param      writer      stream or file
    @param      indent      current indentation
    @param      addindent   indentation to add to higher levels
    @param      newl        newline string

    Attributes are written sorted by name. An element without children
    is written as a self-closing tag. Children whose ``localName`` is None
    receive the attribute ``sectionNameSpecial`` holding this element's
    ``localName`` before they are written.
    """
    writer.write(indent + "<" + self.tagName)

    attributes = self._get_attributes()
    for attr_name in sorted(attributes.keys()):
        writer.write(f" {attr_name}=\"")
        # an empty section name selects the full escaping (quotes included)
        _write_data(writer, attributes[attr_name].value, "")
        writer.write("\"")

    if not self.childNodes:
        writer.write(f"/>{newl}")
        return

    writer.write(f">{newl}")
    child_indent = indent + addindent
    for child in self.childNodes:
        if child.localName is None:
            # lets the child know which tag encloses it
            child.sectionNameSpecial = self.localName
        child.writexml(writer, child_indent, addindent, newl)
    writer.write(f"{indent}</{self.tagName}>{newl}")

68 

69 

# Module-level side effect: the serializers defined above replace the ones
# of xml.dom.minidom, every document serialized after this module is
# imported goes through them.
xml.dom.minidom._write_data = _write_data
xml.dom.minidom.Text.writexml = Text_writexml
xml.dom.minidom.Element.writexml = Element_writexml

73 

74 

def information_from_xml(fullFileContent, file):
    """
    Parses a post and extracts the information stored in its header.

    @param      fullFileContent     content of the post (XML text)
    @param      file                file name, only used in messages
    @return                         dom, attr, link, head, content where:
                                    * dom is the parsed document
                                    * attr maps the name of every named ``meta``
                                      of the header to its content, key ``"title"``
                                      holds the serialized ``title`` element
                                    * link is the list of ``href`` of the ``link``
                                      elements of the header
                                    * head is the first ``head`` element
                                    * content is the serialized first ``body`` element
    @raise      ValueError          if the header contains an upper case ``META``
                                    or if no attribute was found

    Any exception raised by the parser is logged, printed and raised again.
    """
    # read xml
    try:
        dom = xml.dom.minidom.parseString(fullFileContent)
    except Exception as e:
        fLOG("issue with file ", file)
        message = str(e)
        # keeps the message from "line ..." onwards, but only if that word is
        # present: a failed find returns -1 and slicing from -1 would reduce
        # the message to its last character
        position = message.find("line")
        if position >= 0:
            message = message[position:]
        print(f" File \"{file}\", {message} syntax error")
        raise

    # get attributes
    attr = {}
    link = []
    head = dom.documentElement.getElementsByTagName("head")[0]
    for no in head.childNodes:
        tag = no.localName
        if tag == "META":
            raise ValueError("upper META")
        if tag == "meta" and "name" in no.attributes:
            name = no.attributes["name"].value
            attr[name] = no.attributes["content"].value
        if tag == "title":
            attr["title"] = no.toxml()
        if tag == "link":
            link.append(no.attributes["href"].value)

    if not attr:
        raise ValueError("document " + file + " has no attribute")

    content = dom.documentElement.getElementsByTagName("body")[0].toxml()

    return dom, attr, link, head, content

111 

112 

def modify_header_attributes(dom, headerattr):
    """
    Updates the header of a post with the values stored in *headerattr*,
    the function does not modify links.

    @param      dom             parsed document, modified in place
    @param      headerattr      dictionary ``{ meta name: content }``,
                                key ``"title"`` is applied to the ``title`` element
    @raise      ValueError      if the header contains an upper case ``META``
    @raise      TypeError       if a value to store is not a string
    """
    def checked(value):
        # only strings can be stored in the header
        if not isinstance(value, str):
            raise TypeError(
                "content should be a string not " + str(type(value)))
        return value

    head = dom.documentElement.getElementsByTagName("head")[0]
    for node in head.childNodes:
        tag = node.localName
        if tag == "META":
            raise ValueError("upper META")

        if tag == "meta" and "name" in node.attributes:
            meta_name = node.attributes["name"].value
            if meta_name in headerattr:
                new_content = checked(headerattr[meta_name])
                node.attributes["content"].value = new_content

        if tag == "title" and "title" in headerattr:
            # NOTE(review): this only stores the string in an attribute
            # ``value`` of the node, presumably the serialized title
            # is left unchanged -- confirm this is intended
            node.value = checked(headerattr["title"])

137 

138 

def load_and_modify_xml_dom(file, outfile, check_keywords=True):
    """
    Loads a post, repairs a few known markup malformations, completes and
    normalizes its header, and optionally writes the result.

    @param      file                post to load (utf-8)
    @param      outfile             destination file, None to skip the writing
    @param      check_keywords      if True, raises an exception when the keywords
                                    contain none of the mandatory ones
    @return                         ``(dom, resfile)`` if *outfile* is not None,
                                    *resfile* is *outfile* if the file was written,
                                    None otherwise; only ``dom`` if *outfile* is None
    @raise      ValueError          if the post has no title or does not link ``pMenu.css``
    @raise      Exception           if *check_keywords* is True and no mandatory
                                    keyword is found
    """
    with open(file, "r", encoding="utf8") as f:
        text = f.read()

    memo = text

    # cleaning malformations, the order of the replacements matters:
    # the first two close tags which are still written in their original form
    replacements = (
        ('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
         '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'),
        ('''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css'>''',
         '''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css' />'''),
        ("<HTML>", "<html>"),
        ("<HEAD>", "<head>"),
        ("</HEAD>", "</head>"),
        ('"../pMenu.css"', '"pMenu.css"'),
        ("'../pMenu.css'", '"pMenu.css"'),
        (" HREF=", " href="),
        ("<LINK REL=", "<link rel="),
        ("<LINK TYPE=", "<link type="),
    )
    for old, new in replacements:
        text = text.replace(old, new)

    dom, attr, link, head, content = information_from_xml(text, file)

    if "title" not in attr:
        raise ValueError("no title in " + file)
    if "pMenu.css" not in link:
        raise ValueError("no pMenu.css in " + file)

    if "date" not in attr:
        # the date is taken from the first ten characters of the file name
        date = os.path.split(file)[-1][:10]
        attr["date"] = date
        ele = dom.createElement("meta")
        ele.attributes["name"] = "date"  # pylint: disable=E1101
        ele.attributes["content"] = date  # pylint: disable=E1101
        head.appendChild(ele)

    # change keywords
    keywords = [_.strip() for _ in attr["keywords"].split(",")]
    mod_keywords = classify_post(keywords, content)

    if check_keywords:
        if not any(_ in mod_keywords for _ in privateKeyClassificationMandatory):
            raise Exception("the post should be at least technical or recreative:\n File: \"" + str(file) + "\ntitle: " + attr[
                "title"] + "\nkeywords: " + str(keywords) + "\nmandatory\n" + str(privateKeyClassificationMandatory) + "\"")

    attr["keywords"] = ", ".join(mod_keywords)

    # modify header
    modify_header_attributes(dom, attr)

    if outfile is None:
        return dom

    # from xml to text
    text = dom.documentElement.toxml()
    text = text.replace('type="text/javascript"/>',
                        'type="text/javascript"></script>')
    header = '<?xml version="1.0" encoding="utf-8"?>'
    text = header + "\n" + text

    if os.path.exists(outfile):
        with open(outfile, "r", encoding="utf8") as f:
            oldtext = f.read()
    else:
        oldtext = ""

    resfile = None
    # NOTE(review): the existing output is compared to the *original* input
    # (memo), not to the new text; when outfile == file both are always equal
    # and nothing is written -- confirm this is the intended behaviour
    if oldtext != memo:
        # text is different, update it
        direct = os.path.split(outfile)[0]
        # direct is empty when outfile has no directory part,
        # os.makedirs("") would fail
        if direct and not os.path.exists(direct):
            os.makedirs(direct)

        fLOG("updating ", file)
        with open(outfile, "w", encoding="utf8") as f:
            f.write(text)
        resfile = outfile

    return dom, resfile

227 

228 

def modify_all_posts(folder=".",
                     outfolder=None,
                     exclude=None):
    """
    modifies, checks the syntax of every post

    @param      folder      folder (also process subfolders)
    @param      outfolder   new location (the modified post is copied somewhere else),
                            if None, replace the file
    @param      exclude     if not None, function called on a file name, the file
                            is skipped when the function returns a True value, example:

                            ::

                                lambda f : "_old" in f

    @return                 files, modified where:
                            * files is the list of files processed
                            * modified is the list of modified files
    """
    if outfolder is None:
        outfolder = folder
    folder = os.path.abspath(folder)
    outfolder = os.path.abspath(outfolder)
    files = find_all_blogs_function(folder, exclude)
    modified = []

    for file in files:
        # only the first occurrence (the leading folder) is replaced, the same
        # string may appear again deeper in the path and must be left untouched
        outfile = file.replace(folder, outfolder, 1)
        fLOG(" loading file ", file, ' to ', outfile)
        _, outfile = load_and_modify_xml_dom(file, outfile)
        if outfile is not None:
            modified.append(outfile)

    return files, modified

261 

262 return files, modified