Coverage for src/ensae_teaching_cs/homeblog/modifypost.py: 82%

1"""

2@file

3@brief Helpers which modify a post.

4"""

5import xml.dom.minidom

6import os

7from pyquickhelper.loghelper import fLOG

8from .filefunction import find_all_blogs_function

9from .postclassification import privateKeyClassificationMandatory, classify_post

def _write_data(writer, data, section):
    """
    Writes *data* to *writer* and escapes the XML special characters
    depending on the section the text belongs to.

    @param      writer      stream or file, only its method ``write`` is called
    @param      data        string to write, nothing is written if it is
                            empty or None
    @param      section     name of the enclosing section:
                            ``"pre"`` escapes ``&``, ``<`` and ``>``,
                            ``"script"`` writes the text unchanged,
                            any other name also escapes double quotes
    @raise      ValueError  if *section* is None while *data* is not empty
    """
    if not data:
        return

    if section == "pre":
        # '&' must be escaped first, otherwise the '&' of the entities
        # produced by the next replacements would be escaped again
        data = (data.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;"))
    elif section != "script":
        if section is None:
            raise ValueError("section name is empty")
        data = (data.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace("\"", "&quot;")
                    .replace(">", "&gt;"))
    # section == "script": the content is written verbatim
    writer.write(data)

def Text_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Writes a text node, the escaping depends on the section the text
    belongs to (see ``_write_data``).

    @param      self        text node, it must expose ``localName`` and ``data``
    @param      writer      stream or file
    @param      indent      string written before the text
    @param      addindent   unused, kept to match the signature of ``writexml``
    @param      newl        string written after the text
    @raise      ValueError  if the section name cannot be determined

    The section name is ``self.localName`` or, if it is None,
    ``self.sectionNameSpecial`` which ``Element_writexml`` sets on the
    children whose ``localName`` is None.
    """
    section = self.localName
    if section is None:
        # a node which was never visited by Element_writexml does not have
        # this attribute: report it with the same error as an empty name
        section = getattr(self, "sectionNameSpecial", None)
    if section is None:
        raise ValueError("section name is empty")
    _write_data(writer, f"{indent}{self.data}{newl}", section)

def Element_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Writes an element, its attributes (sorted by name) and its children.

    @param      self        element to write
    @param      writer      stream or file
    @param      indent      current indentation
    @param      addindent   indentation to add to higher levels
    @param      newl        newline string

    An element without any child is written as ``<tag .../>``.
    Every child whose ``localName`` is None receives the name of this
    element in ``sectionNameSpecial`` before it is written.
    """
    writer.write(indent + "<" + self.tagName)

    attributes = self._get_attributes()
    for key in sorted(attributes.keys()):
        writer.write(f" {key}=\"")
        # the section name is "" for attribute values
        _write_data(writer, attributes[key].value, "")
        writer.write("\"")

    if not self.childNodes:
        writer.write(f"/>{newl}")
        return

    writer.write(f">{newl}")
    for child in self.childNodes:
        if child.localName is None:
            child.sectionNameSpecial = self.localName
        child.writexml(writer, indent + addindent, addindent, newl)
    writer.write(f"{indent}</{self.tagName}>{newl}")

# Replaces the serialization functions of xml.dom.minidom with the versions
# defined in this module, every document written after this point uses them.
xml.dom.minidom.Element.writexml = Element_writexml
xml.dom.minidom.Text.writexml = Text_writexml
xml.dom.minidom._write_data = _write_data

def information_from_xml(fullFileContent, file):
    """
    Parses a post and extracts the information stored in its header.

    @param      fullFileContent     content of the post (XML)
    @param      file                file name, only used in the messages
    @return                         dom, attr, link, head, content

    * *dom*: the parsed document
    * *attr*: dictionary ``{ name: content }`` built from the ``meta``
      elements of the header which have an attribute ``name``,
      key ``"title"`` holds the XML of the ``title`` element
    * *link*: list of the ``href`` of the ``link`` elements of the header
    * *head*: the first ``head`` element
    * *content*: the XML of the first ``body`` element

    @raise      ValueError  if the header contains an element ``META``
                            or if no attribute was found

    Any exception raised by the parser is logged, printed and raised again.
    """
    # read xml
    try:
        dom = xml.dom.minidom.parseString(fullFileContent)
    except Exception as e:
        fLOG("issue with file ", file)
        message = str(e)
        # keeps the location ("line ..., column ...") when there is one,
        # find returns -1 otherwise and slicing from -1 would only keep
        # the last character of the message
        position = message.find("line")
        if position >= 0:
            message = message[position:]
        print(f" File \"{file}\", {message} syntax error")
        raise

    # get attributes
    attr = {}
    link = []
    head = dom.documentElement.getElementsByTagName("head")[0]
    for no in head.childNodes:
        if no.localName == "META":
            raise ValueError("upper META")
        if no.localName == "meta" and "name" in no.attributes:
            name = no.attributes["name"].value
            attr[name] = no.attributes["content"].value

        if no.localName == "title":
            attr["title"] = no.toxml()
        if no.localName == "link":
            link.append(no.attributes["href"].value)

    if not attr:
        raise ValueError("document " + file + " has no attribute")

    content = dom.documentElement.getElementsByTagName("body")[0].toxml()

    return dom, attr, link, head, content

111

112

def modify_header_attributes(dom, headerattr):
    """
    Updates the header of a post with the values stored in *headerattr*,
    the function does not modify links.

    @param      dom         document, its first ``head`` element is modified
    @param      headerattr  dictionary ``{ name: content }``, every value
                            which is used must be a string
    @raise      ValueError  if the header contains an element ``META``
    @raise      TypeError   if a value to store is not a string

    Every ``meta`` element whose attribute ``name`` is a key of
    *headerattr* gets the corresponding value in its attribute ``content``.
    """
    def checked(value):
        # every value stored in the header must be a string
        if not isinstance(value, str):
            raise TypeError(
                "content should be a string not " + str(type(value)))
        return value

    head = dom.documentElement.getElementsByTagName("head")[0]
    for node in head.childNodes:
        tag = node.localName
        if tag == "META":
            raise ValueError("upper META")

        if tag == "meta" and "name" in node.attributes:
            meta_name = node.attributes["name"].value
            if meta_name in headerattr:
                node.attributes["content"].value = checked(
                    headerattr[meta_name])

        if tag == "title" and "title" in headerattr:
            # NOTE(review): this only sets a member ``value`` on the title
            # element, it presumably does not change what is written for
            # the title - confirm the intent before changing it
            node.value = checked(headerattr["title"])

137

138

def load_and_modify_xml_dom(file, outfile, check_keywords=True):
    """
    Loads a post, fixes some known malformations, completes its header
    (date, keywords) and optionally writes the result.

    @param      file            post to load (utf8)
    @param      outfile         file to write or None to write nothing
    @param      check_keywords  if True, checks the classified keywords
                                contain at least one of the keywords in
                                ``privateKeyClassificationMandatory``
    @return                     *dom* if *outfile* is None,
                                *(dom, resfile)* otherwise where *resfile*
                                is *outfile* if it was written, None if not

    @raise      ValueError      if the post has no title or does not link
                                to ``pMenu.css``
    @raise      KeyError        if the header has no ``keywords`` entry
    @raise      RuntimeError    if *check_keywords* is True and no mandatory
                                keyword was found
    """
    with open(file, "r", encoding="utf8") as f:
        text = f.read()

    memo = text

    # cleaning malformations, the replacements are applied in this order
    replacements = [
        ('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
         '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'),
        ('''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css'>''',
         '''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css' />'''),
        ("<HTML>", "<html>"),
        ("<HEAD>", "<head>"),
        ("</HEAD>", "</head>"),
        ('"../pMenu.css"', '"pMenu.css"'),
        ("'../pMenu.css'", '"pMenu.css"'),
        (" HREF=", " href="),
        ("<LINK REL=", "<link rel="),
        ("<LINK TYPE=", "<link type="),
    ]
    for old, new in replacements:
        text = text.replace(old, new)

    dom, attr, link, head, content = information_from_xml(text, file)

    if "title" not in attr:
        raise ValueError("no title in " + file)
    if "pMenu.css" not in link:
        raise ValueError("no pMenu.css in " + file)

    if "date" not in attr:
        # the date is taken from the first ten characters of the file name
        date = os.path.split(file)[-1]
        date = date[:10]
        attr["date"] = date
        ele = dom.createElement("meta")
        ele.attributes["name"] = "date"  # pylint: disable=E1101
        ele.attributes["content"] = date  # pylint: disable=E1101
        head.appendChild(ele)

    # change keywords
    keywords = [_.strip() for _ in attr["keywords"].split(",")]
    mod_keywords = classify_post(keywords, content)

    if check_keywords:
        inter = [_ for _ in privateKeyClassificationMandatory
                 if _ in mod_keywords]
        if not inter:
            raise RuntimeError(
                f"The post should be at least technical or recreative:\n"
                f" File: {file!r}\ntitle: {attr['title']}\n"
                f"keywords: {keywords!r}\n"
                f"mandatory: {privateKeyClassificationMandatory!r}.")

    attr["keywords"] = ", ".join(mod_keywords)

    # modify header
    modify_header_attributes(dom, attr)

    if outfile is None:
        return dom

    # from xml to text
    text = dom.documentElement.toxml()
    # an empty script element must keep an explicit closing tag
    text = text.replace('type="text/javascript"/>',
                        'type="text/javascript"></script>')
    header = '<?xml version="1.0" encoding="utf-8"?>'
    text = header + "\n" + text

    resfile = None
    if os.path.exists(outfile):
        with open(outfile, "r", encoding="utf8") as f:
            oldtext = f.read()
    else:
        oldtext = ""

    # NOTE(review): the existing output is compared to the content read
    # from *file* (memo), not to the new text - confirm this is intended
    if oldtext != memo:
        direct = os.path.split(outfile)[0]
        # direct is empty when outfile has no directory part,
        # os.makedirs("") would fail in that case
        if direct and not os.path.exists(direct):
            os.makedirs(direct)

        fLOG("updating ", file)
        with open(outfile, "w", encoding="utf8") as f:
            f.write(text)
        resfile = outfile

    return dom, resfile

230

231

def modify_all_posts(folder=".",
                     outfolder=None,
                     exclude=None):
    """
    Modifies and checks the syntax of every post.

    @param      folder      folder (subfolders are processed too)
    @param      outfolder   new location (the modified post is copied
                            somewhere else), if None, the file is replaced
    @param      exclude     if not None, function which avoids some files
                            when it returns a True value, example:

    ::

        lambda f : "_old" in f

    @return                 files, modified where:
                            * files is the list of files processed
                            * modified is the list of modified files
    """
    source_root = os.path.abspath(folder)
    target_root = os.path.abspath(folder if outfolder is None else outfolder)

    files = find_all_blogs_function(source_root, exclude)
    modified = []

    for file in files:
        # same path with the source folder replaced by the destination
        target = file.replace(source_root, target_root)
        fLOG(" loading file ", file, ' to ', target)
        _, written = load_and_modify_xml_dom(file, target)
        if written is not None:
            modified.append(written)

    return files, modified