Coverage for src/ensae_teaching_cs/homeblog/modifypost.py: 82%
159 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Helpers which modify a post.
4"""
5import xml.dom.minidom
6import os
7from pyquickhelper.loghelper import fLOG
8from .filefunction import find_all_blogs_function
9from .postclassification import privateKeyClassificationMandatory, classify_post
12def _write_data(writer, data, section):
13 """
14 Writes datachars to writer and deals with < >
15 @param writer stream or file
16 @param data string to write
17 @param section depending of this name,
18 some special characters are processed or not (pre or !pre)
19 """
20 if data:
21 if section == "pre":
22 data = data.replace("&", "&") \
23 .replace("<", "<") \
24 .replace(">", ">")
25 elif section == "script":
26 pass
27 else:
28 if section is None:
29 raise ValueError("section name is empty")
30 data = data.replace("&", "&") \
31 .replace("<", "<") \
32 .replace("\"", """) \
33 .replace(">", ">")
34 writer.write(data)
def Text_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Serializes a text node, meant to replace ``xml.dom.minidom.Text.writexml``.

    The escaping rules depend on the section name: the node's own
    ``localName`` when it is not None, otherwise the attribute
    ``sectionNameSpecial`` read from the node.

    @param      writer      stream or file
    @param      indent      current indentation, written before the data
    @param      addindent   unused, kept for signature compatibility
    @param      newl        newline string, written after the data
    @raises     ValueError  if no section name can be determined
    """
    own_name = self.localName
    section = self.sectionNameSpecial if own_name is None else own_name
    if section is None:
        raise ValueError("section name is empty")
    payload = f"{indent}{self.data}{newl}"
    _write_data(writer, payload, section)
def Element_writexml(self, writer, indent="", addindent="", newl=""):
    """
    Serializes an element, meant to replace ``xml.dom.minidom.Element.writexml``.

    Attributes are written in alphabetical order. Before a child is
    serialized, every child whose ``localName`` is None receives this
    element's ``localName`` in its attribute ``sectionNameSpecial``.

    @param      writer      stream or file
    @param      indent      current indentation
    @param      addindent   indentation to add to higher levels
    @param      newl        newline string
    """
    writer.write(indent + "<" + self.tagName)

    attributes = self._get_attributes()
    for attr_name in sorted(attributes.keys()):
        writer.write(f" {attr_name}=\"")
        # attribute values are written with an empty section name
        _write_data(writer, attributes[attr_name].value, "")
        writer.write("\"")

    if not self.childNodes:
        # no child: self-closing tag
        writer.write(f"/>{newl}")
        return

    writer.write(f">{newl}")
    child_indent = indent + addindent
    for child in self.childNodes:
        if child.localName is None:
            child.sectionNameSpecial = self.localName
        child.writexml(writer, child_indent, addindent, newl)
    writer.write(f"{indent}</{self.tagName}>{newl}")
# Replaces minidom's serialization helpers with the versions defined in this
# module; the change applies to the whole process once the module is imported.
setattr(xml.dom.minidom, "_write_data", _write_data)
setattr(xml.dom.minidom.Text, "writexml", Text_writexml)
setattr(xml.dom.minidom.Element, "writexml", Element_writexml)
def information_from_xml(fullFileContent, file):
    """
    Parses a post and extracts the information stored in its header.

    @param      fullFileContent     content of the post (XML string)
    @param      file                file name, only used in messages
    @return                         *dom, attr, link, head, content* where
                                    * *dom* is the parsed document,
                                    * *attr* maps every ``meta`` name to its content,
                                      plus ``"title"`` mapped to the serialized title element,
                                    * *link* is the list of ``href`` of the ``link`` elements,
                                    * *head* is the first ``head`` element,
                                    * *content* is the serialized first ``body`` element
    @raises     ValueError          if the header contains an upper case ``META``
                                    or if no attribute was found

    Any parsing exception is logged, printed and raised again.
    """
    try:
        dom = xml.dom.minidom.parseString(fullFileContent)
    except Exception as e:
        fLOG("issue with file ", file)
        # keeps the end of the message, starting at the word "line"
        message = str(e)
        message = message[message.find("line"):]
        print(f" File \"{file}\", {message} syntax error")
        raise e

    root = dom.documentElement
    head = root.getElementsByTagName("head")[0]

    attr = {}
    link = []
    for node in head.childNodes:
        tag = node.localName
        if tag == "META":
            raise ValueError("upper META")
        if tag == "meta":
            if "name" in node.attributes:
                meta_name = node.attributes["name"].value
                meta_content = node.attributes["content"].value
                attr[meta_name] = meta_content
        elif tag == "title":
            attr["title"] = node.toxml()
        elif tag == "link":
            link.append(node.attributes["href"].value)

    if not attr:
        raise ValueError("document " + file + " has no attribute")

    body = root.getElementsByTagName("body")[0]
    return dom, attr, link, head, body.toxml()
def modify_header_attributes(dom, headerattr):
    """
    Updates the ``meta`` elements of the first ``head`` element with the
    values stored in *headerattr*. The function does not modify links.

    @param      dom         parsed document, modified in place
    @param      headerattr  dictionary ``{ meta name: content }``, it may
                            also contain the key ``"title"``
    @raises     ValueError  if the header contains an upper case ``META``
    @raises     TypeError   if a value to store is not a string
    """
    def checked(value):
        # only strings can be stored in the header
        if not isinstance(value, str):
            raise TypeError(
                "content should be a string not " + str(type(value)))
        return value

    head = dom.documentElement.getElementsByTagName("head")[0]
    for node in head.childNodes:
        tag = node.localName
        if tag == "META":
            raise ValueError("upper META")

        if tag == "meta" and "name" in node.attributes:
            meta_name = node.attributes["name"].value
            if meta_name in headerattr:
                node.attributes["content"].value = checked(
                    headerattr[meta_name])

        if tag == "title" and "title" in headerattr:
            # NOTE(review): this stores the string in an attribute ``value``
            # of the node, the child text nodes are left untouched --
            # confirm this is the intent.
            node.value = checked(headerattr["title"])
def load_and_modify_xml_dom(file, outfile, check_keywords=True):
    """
    Loads a post, repairs some known markup malformations, completes and
    updates its header and optionally saves the modified post.

    @param      file            post to load (read with encoding utf8)
    @param      outfile         file to write or None to skip this step
    @param      check_keywords  if True, checks that at least one keyword of
                                *privateKeyClassificationMandatory* belongs to
                                the keywords returned by *classify_post*
    @return                     *dom* if *outfile* is None, otherwise
                                *(dom, resfile)* where *resfile* is *outfile*
                                if the file was written and None if not
    @raises     ValueError      if the post has no title or does not link to ``pMenu.css``
    @raises     KeyError        if the post has no meta ``keywords``
    @raises     RuntimeError    if *check_keywords* is True and no mandatory
                                keyword was found
    """
    with open(file, "r", encoding="utf8") as f:
        text = f.read()

    # cleaning malformations, the replacements are applied in this order
    replacements = (
        ('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
         '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'),
        ('''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css'>''',
         '''<LINK REL="stylesheet" TYPE="text/css" HREF='../pMenu.css' />'''),
        ("<HTML>", "<html>"),
        ("<HEAD>", "<head>"),
        ("</HEAD>", "</head>"),
        ('"../pMenu.css"', '"pMenu.css"'),
        ("'../pMenu.css'", '"pMenu.css"'),
        (" HREF=", " href="),
        ("<LINK REL=", "<link rel="),
        ("<LINK TYPE=", "<link type="),
    )
    for old, new in replacements:
        text = text.replace(old, new)

    dom, attr, link, head, content = information_from_xml(text, file)

    if "title" not in attr:
        raise ValueError("no title in " + file)
    if "pMenu.css" not in link:
        raise ValueError("no pMenu.css in " + file)

    if "date" not in attr:
        # the date is taken from the first ten characters of the file name
        date = os.path.split(file)[-1][:10]
        attr["date"] = date
        ele = dom.createElement("meta")
        ele.setAttribute("name", "date")
        ele.setAttribute("content", date)
        head.appendChild(ele)

    # change keywords
    keywords = [_.strip() for _ in attr["keywords"].split(",")]
    mod_keywords = classify_post(keywords, content)

    if check_keywords:
        inter = [_ for _ in privateKeyClassificationMandatory
                 if _ in mod_keywords]
        if len(inter) == 0:
            raise RuntimeError(
                f"The post should be at least technical or recreative:\n"
                f" File: {file!r}\ntitle: {attr['title']}\n"
                f"keywords: {keywords!r}\n"
                f"mandatory: {privateKeyClassificationMandatory!r}.")

    attr["keywords"] = ", ".join(mod_keywords)

    # modify header
    modify_header_attributes(dom, attr)

    if outfile is None:
        return dom

    # from xml to text
    text = dom.documentElement.toxml()
    # an empty script element must keep an explicit closing tag
    text = text.replace('type="text/javascript"/>',
                        'type="text/javascript"></script>')
    header = '<?xml version="1.0" encoding="utf-8"?>'
    text = header + "\n" + text

    if os.path.exists(outfile):
        with open(outfile, "r", encoding="utf8") as f:
            oldtext = f.read()
    else:
        oldtext = ""

    resfile = None
    # The existing output is compared with the new content (not with the raw
    # input), otherwise a post modified in place (outfile == file) would
    # never be rewritten.
    if oldtext != text:
        direct = os.path.split(outfile)[0]
        # direct is empty when outfile has no folder, nothing to create then
        if direct and not os.path.exists(direct):
            os.makedirs(direct)

        fLOG("updating ", file)
        with open(outfile, "w", encoding="utf8") as f:
            f.write(text)
        resfile = outfile

    return dom, resfile
def modify_all_posts(folder=".",
                     outfolder=None,
                     exclude=None):
    """
    Modifies and checks the syntax of every post found in a folder.

    @param      folder      folder (subfolders are processed as well)
    @param      outfolder   new location (the modified post is copied somewhere else),
                            if None, the post is replaced
    @param      exclude     if not None, function used to skip some files,
                            a file is skipped if the function returns a True value,
                            example:

    ::

        lambda f : "_old" in f

    @return                 files, modified where:
                            * files is the list of files processed
                            * modified is the list of modified files
    """
    source_root = os.path.abspath(folder)
    target_root = os.path.abspath(folder if outfolder is None else outfolder)

    files = find_all_blogs_function(source_root, exclude)
    modified = []
    for file in files:
        destination = file.replace(source_root, target_root)
        fLOG(" loading file ", file, ' to ', destination)
        _dom, written = load_and_modify_xml_dom(file, destination)
        if written is not None:
            modified.append(written)

    return files, modified