Coverage for src/pyrsslocal/rss/rss_helper.py: 59%

120 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2023-02-02 02:59 +0100

1""" 

2@file 

3@brief Various functions to automate the collection of blog posts.

4""" 

5import os 

6import webbrowser 

7import sys 

8import threading 

9import datetime 

10from textwrap import dedent 

11from jinja2 import Template 

12from pyquickhelper.filehelper import read_content_ufs 

13from pyensae.sql.database_main import Database 

14from .rss_stream import StreamRSS 

15from .rss_blogpost import BlogPost 

16from .rss_simple_server import RSSServer 

17 

18 

def rss_from_xml_to_database(file, database="database_rss.db3",
                             table="blogs", fLOG=None):
    """
    Parses a list of blogs stored in a :epkg:`XML`
    file using Google Reader format,
    stores the results in a :epkg:`SQLite` database.

    @param      file        (str) xml file containing the list of blogs
                            (see the example below)
    @param      database    database file (sqlite)
    @param      table       table name
    @param      fLOG        logging function
    @return                 number of stored blogs

    The XML file should contain the following:

    ::

        <outline text="XD blog"
                 title="XD blog" type="rss"
                 xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml"
                 htmlUrl="http://www.xavierdupre.fr/blog/xd_blog.html" />

    The function does not check whether or not the blogs were
    already added to the database,
    they will be added a second time. If the table
    does not exist, it will be created.
    """
    # The stream is fully parsed before the database is opened.
    res = list(StreamRSS.enumerate_stream_from_google_list(file))
    db = Database(database, LOG=fLOG)
    db.connect()
    try:
        StreamRSS.fill_table(db, table, res)
    finally:
        # Release the connection even if the insertion fails.
        db.close()
    return len(res)

52 

53 

def rss_download_post_to_database(database="database_rss.db3",
                                  table_blog="blogs",
                                  table_post="posts",
                                  fLOG=None):
    """
    Downloads all posts from a list of blogs stored
    in a database by function @see fn rss_from_xml_to_database.

    @param      database    database file name (SQLite format)
    @param      table_blog  table name of the blogs
    @param      table_post  table name of the post
    @param      fLOG        logging function
    @return                 number of posts downloaded
    """
    db = Database(database, LOG=fLOG)
    db.connect()
    try:
        rss_list = list(db.enumerate_objects(table_blog, StreamRSS))
        list_post = list(
            StreamRSS.enumerate_post_from_rsslist(rss_list, fLOG=fLOG))
        BlogPost.fill_table(db, table_post, list_post, skip_exception=True)
    finally:
        # Release the connection even if reading the blogs, downloading
        # the posts or inserting them raises.
        db.close()

    return len(list_post)

77 

78 

def rss_update_run_server(dbfile, xml_blogs, port=8093, browser=None, period="today",
                          server=None, thread=False, fLOG=None):
    """
    Chains the three steps of a full refresh: registers the blogs
    described in *xml_blogs* into the database, downloads their posts,
    then starts the server and opens a browser.
    The database is created if it does not exist.

    @param      dbfile      (str) sqlite database to create or update
    @param      xml_blogs   (str) xml description of blogs (google format) (file or string)
    @param      port        the main page will be ``http://localhost:port/``
    @param      browser     (str) to choose a different browser than the default one
    @param      period      (str) when opening the browser, it can show the results
                            for last day or last week
    @param      server      to set up your own server
    @param      thread      to start the server in a separate thread
    @param      fLOG        logging function
    @return                 whatever @see fn rss_run_server returns

    You can read the blog post `pyhome3 RSS Reader
    <http://www.xavierdupre.fr/blog/2013-07-28_nojs.html>`_.
    """
    # Step 1: store the list of blogs (default table names are used).
    rss_from_xml_to_database(xml_blogs, database=dbfile, fLOG=fLOG)
    # Step 2: fetch the posts of every registered blog.
    rss_download_post_to_database(database=dbfile, fLOG=fLOG)
    # Step 3: serve the result.
    server_options = dict(browser=browser, period=period,
                          server=server, thread=thread, fLOG=fLOG)
    return rss_run_server(dbfile, port, **server_options)

103 

104 

def rss_run_server(dbfile, port=8093, browser=None, period="today",
                   server=None, thread=False, fLOG=None):
    """
    Starts a server and opens a browser on a page reading blog posts.

    @param      dbfile      (str) existing sqlite database, the function
                            raises an exception if the file is missing
    @param      port        the main page will be ``http://localhost:port/``
    @param      browser     (str) to choose a different browser than the default one
    @param      period      (str) when opening the browser, it can show the results
                            for last day or last week
    @param      server      to set up your own server
    @param      thread      to start the server in a separate thread
    @param      fLOG        logging function
    @return                 the value returned by ``RSSServer.run_server``

    You can read the blog post `RSS Reader
    <http://www.xavierdupre.fr/blog/2013-07-28_nojs.html>`_.

    If *browser* is "none", the browser is not started.
    """
    if not os.path.exists(dbfile):
        raise FileNotFoundError(dbfile)

    def open_browser():
        url = "http://localhost:%d/rss_reader.html?search=%s" % (port, period)
        if fLOG:
            fLOG("opening ", url)

        if browser is None:
            # No preference: rely on the system default browser.
            webbrowser.open(url)
            return
        if browser in ("none", "None"):
            # The caller explicitly asked not to open anything.
            return

        try:
            chosen = webbrowser.get(browser)
        except webbrowser.Error:
            is_firefox_on_windows = (
                browser == "firefox" and sys.platform.startswith("win"))
            if not is_firefox_on_windows:
                raise
            # Firefox may not be registered on Windows: register its
            # usual installation path and try again.
            webbrowser.register(
                'firefox',
                None,
                webbrowser.GenericBrowser(r"C:\Program Files (x86)\Mozilla Firefox\firefox.exe"))
            chosen = webbrowser.get(browser)
        chosen.open(url)

    # webbrowser.open does not return until the browser is closed if the
    # browser was launched with this only tab. If a new tab was created,
    # the call ends quickly. The browser is therefore opened from a
    # separate thread so that the server can start.
    opener = threading.Thread(target=open_browser)
    opener.start()
    # NOTE: the opener thread is never joined; it should be closed here
    # if it is still alive.
    return RSSServer.run_server(
        server, dbfile, port=port, thread=thread, fLOG=fLOG)

157 

158 

def _rss_post_date(post):
    """
    Picks the publication date of a parsed :epkg:`RSS` entry.

    @param      post    entry (mapping) produced by the feed parser
    @return             ``datetime.datetime``

    The first of ``published_parsed``, ``updated_parsed`` which is present
    and not None is used. The current time is returned when neither is
    usable, and dates in the future are clamped to the current time.
    """
    now = datetime.datetime.now()
    for key in ("published_parsed", "updated_parsed"):
        try:
            struct_time = post[key]
        except KeyError:
            continue
        if struct_time is None:
            # The feed declares a date the parser could not interpret.
            continue
        return min(datetime.datetime(*struct_time[:6]), now)
    return now


def enumerate_post_from_rss(content, rss_stream=None):
    """
    Parses a :epkg:`RSS` stream.

    @param      content     :epkg:`RSS` content
    @param      rss_stream  object given to every @see cl BlogPost
                            as its first constructor argument
    @return                 iterator on @see cl BlogPost
    """
    import feedparser  # pylint: disable=C0415
    d = feedparser.parse(content)
    if d is None:
        return

    for post in d["entries"]:
        titleb = post.get("title", "-")
        url = post.get("link", "")

        # The link is used as guid when the entry says so or when
        # the information is missing.
        try:
            guid = url if post["guidislink"] else post["id"]
        except KeyError:
            guid = url

        try:
            desc = post["summary_detail"]["value"]
        except KeyError:
            desc = post.get("summary", "")

        isPermaLink = True
        date = _rss_post_date(post)

        yield BlogPost(rss_stream, titleb, guid,
                       isPermaLink, url, desc, date)

213 

214 

def enumerate_rss_merge(rss_urls, title="compilation", min_size=None):
    """
    Merges the posts of several :epkg:`rss` files or urls
    into a single stream.

    @param      rss_urls    :epkg:`rss` files or urls
    @param      title       title of the merged stream
    @param      min_size    fails if the downloaded file
                            is below this size
    @return                 iterator on the posts, all of them
                            attached to the same new stream
    """
    # Every post is attached to one shared stream carrying the given title.
    merged = StreamRSS(title, None, None, None, None, id=0)
    for source in rss_urls:
        yield from enumerate_post_from_rss(
            read_content_ufs(source, min_size=min_size), rss_stream=merged)

230 

231 

def to_rss(obj, link, description):
    """
    Converts something into :epkg:`RSS`.

    @param      obj             non-empty list of objects implementing
                                method ``to_rss_item``
    @param      link            link of the channel
    @param      description     description of the channel
    @return                     content (str)

    The channel title is the title of the first element if it is
    a @see cl StreamRSS, it is empty otherwise.
    The function raises ``TypeError`` if *obj* is not a list
    and ``ValueError`` if it is empty.
    """
    if not isinstance(obj, list):
        raise TypeError("Unexpected type {}.".format(type(obj)))
    if not obj:
        raise ValueError("obj cannot be empty.")

    first = obj[0]
    title = first.title if isinstance(first, StreamRSS) else ""
    items = [blog.to_rss_item() for blog in obj]

    # The XML declaration must be the very first characters of the
    # document: the newline following the opening quotes is removed,
    # otherwise strict XML parsers reject the output.
    template = dedent("""
        <?xml version="1.0" encoding="utf-8"?>
        <rss version="2.0">
        <channel>
        <title>{{title}}</title>
        <link>{{link}}</link>
        <description>{{description}}</description>
        {{items}}
        </channel>
        </rss>
        """).lstrip()
    tpl = Template(template)
    return tpl.render(link=link, description=description,
                      items='\n'.join(items),
                      title=title)

272 

273 

# Default Jinja2 template of the HTML page produced by function to_html.
# Placeholders: title, author, keywords, rssfile (link to the RSS file),
# header and items (the already rendered posts).
template_html = """
<?xml version="1.0" encoding="utf-8"?>
<html>
<head>
<link href="http://www.xavierdupre.fr/pyhome3.ico" rel="shortcut icon"/>
<link href="http://www.xavierdupre.fr/blog/pMenu.css" rel="stylesheet" type="text/css"/>
<link REL="stylesheet" TYPE="text/css" href="http://www.xavierdupre.fr/blog/javascript/run_prettify.css"/>
<title>{{title}}</title>
<meta content="{{author}}" name="author"/>
<meta content="{{keywords}}" name="keywords"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<script src="http://www.xavierdupre.fr/blog/javascript/pMenu.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/latexit.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/run_prettify.js" type="text/javascript"></script>
<link href="http://www.xavierdupre.fr/blog/javascript/shCore.css" rel="stylesheet" type="text/css"/>
<link href="http://www.xavierdupre.fr/blog/javascript/shThemeDefault.css" rel="stylesheet" type="text/css"/>
<script src="http://www.xavierdupre.fr/blog/javascript/shCore.js" type="text/javascript"></script>
<script src="http://www.xavierdupre.fr/blog/javascript/shAutoloader.js" type="text/javascript"></script>
</head>

<body>

<div class="otherlayer">
<!-- other layer -->
</div>

<div class="sidebar">
</div>

<div class="maintitle">
<h1>{{title}}</h1>
<p><a href="{{rssfile}}"><img src="http://www.xavierdupre.fr/blog/documents/feed-icon-16x16.png"/></a>
<i>{{header}}</i></p>

</div>

<div class="mainbody">

<hr />

{{items}}

<hr />

</div>
<script type="text/javascript">
SyntaxHighlighter.autoloader(
    'js jscript javascript http://www.xavierdupre.fr/blog/javascript/shBrushJScript.js',
    'py python http://www.xavierdupre.fr/blog/javascript/shBrushPython.js',
    'cpp http://www.xavierdupre.fr/blog/javascript/shBrushCpp.js',
    'sql http://www.xavierdupre.fr/blog/javascript/shBrushSql.js',
    'flat plain http://www.xavierdupre.fr/blog/javascript/shBrushPlain.js',
    'vba vb http://www.xavierdupre.fr/blog/javascript/shBrushVb.js',
    'bash http://www.xavierdupre.fr/blog/javascript/shBrushBash.js',
    'cs http://www.xavierdupre.fr/blog/javascript/shBrushCSharp.js',
    'php http://www.xavierdupre.fr/blog/javascript/shBrushPhp.js',
    'css http://www.xavierdupre.fr/blog/javascript/shBrushCss.js',
    'xml html http://www.xavierdupre.fr/blog/javascript/shBrushXml.js'
);
SyntaxHighlighter.all();
</script>
<div id="playscript"/>

</body>
</html>
"""

340 

341 

def to_html(items, template=None, title="BLOG",
            author="AUTHOR", keywords="blog,python",
            header="", rssfile="rssfile.xml",
            **context):
    """
    Produces a :epkg:`HTML`.

    @param      items       list of blog post, every post must implement
                            method ``to_html_item`` and have an attribute ``pubDate``
    @param      template    template (a string or a :epkg:`jinja2` ``Template``)
                            or None to get the default one (``template_html``)
    @param      title       blog title
    @param      author      author
    @param      keywords    keywords
    @param      header      blog description
    @param      rssfile     file RSS
    @param      context     other information given to the template
    @return                 pages

    Posts are rendered from the most recent to the oldest one.
    """
    if template is None:
        template = template_html
    # A custom template used to leave the rendering object undefined:
    # it is now compiled unless it is already a Template.
    if isinstance(template, Template):
        template_ = template
    else:
        template_ = Template(template)

    ordered = sorted(items, reverse=True, key=lambda post: post.pubDate)
    hitems = "\n".join(post.to_html_item() for post in ordered)
    return template_.render(title=title, author=author, keywords=keywords,
                            items=hitems, header=header, rssfile=rssfile,
                            **context)