Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Some automation helpers to grab mails from students about their projects. 

4""" 

5import re 

6import os 

7import sys 

8import json 

9import textwrap 

10import warnings 

11import zipfile 

12from urllib.parse import urlparse 

13import numpy 

14from pyquickhelper.loghelper import noLOG 

15from pyquickhelper.texthelper import remove_diacritics 

16from pyquickhelper.filehelper import remove_folder, explore_folder_iterfile 

17from pyquickhelper.filehelper import ( 

18 unzip_files, zip_files, ungzip_files, un7zip_files, unrar_files, 

19 untar_files 

20) 

21from pyquickhelper.helpgen import nb2html 

22from pyquickhelper.ipythonhelper import upgrade_notebook 

23from pymmails import EmailMessageRenderer, EmailMessage 

24from .repository_exception import RegexRepositoryException, TooManyProjectsException 

25from ..td_1a import edit_distance 

26from ..homeblog.python_exemple_py_to_html import py_to_html_file 

27 

28 

29class ProjectsRepository: 

30 """ 

31 Handle a repository of students projects. 

32 See example :ref:`sphx_glr_automation_fetch_student_projects_from_gmail.py`. 

33 """ 

34 

class MailNotFound(Exception):
    """
    Exception raised when no mail can be found
    for a student or a group of students.
    """

40 

41 _email_regex = re.compile("[*] *e?mails? *: *([^*+\\n]+)") 

42 _gitlab_regex = re.compile("[*] *gitlab *: *([^*+\\n]+[.]git)") 

43 _video_regex = re.compile("[*] *videos? *: *([^*\\n]+)") 

44 

def __init__(self, location, suivi="suivi.rst", fLOG=noLOG):
    """
    Stores the settings describing a repository of projects.

    @param      location    location of the repository
    @param      suivi       name of the file gathering information about each project
    @param      fLOG        logging function
    """
    self._location, self._suivi = location, suivi
    self.fLOG = fLOG

55 

56 @property 

def Location(self):
    """
    Gives the folder the repository was created with.

    @return         location of the repository
    """
    root = self._location
    return root

62 

63 @property 

def Groups(self):
    """
    Returns all available groups in the repository,
    a group is any sub folder of the repository location.
    """
    root = self._location
    groups = []
    for entry in os.listdir(root):
        if os.path.isdir(os.path.join(root, entry)):
            groups.append(entry)
    return groups

70 

def get_group_location(self, group):
    """
    Builds the local folder of a group,
    the group name is joined to the repository location.

    @param      group       group name
    @return                 local folder
    """
    root = self._location
    return os.path.join(root, group)

79 

80 @staticmethod 

def get_regex(path, regex, suivi="suivi.rst", skip_if_empty=False):
    """
    Retrieves data from file ``suivi.rst`` using a regular expression.

    @param      path            sub folder to look into
    @param      regex           compiled regular expression, every value it finds
                                (``findall``) is split on ``;``
    @param      suivi           name of the file ``suivi.rst``
    @param      skip_if_empty   returns an empty list instead of raising an exception
                                if the regular expression does not match
    @return                     list of stripped values (mails, videos, ...)

    The function raises *FileNotFoundError* if the folder or the file is missing,
    *ValueError* if the file cannot be decoded as utf-8,
    *RuntimeError* if nothing is found and *skip_if_empty* is False.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(path)  # pragma: no cover
    filename = os.path.join(path, suivi)
    if not os.path.exists(filename):
        raise FileNotFoundError(filename)  # pragma: no cover

    try:
        with open(filename, "r", encoding="utf8") as f:
            content = f.read()
    except UnicodeDecodeError as e:
        raise ValueError(  # pragma: no cover
            'unable to parse file:\n File "{0}", line 1'.format(filename)) from e

    found = regex.findall(content)
    if len(found) == 0:
        if skip_if_empty:
            return []
        raise RuntimeError(  # pragma: no cover
            "Unable to find the regular expression '{0}' in '{1}'".format(
                regex.pattern, filename))

    # one match may hold several values separated by ';'
    values = []
    for match in found:
        values.extend(match.strip("\n\r\t ").split(";"))

    # a single loop: every value must be returned exactly once
    return [_.strip() for _ in values]

116 

def get_emails(self, group, skip_if_empty=False):
    """
    Retrieves student emails from file ``suivi.rst``
    and checks every mail contains exactly one ``@`` and no end of line.

    @param      group           group
    @param      skip_if_empty   skip if no mail?
    @return                     list of mails
    """
    folder = os.path.join(self._location, group)
    found = ProjectsRepository.get_regex(
        folder, ProjectsRepository._email_regex, self._suivi,
        skip_if_empty=skip_if_empty)
    for mail in found:
        if "\n" in mail:
            raise ValueError(  # pragma: no cover
                "unable to interpret " + str([mail]) + " from path " + folder)
        # exactly one '@' means two pieces around it
        if mail.count("@") != 1:
            raise RegexRepositoryException(  # pragma: no cover
                "unable to understand mail {0} in {1} (suivi={2} (mail separator is ;)".format(
                    mail, folder, self._suivi))
    return found

141 

def get_videos(self, group):
    """
    Retrieves the videos of a group from file ``suivi.rst``.

    @param      group           group
    @return                     list of videos
    """
    # NOTE(review): *group* is given as is to get_regex which expects a folder,
    # it is not joined with the repository location -- confirm callers
    # give a path and not only a group name.
    video_regex = ProjectsRepository._video_regex
    return ProjectsRepository.get_regex(group, video_regex, self._suivi)

150 

def get_sections(self, group):
    """
    Extracts sections from the file used to follow a group of students.
    A section starts with a title, a line followed by a line only made
    of the same character among ``=``, ``+``, ``-``.

    @param      group           group
    @return                     dictionary { section : content }, the content
                                is a list of lines, key ``""`` receives the lines
                                found before any title, key ``"title"`` receives
                                the title underlined with ``=``

    The function raises *FileNotFoundError* if the folder or the file
    is missing, *ValueError* if the file cannot be decoded as utf-8.

    Example of a file::

        rapport
        +++++++

        * bla 1

        extrait
        +++++++

        ::

            paragraphe 1

            paragraphe 2

    """
    path = os.path.join(self._location, group)
    if not os.path.exists(path):
        raise FileNotFoundError(path)  # pragma: no cover
    filename = os.path.join(path, self._suivi)
    if not os.path.exists(filename):
        raise FileNotFoundError(filename)  # pragma: no cover

    try:
        with open(filename, "r", encoding="utf8") as f:
            content = f.read()
    except UnicodeDecodeError as e:
        raise ValueError(  # pragma: no cover
            'unable to parse file:\n File "{0}", line 1'.format(filename)) from e

    lines = [_.strip("\r").rstrip() for _ in content.split("\n")]
    # added_in[k] is the section which received the k-th appended line,
    # it is used to remove a title line from the section it was first added to
    added_in = []
    sections = {"": []}
    title = ""
    for i, line in enumerate(lines):
        if len(line) == 0:
            # empty lines belong to the current section
            sections[title].append(line)
            added_in.append(title)
        else:
            f = line[0]
            if f == " ":
                # indented line
                # title is never None in this function, the first branch
                # is always taken
                if title is not None:
                    sections[title].append(line)
                    added_in.append(title)
                else:
                    sections[""].append(line)
                    added_in.append("")
            elif f in "=+-":
                if line == f * len(line):
                    # underline: the previous line is a title
                    # (if i == 0, lines[i - 1] is the last line of the file)
                    title = lines[i - 1]
                    if len(added_in) > 0:
                        # the title was added to the previous section, we remove it
                        t = added_in[-1]
                        sections[t] = sections[t][:-1]
                        added_in[-1] = title
                    if f == "=":
                        # main title, stored under key "title"
                        sections["title"] = [title]
                        added_in.append("title")
                        title = "title"
                    else:
                        # new (empty) section
                        sections[title] = []
                        added_in.append(title)
                else:
                    # regular line which only begins with =, + or -
                    sections[title].append(line)
                    added_in.append(title)
            else:
                sections[title].append(line)
                added_in.append(title)

    return sections

228 

229 _regex_split = re.compile("[-;,. @]") 

230 

231 @staticmethod 

def match_mail(name, emails, threshold=3, exc=True):
    """
    Tries to match a name among a list of mails.
    Name and mails (the part before ``@``) are lowered, cleaned from
    diacritics, split into sorted tokens and compared with an edit distance.

    @param      name        a name (first name last name separated by a space),
                            a float *nan* is considered as an empty name
    @param      emails      list of emails
    @param      threshold   above this threshold, mails and names don't match
    @param      exc         raise an Exception if not found
    @return                 sorted list of tuples *(distance, mail)*,
                            *[(0, name)]* if *name* is one of the mails
    """
    if isinstance(name, float):
        name = "" if numpy.isnan(name) else str(name)

    # we check the easy case
    if name in emails:
        return [(0, name)]

    def normalize(text):
        # sorted tokens joined with a space
        tokens = [_.strip() for _ in ProjectsRepository._regex_split.split(
            remove_diacritics(text.lower()))]
        tokens.sort()
        return " ".join(tokens)

    key = normalize(name)
    distances = []
    for email in emails:
        local_part = email.split("@")[0]
        distances.append((edit_distance(normalize(local_part), key)[0], email))

    close = sorted(_ for _ in distances if _[0] <= threshold)
    if exc and len(close) == 0:
        raise ProjectsRepository.MailNotFound(  # pragma: no cover
            "unable to find a mail for {0} among\n{1}".format(name, "\n".join(emails)))
    return close

268 

269 @staticmethod 

def match_mails(names, emails, threshold=3, exc=True, skip_names=None):
    """
    Tries to match a series of names among a list of mails.

    @param      names       list of names (first name last name separated by a space)
    @param      emails      list of emails
    @param      threshold   above this threshold, mails and names don't match
    @param      exc         raise an Exception if not found
    @param      skip_names  the returned boolean is True if one of the names
                            belongs to this list
    @return                 list of matched mails, boolean
    """
    found = []
    skip = False
    for name in names:
        if skip_names is not None and name in skip_names:
            skip = True
        matched = ProjectsRepository.match_mail(name, emails, threshold, exc)
        # matched contains tuples (distance, mail), we only keep the mail
        for item in matched:
            found.append(item[1])
    return found, skip

292 

293 @staticmethod 

def create_folders_from_dataframe(df, root, report="suivi.rst", col_student=None, col_group="Groupe",
                                  col_subject="Sujet", col_mail="mail", overwrite=False, email_function=None,
                                  must_have_email=True, skip_if_nomail=False, skip_names=None,
                                  fLOG=noLOG):
    """
    Creates a series of folders for groups of students.
    Every folder receives a report file which contains the members,
    the subject, the group and the mails.

    @param      df              DataFrame
    @param      root            where to create the folders
    @param      report          report file
    @param      col_student     column which contains the student name (first name + last name),
                                equal to *col_mail* if *None*
    @param      col_group       index of the group (it can be *None* if each student is a group)
    @param      col_subject     column which contains the subject
    @param      col_mail        if there is a column which contains the mail in the input dataframe
    @param      overwrite       if False, skip if the report already exists
    @param      email_function  function which infers email from first and last names, see below
    @param      must_have_email if True, raises an exception if no mail is found
    @param      skip_if_nomail  skip a name if no mail is found
    @param      skip_names      less checking for a given set of names
    @param      fLOG            logging function
    @return                     instance of @see cl ProjectsRepository built on *root*
                                (the list of created folders is collected but not returned)

    The function *email_function* has the following signature::

        def email_function(names):
            # part of a names is a list of tokens
            # ...
            return list of mails, skip=boolean

    The boolean tells the function to skip this group.
    *email_function* can be a list of mails. In that case,
    this function is replaced by @see me match_mails
    if *col_mail* is None, otherwise the mails are read from column *col_mail*.
    """
    if col_mail is None and email_function is None:
        raise ValueError(  # pragma: no cover
            "col_mail cannot be None if email_function is None")
    if col_student is None:
        col_student = col_mail

    def local_email_function(names, skip_names):
        # email_function is a list of mails, names are matched against it
        return ProjectsRepository.match_mails(names, email_function,
                                              exc=False, skip_names=skip_names)

    def local_email_function_column(names, skip_names, mapping):
        # mapping is a dictionary { student name: mail }
        res = []
        skip = False
        for name in names:
            if skip_names is not None and name in skip_names:
                skip = True
            r = mapping.get(name, None)
            if r:
                res.append(r)
        return res, skip

    if isinstance(email_function, (list, set)):
        if col_mail is None:
            local_function = local_email_function
        else:
            try:
                # + 1 because itertuples puts the index first
                ind_student = list(df.columns).index(col_student) + 1
                ind_mail = list(df.columns).index(col_mail) + 1
            except ValueError as e:
                raise ValueError(  # pragma: no cover
                    "Unable to find '{0}' or '{1}' in {2}".format(
                        col_student, col_mail, df.columns)) from e
            mapping = {}
            for row in df.itertuples():
                mapping[row[ind_student]] = row[ind_mail]
            # parameter skip is not used, skip_names comes from the enclosing function
            local_function = \
                lambda names, skip, mp=mapping: \
                local_email_function_column(names, skip_names, mp)
    else:
        local_function = email_function

    def ul(last):
        # builds a piece of folder name: spaces and '-' become '.',
        # everything after '@' is dropped
        res = ""
        for i, c in enumerate(last):
            if c == " ":
                res += "."
            elif c == "-":
                res += "."
            elif c == '@':
                break
            else:
                res += c
        return res

    folds = []

    if df.shape[1] == 0:
        raise Exception("No column in the dataframe.")  # pragma: no cover

    if col_group:
        gr = df.groupby(col_group)
    else:
        # one group per row, named after the index
        df2 = df.copy()
        df2["gid"] = df.index
        df2["gid2"] = df2.gid.apply(lambda x: "G%d" % x)
        gr = df2.groupby("gid2")

    fLOG("[ProjectsRepository.create_folders_from_dataframe] number of groups {0}".format(
        len(gr)))

    for name, group in gr:
        if col_subject:
            # a group must have a unique subject, nan values are ignored
            s = list(set(group[col_subject].copy()))
            s = [_ for _ in s if not isinstance(
                _, float) or ~numpy.isnan(_)]
            if len(s) > 1:
                raise TooManyProjectsException(  # pragma: no cover
                    "more than one subject for group: " + str(name) + "\n" + str(s))
            elif len(s) == 0:
                s = ["unknown"]
            subject = s[0]
        else:
            subject = None

        eleves = list(group[col_student])
        eleves.sort()

        if email_function is not None:
            mails, skip = local_function(eleves, skip_names)
            if must_have_email and (not skip and len(mails) == 0):
                # we skip only if a group has no mails at all
                if isinstance(email_function, (list, set)):
                    mes = "unable to find a mail for\n{0}\nname={1}\nskip:{4}\n{5}\namong\n{3}\nGROUP\n{2}\nlocal_function: {6}"
                    raise ProjectsRepository.MailNotFound(  # pragma: no cover
                        mes.format("; ".join("'%s'" % _ for _ in eleves),
                                   name, group, "\n".join(email_function),
                                   skip, skip_names, local_function))
                raise ProjectsRepository.MailNotFound(  # pragma: no cover
                    "unable to find a mail for {0}\nname={1}\n with function\n{3}\nGROUP\n{2}\nTYPE:\n{4}".format(
                        " ;".join(eleves), name, group, email_function, type(email_function)))
            if skip_if_nomail and (not skip and len(mails) == 0):
                fLOG("[ProjectsRepository.create_folders_from_dataframe] skipping {0}".format(
                    "; ".join(eleves)))
                continue
            if mails:
                for m in mails:
                    if "@" not in m:
                        raise ValueError(  # pragma: no cover
                            "mails contains a mail with no @: {0}".format(m))
                    if "<" in m or ">" in m:
                        raise ValueError(  # pragma: no cover
                            "one mail contains weird characters: {0}".format(m))
                jmail = "; ".join(mails)
            else:
                jmail = None
        else:
            jmail = None

        if jmail is not None:
            if "@" not in jmail:
                raise ValueError(  # pragma: no cover
                    "jmail does not contain any @: {0}".format(jmail))

        # content of the report: a title (the members) and a list of bullets
        members = ", ".join(map(str, eleves))
        content = [members]
        content.append("=" * len(members))
        content.append("")

        content.append("* members: {0}".format(members))
        if subject:
            content.append("* subject: {0}".format(subject))
        content.append("* G: {0}".format(name))

        if jmail:
            content.append("* mails: " + jmail)

        content.append("")
        content.append("")

        # the folder name is built from the sorted names of the students
        last = "-".join(ul(a) for a in sorted(map(str, eleves)))

        folder = os.path.join(root, last)
        filename = os.path.join(folder, report)

        if not os.path.exists(folder):
            if '@' in folder:
                raise ValueError(  # pragma: no cover
                    "Folder '{0}' must not contain '@'.".format(folder))
            os.mkdir(folder)

        if overwrite or not os.path.exists(filename):
            with open(filename, "w", encoding="utf8") as f:
                f.write("\n".join(content))

        folds.append(folder)

    proj = ProjectsRepository(root, suivi=report, fLOG=fLOG)

    if must_have_email:
        # final check: every sub folder of root must define at least one mail
        for gr in proj.Groups:
            mails = proj.get_emails(gr)
            if len(mails) == 0:
                raise ValueError(  # pragma: no cover
                    "No mail for group '{0}'.".format(gr))
    return proj

493 

def enumerate_group_mails(self, group, mailbox, subfolder, date=None,
                          skip_function=None, max_dest=5):
    """
    Enumerates all mails sent by or sent to a given group.

    @param      group           group (if None, goes through all mails)
    @param      mailbox         mailbox (see `pymmails <http://www.xavierdupre.fr/app/pymmails/helpsphinx/>`_)
    @param      subfolder       which subfolder of the mailbox to look into
    @param      date            date
    @param      skip_function   if not None, use this function on the header/body
                                to avoid loading the entire message (and skip it)
    @param      max_dest        maximum number of receivers
    @return                     iterator on mails
    """
    if group is None:
        # every group of the repository, one after the other
        for group_ in self.Groups:
            self.fLOG(
                "[ProjectsRepository.enumerate_group_mails] group='{0}'".format(group_))
            yield from self.enumerate_group_mails(
                group_, mailbox, subfolder=subfolder, date=date,
                skip_function=skip_function, max_dest=max_dest)
    else:
        mails = self.get_emails(group)
        self.fLOG("[ProjectsRepository.enumerate_group_mails] mails='{0}' folder='{1}' date={2}".format(
            str(mails), subfolder, date))
        # max_dest is forwarded to the mailbox (it used to be replaced by 5)
        yield from mailbox.enumerate_search_person(
            person=mails,
            folder=subfolder,
            skip_function=skip_function,
            date=date,
            max_dest=max_dest)

527 

def dump_group_mails(self, renderer, group, mailbox, subfolder, date=None,
                     skip_function=None, max_dest=5, filename="index_mails.html",
                     overwrite=False, skip_if_empty=False, convert_files=False):
    """
    Dumps all mails sent by or sent to a given group
    into the folder of the group.

    @param      renderer        instance of class `EmailMessageListRenderer
                                <http://www.xavierdupre.fr/app/pymmails/helpsphinx/pymmails/render/
                                email_message_list_renderer.html>`_
    @param      group           group (if None, goes through all groups)
    @param      mailbox         mailbox (see `pymmails <http://www.xavierdupre.fr/app/pymmails/helpsphinx/>`_)
    @param      subfolder       which subfolder of the mailbox to look into
    @param      date            date
    @param      skip_function   if not None, use this function on the header/body to avoid loading
                                the entire message (and skip it)
    @param      max_dest        maximum number of receivers
    @param      filename        filename which gathers a link to every mail
    @param      overwrite       overwrite, forced to True if the number of mails
                                in the mailbox differs from the number of dumped mails
    @param      skip_if_empty   skip if no mail?
    @param      convert_files   unzip and convert
    @return                     list of files (see `EmailMessageListRenderer.write
                                <http://www.xavierdupre.fr/app/pymmails/helpsphinx/pymmails/render/
                                email_message_list_renderer.html>`_)

    zip, gz, rar, 7z can be uncompressed.
    It then convert *.py* and *.ipynb* into html.
    """
    if group is None:
        res = []
        for group_ in self.Groups:
            # filename is forwarded as well, every group gets the same index name
            r = self.dump_group_mails(renderer, group_, mailbox, subfolder=subfolder,
                                      date=date, skip_function=skip_function, max_dest=max_dest,
                                      filename=filename, overwrite=overwrite,
                                      skip_if_empty=skip_if_empty,
                                      convert_files=convert_files)
            res.extend(r)
        return res

    mails = self.get_emails(group, skip_if_empty=skip_if_empty)
    if skip_if_empty and len(mails) == 0:
        self.fLOG("[ProjectsRepository.dump_group_mails] SKIP group='{0}' folder='{1}' date={2} mails={3}".format(
            group, subfolder, date, str(mails)))
        return []
    self.fLOG("[ProjectsRepository.dump_group_mails] group='{0}' folder='{1}' date={2} mails={3}".format(
        group, subfolder, date, str(mails)))

    def iter_mail(body=True):
        return mailbox.enumerate_search_person(person=mails, folder=subfolder,
                                               skip_function=skip_function, date=date,
                                               max_dest=max_dest, body=body)

    # mails already dumped against mails in the mailbox (headers only)
    nbmails = len(self.list_mails(group))
    nbcur = len(list(iter_mail(body=False)))
    if nbmails != nbcur:
        overwrite = True
        # logs both counts (the second one used to be the string "nbmails")
        self.fLOG("[dump_group_mails] group='{0}' - new mails".format(
            group), nbcur, "<", nbmails)

    mail_iter = iter_mail(body=True)
    location = self.get_group_location(group)

    r = renderer.write(iter=mail_iter, location=location,
                       filename=filename, overwrite=overwrite,
                       file_jsatt="_summaryattachements_raw.json",
                       attach_folder="attachments")
    renderer.flush()

    # attachments in JSON format
    json_att = []
    metadata = {}

    # first pass: metadata files, the key is the attachment name
    # (the name without the extension '.metadata', 9 characters)
    for name in self.enumerate_group_files(group):
        if "attachments" not in name or not name.endswith('.metadata'):
            continue
        sname = os.path.relpath(name, location).replace("\\", "/")
        metadata[sname[:-9]] = sname

    # second pass: the attachments
    for name in self.enumerate_group_files(group):
        if "attachments" not in name or name.endswith('.metadata'):
            continue
        sname = os.path.relpath(name, location).replace("\\", "/")
        info = dict(a=sname, name=sname)
        if sname in metadata:
            info['info'] = '<a href="{0}">metadata</a>'.format(
                metadata[sname])
        json_att.append(info)

    if convert_files:
        converted = self.unzip_convert(group)
        for conv in converted:
            sconv = os.path.relpath(conv, location).replace("\\", "/")
            json_att.append(
                dict(a=sconv, name=sconv, unzip_convert='Yes'))

    file_jsatt = os.path.join(location, "_summaryattachements.json")
    if json_att and not renderer.BufferWrite.exists(file_jsatt, local=not overwrite):
        f = renderer.BufferWrite.open(
            file_jsatt, text=True, encoding='utf-8')
        js = json.dumps(json_att)
        f.write(js)

    return r

629 

def remove_group(self, group):
    """
    Removes a group, which means the folder associated to the group.

    @param      group       group
    @return                 list of removed files

    See `remove_folder <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/
    pyquickhelper/filehelper/synchelper.html#module-pyquickhelper.filehelper.synchelper>`_.
    """
    return remove_folder(self.get_group_location(group))

642 

def enumerate_group_files(self, group):
    """
    Enumerates all files in a group.

    @param      group       group, if None, the function goes through
                            all the groups of the repository
    @return                 iterator on files
    """
    if group is not None:
        yield from explore_folder_iterfile(self.get_group_location(group))
        return
    for name in self.Groups:
        yield from self.enumerate_group_files(name)

658 

def list_mails(self, group):
    """
    Returns the mails already dumped for a group: files whose name
    starts with ``d_`` and ends with ``.html``, files whose path
    contains ``attachments`` are ignored.

    @param      group       group name
    @return                 list of mails (filenames)
    """
    def is_mail(path):
        if "attachments" in path:
            return False
        base = os.path.split(path)[-1]
        return base.startswith("d_") and base.endswith(".html")

    return [name for name in self.enumerate_group_files(group) if is_mail(name)]

675 

def zip_group(self, group, outfile, addition=None):
    """
    Zips a group, paths are stored relatively to the repository location.

    @param      group       group
    @param      outfile     output file
    @param      addition    additional files (sequence)
    @return                 list of zipped files
    """
    def iter_files():
        # files of the group first, then the additional ones
        yield from self.enumerate_group_files(group)
        if addition:
            yield from addition

    return zip_files(outfile, iter_files(), root=self._location)

692 

693 _link_regex = re.compile("(https?[:][^ \\\"<>)(]+)") 

694 

695 _known_strings = ["xavierdupre.fr", "doodle", "ensaenotebook", "teralab", 

696 "outlook.com", "gohlke", "support.google", "help.github", 

697 "api.jcdecaux"] 

698 

# Default template used by write_summary when no renderer is given.
# It reads the variables title, css and groups, and calls len, os and
# format_size. Every item of groups is accessed through the keys
# link, group, nb, size, emails, attachments, links, created_files.
# NOTE(review): the string given to replace probably held several spaces
# (the indentation of the template inside the class), confirm against
# the original file before changing it.
_default_template_summary = """<?xml version="1.0" encoding="utf-8"?>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
</head>
<body>
<html>
<head>
<title>{{ title }}</title>
<link rel="stylesheet" type="text/css" href="{{ css }}">
</head>
<body>
<h1>{{ title }}</h1>
<ol type="1">
{% for ps in groups %}
<li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>
{{ ps["nb"] }} files - {{ format_size(ps["size"]) }} -
{% if len(ps["emails"]) > 0 %}
last mail {{ ps["emails"][-1]["date"] }} ---{% else %}
No mail found. {% endif %}
{{ len(ps["attachments"]) }} attachments</i></small>
{% if len(ps["attachments"]) + len(ps["links"]) > 0 %}
<ul>
{% for day, att, data in ps["attachments"] %}
<li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>
{% endfor %}
{% for date, from_, url, domain, last in ps["links"] %}
<li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>
{% endfor %}
</ul>
{% endif %}
{% if len(ps["created_files"]) > 0 %}
<ul>
{% for name, relpath, size in ps["created_files"] %}
<li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
{% endfor %}
</ul>
{% endif %}
</li>
{% endfor %}
</ol>
</body>
</html>
""".replace(" ", "")

742 

def write_run_command(self, filename=None, renderer=None):
    """
    Writes a command script to run a server for this local content.
    The server runs the javascripts fetching for local files.
    The content is available at ``http://localhost:9000/``.

    @param      filename    name of the script written in the repository location,
                            if None, ``run_server.bat`` on Windows,
                            ``run_server.sh`` otherwise
    @param      renderer    not used by this method
    """
    if filename is None:
        on_windows = sys.platform.startswith('win')
        filename = "run_server.bat" if on_windows else "run_server.sh"

    script = textwrap.dedent("""
        echo Open a browser with url '{}'
        python3 -m http.server 9000
        """).format("http://localhost:9000/")
    dest = os.path.join(self.Location, filename)
    self.fLOG("[write_run_command] write '{}'.".format(dest))
    with open(dest, 'w') as f:
        f.write(script)

764 

def write_summary(self, renderer=None, link="index_mails.html",
                  outfile="index.html", title="summary",
                  nolink_if=None):
    """
    Produces a summary and uses a :epkg:`Jinja2` template.
    For every group, it counts the files, sums their size, lists
    the attachments, the files created in sub folders of the attachments,
    the mails and the urls found in the mails.

    @param      renderer    instance of `EmailMessageRenderer
                            <http://www.xavierdupre.fr/app/pymmails/
                            helpsphinx//pymmails/render/email_message_renderer.html>`_),
                            can be None, in that case, a renderer is created with
                            ``ProjectsRepository._default_template_summary``
                            and flushed at the end
    @param      link        look for this file in each folder
    @param      outfile     output file
    @param      title       title
    @param      nolink_if   link containing those strings will be removed
                            (if None, ``ProjectsRepository._known_strings`` is used)
    @return                 summary (what ``renderer.write`` returns)
    """
    if nolink_if is None:
        nolink_if = ProjectsRepository._known_strings

    def filter_in(url):
        # tells if a url should be kept
        if "\n" in url or "\r" in url or "\t" in url:
            return False
        if url.endswith("&quot;"):
            return False
        for _ in nolink_if:
            if _ in url:
                return False
        if ".ipynb_checkpoints" in url:
            return False
        return True

    def clean_url(u):
        # removes html entities and surrounding characters
        u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
        if u.endswith("&nbsp;"):
            # 6 is the length of "&nbsp;"
            u = u[:-6]
        return u

    def url_domain_name(url):
        # returns the domain and the last piece of the url
        # (its 30 last characters at most)
        r = urlparse(url)
        domain = r.netloc
        name = [_ for _ in url.split("/") if _]
        last = name[-1] if len(name) > 0 else domain
        if len(last) > 30:
            last = last[-30:]
        return domain, clean_url(last)

    def format_size(s):
        # the unit changes once the size is above 2048 units of the previous one
        if s <= 2 ** 11:
            return "{0} bytes".format(s)
        elif s <= 2 ** 21:
            return "{0} Kb".format(s // (2 ** 10))
        elif s <= 2 ** 31:
            return "{0} Mb".format(s // (2 ** 20))
        else:
            return "{0} Gb".format(s // (2 ** 30))

    groups = []
    for group in self.Groups:
        # c = (link to the group, group name)
        lp = os.path.join(self.get_group_location(group), link)
        if os.path.exists(lp):
            c = os.path.relpath(lp, self._location), group
        else:
            c = "file:///{0}".format(group), group
        nb_files = 0
        size = 0
        atts = []
        emails = []
        links = []
        created_files = []
        for name in self.enumerate_group_files(group):
            if name.endswith(".metadata"):
                continue
            loc = self.get_group_location(group)
            nb_files += 1
            tn = name
            size += os.stat(tn).st_size
            folder = os.path.split(name)[0]
            splf = folder.replace("\\", "/").split("/")
            if folder.endswith("attachments"):
                # a file directly in folder attachments,
                # the date comes from the metadata file if it exists
                meta = name + ".metadata"
                if os.path.exists(meta):
                    data = EmailMessage.read_metadata(meta)
                    day = data["date"].strftime("%Y-%m-%d")
                else:
                    data = None
                    day = ""
                atts.append((day, os.path.relpath(
                    name, self._location), data))
            elif "attachments" in splf:
                # a file in a sub folder of attachments
                rel = os.path.relpath(name, loc)
                dest = os.path.relpath(name, self._location)
                if rel == dest:
                    raise Exception(  # pragma: no cover
                        "weird\n{0}\n{1}".format(rel, dest))
                ssize = format_size(os.stat(name).st_size)
                if "__MACOSX" not in rel and "__MACOSX" not in dest and \
                        ".ipynb_checkpoints" not in dest and ".ipynb_checkpoints" not in rel:
                    created_files.append((rel, dest, ssize))
            else:
                # any other file, it is a mail if its name
                # gives a date, a uid and a sender
                mail = os.path.split(name)[-1]
                res = EmailMessage.interpret_default_filename(mail)
                if "date" in res and "uid" in res and "from" in res:
                    emails.append(
                        (res["date"], res["from"], res["uid"], res))
                    with open(os.path.join(loc, mail), "r", encoding="utf8") as f:
                        content = f.read()
                    urls = ProjectsRepository._link_regex.findall(content)
                    if urls:
                        for u in set(urls):
                            u = clean_url(u)
                            if not filter_in(u):
                                continue
                            domain, last = url_domain_name(u)
                            links.append(
                                (res["date"], res["from"], clean_url(u), domain, last))

        # we sort
        atts.sort()
        links.sort()

        # we clean duplicated links
        mlinks = links
        links = []
        done = {}
        for date, from_, url, domain, last in mlinks:
            if url in done:
                continue
            if "__MACOSX" in url or "__MACOSX" in last or \
                    ".ipynb_checkpoints" in last or ".ipynb_checkpoints" in url:
                continue
            links.append((date, from_, url, domain, last))
            done[url] = True

        # we create the variable for the template
        emails = [_[-1] for _ in sorted(emails)]
        c = dict(link=c[0].replace("\\", "/"), group=c[1], nb=nb_files,
                 size=size, attachments=atts, emails=emails, links=links,
                 created_files=created_files)

        groups.append(c)

    # final summary
    if renderer is None:
        tmpl = ProjectsRepository._default_template_summary
        renderer = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)
        # the renderer was created here, it must be flushed here
        dof = True
    else:
        dof = False
    res = renderer.write(filename=outfile, location=self.Location,
                         mail=None, attachments=None, groups=groups,
                         title=title, len=len, os=os,
                         format_size=format_size)
    if dof:
        renderer.flush()
    return res

927 

def unzip_convert(self, group):
    """
    Unzips the files of a group (``unzip_files``),
    then converts them (``convert_files``).

    @param      group       group name
    @return                 list of new files, the list returned by ``convert_files``
    """
    self.unzip_files(group)
    converted = self.convert_files(group)
    return converted

937 

def unzip_files(self, group):
    """
    Uncompresses the archives found among the attachments of a group.

    Only files whose path contains ``attachments`` are considered.
    Supported archives are ``.zip``, ``.7z``, ``.rar``, ``.tar.gz`` and
    ``.gz``. Each archive is extracted into a folder named after it:
    the last extension is dropped, one of the suffixes ``_zip``, ``_7z``,
    ``_rar``, ``_targz``, ``_gz`` is appended (``x.tar.gz`` gives
    ``x.tar_targz``) and spaces, commas, ampersands, tabs and line breaks
    are replaced by ``_``. An archive whose folder already exists is
    considered as already processed and is skipped.

    This method does not convert anything into :epkg:`HTML`,
    that is the job of *convert_files*.

    @param      group       group name
    @return                 list of new files, as returned by the
                            extraction functions
    """
    # characters replaced by "_" in the name of the destination folder
    translation = str.maketrans(dict.fromkeys(" ,&\r\n\t", "_"))

    def fvalid(zip_name, local_name):
        # *local_name* belongs to the callback signature but is not needed,
        # compiled python files are never extracted
        return "__pycache__" not in zip_name and not zip_name.endswith(".pyc")

    def extract_zip(archive, folder):
        # a corrupted zip must not stop the processing of the other files
        try:
            return unzip_files(
                archive, folder, fLOG=self.fLOG, fvalid=fvalid, fail_if_error=False)
        except (zipfile.BadZipFile, NotImplementedError, OSError) as e:
            self.fLOG(
                "[ProjectsRepository.unzip_files] ERROR: unable to unzip '{0}' because of '{1}']".format(archive, e))
            return []

    def extract_7z(archive, folder):
        return un7zip_files(archive, folder, fLOG=self.fLOG, fvalid=fvalid)

    def extract_rar(archive, folder):
        return unrar_files(archive, folder, fLOG=self.fLOG, fvalid=fvalid)

    def extract_targz(archive, folder):
        return untar_files(archive, folder, fLOG=self.fLOG)

    def extract_gz(archive, folder):
        # ``unzip`` is False when the name contains "pkl.gz"
        unzip = "pkl.gz" not in archive
        return ungzip_files(
            archive, folder, fLOG=self.fLOG, fvalid=fvalid, unzip=unzip)

    # extension -> (folder suffix, tag used in the logs, verb used in the
    # logs, extraction function)
    by_extension = {
        ".zip": ("_zip", "unzip_files", "unzip", extract_zip),
        ".7z": ("_7z", "un7zip_files", "un7zip", extract_7z),
        ".rar": ("_rar", "unrar_files", "unrar", extract_rar),
        ".gz": ("_gz", "ungzip_files", "ungzip", extract_gz),
    }
    # ".tar.gz" also ends with ".gz", it is tested first
    targz = ("_targz", "untar_files", "ungzip", extract_targz)

    files = []
    # the list is materialized before anything is extracted
    for name in list(self.enumerate_group_files(group)):
        if "attachments" not in name:
            continue
        if name.endswith(".tar.gz"):
            handler = targz
        else:
            handler = by_extension.get(os.path.splitext(name)[-1])
            if handler is None:
                # not a supported archive
                continue
        suffix, tag, verb, extract = handler

        folder = (os.path.splitext(name)[0] + suffix).translate(translation)
        if os.path.exists(folder):
            # already done, we do not do it again
            continue

        self.fLOG("[ProjectsRepository.{0}] {1} '{2}'".format(tag, verb, name))
        self.fLOG("[ProjectsRepository.{0}] creating '{1}'".format(tag, folder))
        os.makedirs(folder)
        files.extend(extract(name, folder))
    return files

1046 

def convert_files(self, group):
    """
    Converts the notebooks (``.ipynb``) and the python scripts (``.py``)
    found among the attachments of a group into :epkg:`HTML`.

    Only files whose path contains ``attachments`` are considered.
    The output is written next to the source, ``.html`` is appended to
    its name, an existing output is overwritten after a warning.
    A failing conversion raises a warning, not an exception,
    and its output is left out of the result.

    @param      group       group name
    @return                 list of new files
    """
    produced = []
    for path in list(self.enumerate_group_files(group)):
        if "attachments" not in path:
            continue
        suffix = os.path.splitext(path)[-1]
        if suffix not in (".ipynb", ".py"):
            continue

        # steps shared by both kinds of files
        self.fLOG(
            "[ProjectsRepository.convert_files] convert '{0}'".format(path))
        target = path + ".html"
        if os.path.exists(target):
            warnings.warn(
                "[convert_files] overwriting '{0}'".format(target))

        if suffix == ".ipynb":
            try:
                upgrade_notebook(path)
                nb2html(path, target, exc=False)
                produced.append(target)
            except Exception as exc:
                warnings.warn(
                    "unable to convert a notebook '{0}' because of {1}".format(path, exc))
        else:
            try:
                # the title is the path relative to the group location
                title = os.path.relpath(path, self.get_group_location(group))
                py_to_html_file(path, target, False, title=title)
                produced.append(target)
            except Exception:
                # the syntax of the python file might be wrong
                warnings.warn(
                    "unable to convert File \"{0}\"".format(path))
    return produced