Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Keeps an encrypted backup of personal data
4"""
5import re
6import os
7import datetime
8import zlib
9from io import BytesIO as StreamIO
10from .files_status import FilesStatus
11from ..loghelper.flog import noLOG
12from .transfer_api import TransferAPI_FileInfo
13from .encryption import encrypt_stream, decrypt_stream
class EncryptedBackupError(Exception):
    """
    Raised by @see cl EncryptedBackup when the mapping is missing or
    out of date, or when too many files failed to be transferred.
    """
23class EncryptedBackup:
25 """
26 This class aims at keeping an encrypted and compressed backup of files.
27 Every file is compressed and then encrypted before being uploaded to the
28 remote location. Its name still contains the container but the
29 file name is a hash. A
31 .. exref::
32 :title: Encrypted and compressed backup
34 Here is an example which stores everything on hard drive.
35 A second run only modifies files updated between the two processes.
36 A modified file does not remove the previous version,
37 it creates a new file.
38 Example::
40 from pyquickhelper.loghelper import fLOG
41 from pyquickhelper.filehelper import FileTreeNode, EncryptedBackup
42 from pyensae.remote import TransferAPIFile
44 key_crypt = "crypt"
46 local = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
47 this = os.path.normpath(os.path.dirname(__file__))
48 file_status=os.path.join(this, "backup_status.txt")
49 file_map=os.path.join(this, "backup_mapping.txt")
51 backup = True
52 if backup:
53 # code to backup
54 root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
55 api = TransferAPIFile("f:\\\\mycryptedbackup")
56 ft = FileTreeNode(root, repository=True)
57 enc = EncryptedBackup(
58 key=key_crypt,
59 file_tree_node=ft,
60 transfer_api=api,
61 root_local=local,
62 file_status=file_status,
63 file_map=file_map,
64 fLOG=print)
66 enc.start_transfering()
68 restore = not backup
69 if restore:
70 # code to restore
71 root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
72 api = TransferAPIFile("f:\\\\mycryptedbackup")
73 enc = EncryptedBackup(
74 key=key_crypt,
75 file_tree_node=None,
76 transfer_api=api,
77 root_local=local,
78 file_status=file_status,
79 file_map=file_map,
80 fLOG=print)
82 dest=os.path.join(this, "_temp")
83 enc.retrieve_all(dest)
84 """
86 def __init__(self, key, file_tree_node, transfer_api,
87 file_status, file_map, root_local=None,
88 root_remote=None, filter_out=None,
89 threshold_size=2 ** 24, algo="AES",
90 compression="lzma", fLOG=noLOG):
91 """
92 constructor
94 @param key key for encryption
95 @param file_tree_node @see cl FileTreeNode
96 @param transfer_api @see cl TransferFTP
97 @param file_status file keeping the status for each file (date, hash of the content for the last upload)
98 @param file_map keep track of local filename and remote location
99 @param root_local local root
100 @param root_remote remote root
101 @param filter_out regular expression to exclude some files, it can also be a function.
102 @param threshold_size above that size, big files are split
103 @param algo encrypting algorithm
104 @param compression kind of compression ``'lzma'`` or ``'zip'``
105 @param fLOG logging function
106 """
107 self._key = key
108 self.fLOG = fLOG
109 self._ftn = file_tree_node
110 self._api = transfer_api
111 self._map = file_map
112 self._algo = algo
113 self._mapping = None
114 self._compress = compression
115 self._threshold_size = threshold_size
116 self._root_local = root_local if root_local is not None else (
117 file_tree_node.root if file_tree_node else None)
118 self._root_remote = root_remote if root_remote is not None else ""
119 if filter_out is not None and not isinstance(filter_out, str):
120 self._filter_out = filter_out
121 else:
122 self._filter_out_reg = None if filter_out is None else re.compile(
123 filter_out)
124 self._filter_out = (lambda f: False) if filter_out is None else (
125 lambda f: self._filter_out_reg.search(f) is not None)
127 self._ft = FilesStatus(file_status) if file_status else None
129 def iter_eligible_files(self):
130 """
131 iterates on eligible file for transfering (if they have been modified)
133 @return iterator on file name
134 """
135 for f in self._ftn:
136 if f.isfile():
137 if self._filter_out(f.fullname):
138 continue
139 n = self._ft.has_been_modified_and_reason(f.fullname)[0]
140 if n:
141 yield f
143 def update_status(self, file):
144 """
145 update the status of a file
147 @param file filename
148 @return @see cl FileInfo
149 """
150 r = self._ft.update_copied_file(file)
151 self._ft.save_dates()
152 return r
154 def update_mapping(self, key, maps):
155 """
156 update the status of a file
158 @param key key
159 @param maps update the mapping
160 """
161 self.Mapping[key] = maps
162 self.transfer_mapping()
164 def load_mapping(self):
165 """
166 retrieves existing mapping
168 @return dictionary
169 """
170 self._mapping = self._api.retrieve_mapping(lambda data: decrypt_stream(
171 self._key, data, chunksize=None, algo=self._algo))
172 return self._mapping
174 def transfer_mapping(self):
175 """
176 transfer the mapping
177 """
178 self._api.transfer_mapping(self.Mapping,
179 lambda data: encrypt_stream(
180 self._key, data, chunksize=None, algo=self._algo),
181 self._map)
183 @property
184 def Mapping(self):
185 """
186 returns the mapping
187 """
188 return self._mapping
190 def enumerate_read_encrypt(self, fullname):
191 """
192 enumerate pieces of files as bytes
194 @param fullname fullname
195 @return iterator on chunk of data
196 """
197 with open(fullname, "rb") as f:
198 try:
199 data = f.read(self._threshold_size)
200 cont = True
201 except PermissionError as e:
202 yield e
203 cont = False
204 if cont:
205 while data and cont:
206 data = self.compress(data)
207 enc = encrypt_stream(
208 self._key, data, chunksize=None, algo=self._algo)
209 yield enc
210 try:
211 data = f.read(self._threshold_size)
212 except PermissionError as e:
213 yield e
214 cont = False
216 def compress(self, data):
217 """
218 compress data
220 @param data binary data
221 @return binary data
222 """
223 if self._compress == "zip":
224 return zlib.compress(data)
225 elif self._compress == "lzma":
226 # delay import
227 try:
228 import lzma
229 except ImportError:
230 import pylzma as lzma
231 return lzma.compress(data)
232 elif self._compress is None:
233 return data
234 else:
235 raise ValueError(
236 "unexpected compression algorithm {0}".format(self._compress))
238 def decompress(self, data):
239 """
240 decompress data
242 @param data binary data
243 @return binary data
244 """
245 if self._compress == "zip":
246 return zlib.decompress(data)
247 elif self._compress == "lzma":
248 # delay import
249 try:
250 import lzma
251 except ImportError:
252 import pylzma as lzma
253 return lzma.decompress(data)
254 elif self._compress is None:
255 return data
256 else:
257 raise ValueError(
258 "unexpected compression algorithm {0}".format(self._compress))
260 def start_transfering(self):
261 """
262 starts transfering files to the remote website
264 :return: list of transferred @see cl FileInfo
265 :raises FolderTransferFTPException: The class raises an
266 exception (@see cl FolderTransferFTPException)
267 if more than 5 issues happened.
268 """
269 self.load_mapping()
271 issues = []
272 total = list(self.iter_eligible_files())
273 sum_bytes = 0
274 done = []
275 for i, file in enumerate(total):
276 if i % 20 == 0:
277 self.fLOG("#### transfering %d/%d (so far %d bytes)" %
278 (i, len(total), sum_bytes))
279 relp = os.path.relpath(file.fullname, self._root_local)
280 if ".." in relp:
281 raise ValueError("the local root is not accurate:\n{0}\nFILE:\n{1}\nRELPATH:\n{2}".format(
282 self, file.fullname, relp))
284 path = self._root_remote + "/" + os.path.split(relp)[0]
285 path = path.replace("\\", "/")
287 size = os.stat(file.fullname).st_size
288 self.fLOG("[upload % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
289 size,
290 os.path.split(file.fullname)[-1],
291 file.fullname,
292 path))
294 maps = TransferAPI_FileInfo(relp, [], datetime.datetime.now())
295 r = True
296 for ii, data in enumerate(self.enumerate_read_encrypt(file.fullname)):
297 if data is None or isinstance(data, Exception):
298 # it means something went wrong
299 r = False
300 err = data
301 break
302 to = self._api.get_remote_path(data, relp, ii)
303 to = path + "/" + to
304 to = to.lstrip("/")
305 r &= self.transfer(to, data)
306 maps.add_piece(to)
307 sum_bytes += len(data)
308 if not r:
309 break
311 if r:
312 self.update_status(file.fullname)
313 self.update_mapping(relp, maps)
314 done.append(relp)
315 else:
316 self.fLOG(" issue", err)
317 issues.append((relp, err))
319 if len(issues) >= 5:
320 raise EncryptedBackupError("too many issues:\n{0}".format(
321 "\n".join("{0} -- {1}".format(a, b) for a, b in issues)))
323 self.transfer_mapping()
324 return done, issues
326 def transfer(self, to, data):
327 """
328 transfer data
330 @param to remote path
331 @param data binary data
332 @return boolean
333 """
334 return self._api.transfer(to, data)
336 def retrieve(self, path, filename=None, root=None):
337 """
338 retrieve a backuped file
340 @param path path of the file to retrieve
341 @param filename if not None, store the file into this file
342 @param root if not None, store the file into root + path
343 @return filename or data
344 """
345 if self.Mapping is None:
346 raise EncryptedBackupError(
347 "load the mapping with method load_mapping")
348 if path not in self.Mapping:
349 raise EncryptedBackupError(
350 "the mapping is not up to date or file {0} cannot be found".format(path))
351 info = self.Mapping[path]
352 if len(info.pieces) == 0:
353 # the file is empty
354 if root is not None:
355 filename = os.path.join(root, path)
356 if filename is not None:
357 dirname = os.path.dirname(filename)
358 if not os.path.exists(dirname):
359 os.makedirs(dirname)
360 with open(filename, "w") as f:
361 pass
362 return filename
363 else:
364 if root is not None:
365 filename = os.path.join(root, path)
366 if filename is not None:
367 dirname = os.path.dirname(filename)
368 if not os.path.exists(dirname):
369 os.makedirs(dirname)
370 with open(filename, "wb") as f:
371 for p in info.pieces:
372 data = self._api.retrieve(p)
373 data = decrypt_stream(
374 self._key, data, chunksize=None, algo=self._algo)
375 data = self.decompress(data)
376 f.write(data)
377 return filename
378 else:
379 if len(info.pieces) == 1:
380 return self._api.retrieve(info.pieces[0])
381 else:
382 byt = StreamIO()
383 for p in info.pieces:
384 data = self._api.retrieve(p)
385 data = decrypt_stream(
386 self._key, data, chunksize=None, algo=self._algo)
387 data = self.decompress(data)
388 byt.write(data)
389 return byt.getvalue()
391 def retrieve_all(self, dest, regex=None):
392 """
393 retrieve all backuped files
395 @param dest destination
396 @param regex retrieve a subset matching the regular expression
397 @return list of restored files
398 """
399 rema = re.compile(regex) if regex else None
401 def match(na):
402 "local function"
403 if rema:
404 return rema.search(na)
405 else:
406 return True
408 self.fLOG("load mapping")
409 self.load_mapping()
410 self.fLOG("number of files", len(self.Mapping))
411 done = []
412 for k in sorted(self.Mapping.keys()):
413 name = self.retrieve(k, root=dest)
414 if match(name):
415 size = os.stat(name).st_size
416 self.fLOG("[download % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
417 size,
418 os.path.split(name)[-1],
419 dest,
420 os.path.dirname(name)))
421 done.append(name)
422 return done