Coverage for src/ensae_teaching_cs/td_1a/edit_distance.py: 95%

38 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-01-27 05:44 +0100

1""" 

2@file 

3@brief edit distance 

4""" 

5 

6 

7def edit_distance(mot1, mot2): 

8 """ 

9 Computes the edit distance between two strings. 

10 

11 @param mot1 first string 

12 @param mot2 second string 

13 @return distance, path 

14 

15 More alternatives are available in the following paper 

16 `Harry: A Tool for Measuring String Similarity <http://jmlr.org/papers/v17/rieck16a.html>`_. 

17 

18 *distance* is an integer, 

19 *path* is a series of 2-uples of positions 

20 """ 

21 dist = {(-1, -1): 0} 

22 pred = {(-1, -1): None} 

23 if len(mot1) == 0: 

24 for j, d in enumerate(mot2): 

25 dist[-1, j] = dist[-1, j - 1] + 1 

26 pred[-1, j] = (-1, j - 1) 

27 dist[j, -1] = dist[j - 1, -1] + 1 

28 pred[j, -1] = (j - 1, -1) 

29 for i, c in enumerate(mot1): 

30 dist[i, -1] = dist[i - 1, -1] + 1 

31 pred[i, -1] = (i - 1, -1) 

32 dist[-1, i] = dist[-1, i - 1] + 1 

33 pred[-1, i] = (-1, i - 1) 

34 for j, d in enumerate(mot2): 

35 opt = [] 

36 if (i - 1, j) in dist: 

37 x = dist[i - 1, j] + 1 

38 opt.append((x, (i - 1, j))) 

39 if (i, j - 1) in dist: 

40 x = dist[i, j - 1] + 1 

41 opt.append((x, (i, j - 1))) 

42 if (i - 1, j - 1) in dist: 

43 x = dist[i - 1, j - 1] + (1 if c != d else 0) 

44 opt.append((x, (i - 1, j - 1))) 

45 mi = min(opt) 

46 dist[i, j] = mi[0] 

47 pred[i, j] = mi[1] 

48 

49 p = (len(mot1) - 1, len(mot2) - 1) 

50 chemin = [] 

51 try: 

52 while p is not None: 

53 chemin.append(p) 

54 p = pred[p] 

55 except KeyError as e: 

56 raise Exception("Issue with:\n'{0}'\n'{1}'\ndist={2}\npred={3}\np={4}".format( 

57 mot1, mot2, dist, pred, p)) from e 

58 chemin.reverse() 

59 return dist[len(mot1) - 1, len(mot2) - 1], chemin