Coverage for src/ensae_teaching_cs/td_1a/edit_distance.py: 95%

38 statements

, created at 2023-01-27 05:44 +0100

1"""

2@file

3@brief edit distance

4"""

7def edit_distance(mot1, mot2):

8 """

9 Computes the edit distance between two strings.

11 @param mot1 first string

12 @param mot2 second string

13 @return distance, path

15 More alternatives are available in the following paper

16 `Harry: A Tool for Measuring String Similarity <http://jmlr.org/papers/v17/rieck16a.html>`_.

18 *distance* is an integer,

19 *path* is a series of 2-uples of positions

20 """

21 dist = {(-1, -1): 0}

22 pred = {(-1, -1): None}

23 if len(mot1) == 0:

24 for j, d in enumerate(mot2):

25 dist[-1, j] = dist[-1, j - 1] + 1

26 pred[-1, j] = (-1, j - 1)

27 dist[j, -1] = dist[j - 1, -1] + 1

28 pred[j, -1] = (j - 1, -1)

29 for i, c in enumerate(mot1):

30 dist[i, -1] = dist[i - 1, -1] + 1

31 pred[i, -1] = (i - 1, -1)

32 dist[-1, i] = dist[-1, i - 1] + 1

33 pred[-1, i] = (-1, i - 1)

34 for j, d in enumerate(mot2):

35 opt = []

36 if (i - 1, j) in dist:

37 x = dist[i - 1, j] + 1

38 opt.append((x, (i - 1, j)))

39 if (i, j - 1) in dist:

40 x = dist[i, j - 1] + 1

41 opt.append((x, (i, j - 1)))

42 if (i - 1, j - 1) in dist:

43 x = dist[i - 1, j - 1] + (1 if c != d else 0)

44 opt.append((x, (i - 1, j - 1)))

45 mi = min(opt)

46 dist[i, j] = mi[0]

47 pred[i, j] = mi[1]

49 p = (len(mot1) - 1, len(mot2) - 1)

50 chemin = []

51 try:

52 while p is not None:

53 chemin.append(p)

54 p = pred[p]

55 except KeyError as e:

56 raise Exception("Issue with:\n'{0}'\n'{1}'\ndist={2}\npred={3}\np={4}".format(

57 mot1, mot2, dist, pred, p)) from e

58 chemin.reverse()

59 return dist[len(mot1) - 1, len(mot2) - 1], chemin