{"cells": [{"cell_type": "markdown", "id": "1a0effd2", "metadata": {}, "source": ["# Magic command to compare files\n", "\n", "Some ways to display differences between files."]}, {"cell_type": "code", "execution_count": 1, "id": "04c56557", "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", ""], "text/plain": ["0 | 0 |
|
1 | 1 | def edit_distance_string(s1, s2): |
2 | 2 | \"\"\" |
3 | 3 | Computes the edit distance between strings *s1* and *s2*. |
4 | 4 |
|
5 | 5 | :param s1: first string |
6 | 6 | :param s2: second string |
7 | 7 | :return: dist, list of tuples of aligned characters |
8 | 8 | \"\"\" |
9 | 9 | n1 = len(s1) + 1 |
10 | 10 | n2 = len(s2) + 1 |
11 | 11 | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) |
12 | 12 | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) |
13 | 13 |
|
14 | 14 | for j in range(1, n2): for i in range(0, n1): |
15 | 15 | dist[0, j] = j dist[i, 0] = i |
16 | 16 | pred[0, j] = 2 pred[i, 0] = 1 |
17 | 17 | for i in range(0, n1): for j in range(1, n2): |
18 | 18 | dist[i, 0] = i dist[0, j] = j |
19 | 19 | pred[i, 0] = 1 pred[0, j] = 2 |
20 | 20 | pred[0, 0] = -1 |
21 | 21 |
|
22 | 22 | for j in range(1, n2): for i in range(1, n1): |
23 | 23 | for i in range(1, n1): for j in range(1, n2): |
24 | 24 | c = dist[i, j] |
25 | 25 |
|
26 | 26 | p = 0 |
27 | 27 | if dist[i - 1, j] + 1 < c: |
28 | 28 | c = dist[i - 1, j] + 1 |
29 | 29 | p = 1 |
30 | 30 | if dist[i, j - 1] + 1 < c: |
31 | 31 | c = dist[i, j - 1] + 1 |
32 | 32 | p = 2 |
33 | 33 | d = 0 if s1[i - 1] == s2[j - 1] else 1 |
34 | 34 | if dist[i - 1, j - 1] + d < c: |
35 | 35 | c = dist[i - 1, j - 1] + d |
36 | 36 | p = 3 |
37 | 37 | if p == 0: |
38 | 38 | raise RuntimeError( |
39 | 39 | \"Unexpected value for p=%d at position=%r.\" % (p, (i, j))) |
40 | 40 |
|
41 | 41 | dist[i, j] = c |
42 | 42 | pred[i, j] = p |
43 | 43 |
|
44 | 44 | d = dist[len(s1), len(s2)] |
45 | return d | |
45 | equals = [] | |
46 | i, j = len(s1), len(s2) | |
47 | p = pred[i, j] | |
48 | while p != -1: | |
49 | if p == 3: | |
50 | equals.append((i - 1, j - 1)) | |
51 | i -= 1 | |
52 | j -= 1 | |
53 | elif p == 2: | |
54 | j -= 1 | |
55 | elif p == 1: | |
56 | i -= 1 | |
57 | else: | |
58 | raise RuntimeError( | |
59 | \"Unexpected value for p=%d at position=%r.\" % (p, (i, j))) | |
60 | p = pred[i, j] | |
61 | return d, list(reversed(equals)) | |
46 | 62 |
|
0 | 0 |
|
|
1 | 1 | def edit_distance_string(s1, s2): | def edit_distance_string(s1, s2): |
2 | 2 | \"\"\" | \"\"\" |
3 | 3 | Computes the edit distance between strings *s1* and *s2*. | Computes the edit distance between strings *s1* and *s2*. |
4 | 4 |
|
|
5 | 5 | :param s1: first string | :param s1: first string |
6 | 6 | :param s2: second string | :param s2: second string |
7 | 7 | :return: dist, list of tuples of aligned characters | :return: dist, list of tuples of aligned characters |
8 | 8 | \"\"\" | \"\"\" |
9 | 9 | n1 = len(s1) + 1 | n1 = len(s1) + 1 |
10 | 10 | n2 = len(s2) + 1 | n2 = len(s2) + 1 |
11 | 11 | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) |
12 | 12 | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) |
13 | 13 |
|
|
14 | 14 | for j in range(1, n2): |
|
15 | 15 | dist[0, j] = j |
|
16 | 16 | pred[0, j] = 2 |
|
17 | 17 | for i in range(0, n1): |
|
18 | 18 | dist[i, 0] = i |
|
19 | 19 | pred[i, 0] = 1 |
|
20 | 20 | pred[0, 0] = -1 | pred[0, 0] = -1 |
21 | 21 |
|
|
22 | 22 | for j in range(1, n2): |
|
23 | 23 | for i in range(1, n1): |
|
24 | 24 | c = dist[i, j] | c = dist[i, j] |
25 | 25 |
|
|
26 | 26 | p = 0 | p = 0 |
27 | 27 | if dist[i - 1, j] + 1 < c: | if dist[i - 1, j] + 1 < c: |
28 | 28 | c = dist[i - 1, j] + 1 | c = dist[i - 1, j] + 1 |
29 | 29 | p = 1 | p = 1 |
30 | 30 | if dist[i, j - 1] + 1 < c: | if dist[i, j - 1] + 1 < c: |
31 | 31 | c = dist[i, j - 1] + 1 | c = dist[i, j - 1] + 1 |
32 | 32 | p = 2 | p = 2 |
33 | 33 | d = 0 if s1[i - 1] == s2[j - 1] else 1 | d = 0 if s1[i - 1] == s2[j - 1] else 1 |
34 | 34 | if dist[i - 1, j - 1] + d < c: | if dist[i - 1, j - 1] + d < c: |
35 | 35 | c = dist[i - 1, j - 1] + d | c = dist[i - 1, j - 1] + d |
36 | 36 | p = 3 | p = 3 |
37 | 37 | if p == 0: | if p == 0: |
38 | 38 | raise RuntimeError( | raise RuntimeError( |
39 | 39 | \"Unexpected value for p=%d at position=%r.\" % (p, (i, j))) | \"Unexpected value for p=%d at position=%r.\" % (p, (i, j))) |
40 | 40 |
|
|
41 | 41 | dist[i, j] = c | dist[i, j] = c |
42 | 42 | pred[i, j] = p | pred[i, j] = p |
43 | 43 |
|
|
44 | 44 | d = dist[len(s1), len(s2)] | d = dist[len(s1), len(s2)] |
45 | return d | ||
45 | equals = [] | ||
46 | i, j = len(s1), len(s2) | ||
47 | p = pred[i, j] | ||
48 | while p != -1: | ||
49 | if p == 3: | ||
50 | equals.append((i - 1, j - 1)) | ||
51 | i -= 1 | ||
52 | j -= 1 | ||
53 | elif p == 2: | ||
54 | j -= 1 | ||
55 | elif p == 1: | ||
56 | i -= 1 | ||
57 | else: | ||
58 | raise RuntimeError( | ||
59 | \"Unexpected value for p=%d at position=%r.\" % (p, (i, j))) | ||
60 | p = pred[i, j] | ||
61 | return d, list(reversed(equals)) | ||
46 | 62 |
|
|
\n", "
def edit_distance_string(s1, s2):
def edit_distance_string(s1, s2):
\n", """"
"""
\n", "Computes the edit distance between strings *s1* and *s2*.
Computes the edit distance between strings *s1* and *s2*.
\n", ":param s1: first string
:param s1: first string
\n", ":param s2: second string
:param s2: second string
\n", ":return: dist, list of tuples of aligned characters
:return: dist, list of tuples of aligned characters
\n", """"
"""
\n", "n1 = len(s1) + 1
n1 = len(s1) + 1
\n", "n2 = len(s2) + 1
n2 = len(s2) + 1
\n", "dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
\n", "pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
\n", "for i in range(0, n1):
\n", "dist[i, 0] = i
\n", "pred[i, 0] = 1
\n", "for j in range(1, n2):
for j in range(1, n2):
\n", "dist[0, j] = j
dist[0, j] = j
\n", "pred[0, j] = 2
pred[0, j] = 2
\n", "for i in range(0, n1):
\n", "
dist[i, 0] = i
\n", "
pred[i, 0] = 1
\n", "
pred[0, 0] = -1
pred[0, 0] = -1
\n", "for j in range(1, n2):
for i in range(1, n1):
\n", "for i in range(1, n1):
for j in range(1, n2):
\n", "c = dist[i, j]
c = dist[i, j]
\n", "p = 0
p = 0
\n", "if dist[i - 1, j] + 1 < c:
if dist[i - 1, j] + 1 < c:
\n", "c = dist[i - 1, j] + 1
c = dist[i - 1, j] + 1
\n", "p = 1
p = 1
\n", "if dist[i, j - 1] + 1 < c:
if dist[i, j - 1] + 1 < c:
\n", "c = dist[i, j - 1] + 1
c = dist[i, j - 1] + 1
\n", "p = 2
p = 2
\n", "d = 0 if s1[i - 1] == s2[j - 1] else 1
d = 0 if s1[i - 1] == s2[j - 1] else 1
\n", "if dist[i - 1, j - 1] + d < c:
if dist[i - 1, j - 1] + d < c:
\n", "c = dist[i - 1, j - 1] + d
c = dist[i - 1, j - 1] + d
\n", "p = 3
p = 3
\n", "if p == 0:
if p == 0:
\n", "raise RuntimeError(
raise RuntimeError(
\n", ""Unexpected value for p=%d at position=%r." % (p, (i, j)))
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
\n", "dist[i, j] = c
dist[i, j] = c
\n", "pred[i, j] = p
pred[i, j] = p
\n", "d = dist[len(s1), len(s2)]
d = dist[len(s1), len(s2)]
\n", "return d
equals = []
\n", "i, j = len(s1), len(s2)
\n", "p = pred[i, j]
\n", "while p != -1:
\n", "if p == 3:
\n", "equals.append((i - 1, j - 1))
\n", "i -= 1
\n", "j -= 1
\n", "elif p == 2:
\n", "j -= 1
\n", "elif p == 1:
\n", "i -= 1
\n", "else:
\n", "raise RuntimeError(
\n", ""Unexpected value for p=%d at position=%r." % (p, (i, j)))
\n", "p = pred[i, j]
\n", "return d, list(reversed(equals))
\n", "\n", "