Magic command to compare files

Links: notebook, html, PDF, python, slides, GitHub

Some ways to display differences between files.

from jyquickhelper import add_notebook_menu
# NOTE(review): presumably renders a navigation menu / table of contents for
# this notebook -- confirm against the jyquickhelper documentation.
add_notebook_menu()

Two slightly different functions

# f1 holds, as plain text, the first variant of ``edit_distance_string``.
# The text is never executed in this notebook section; it is only used as the
# left-hand input of the diff magics below (%codediff, %strdiff, %textdiff).
# In this variant the border initialisation loops run over ``j`` first, the
# main double loop iterates ``j`` in the outer loop, and only the distance
# ``d`` is returned.
# NOTE: the embedded docstring advertises "dist, list of tuples of aligned
# characters", but this variant returns ``d`` alone. The text is left as is
# because changing the literal would change the diff shown below.
f1 = '''
def edit_distance_string(s1, s2):
    """
    Computes the edit distance between strings *s1* and *s2*.

    :param s1: first string
    :param s2: second string
    :return: dist, list of tuples of aligned characters
    """
    n1 = len(s1) + 1
    n2 = len(s2) + 1
    dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
    pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

    for j in range(1, n2):
        dist[0, j] = j
        pred[0, j] = 2
    for i in range(0, n1):
        dist[i, 0] = i
        pred[i, 0] = 1
    pred[0, 0] = -1

    for j in range(1, n2):
        for i in range(1, n1):
            c = dist[i, j]

            p = 0
            if dist[i - 1, j] + 1 < c:
                c = dist[i - 1, j] + 1
                p = 1
            if dist[i, j - 1] + 1 < c:
                c = dist[i, j - 1] + 1
                p = 2
            d = 0 if s1[i - 1] == s2[j - 1] else 1
            if dist[i - 1, j - 1] + d < c:
                c = dist[i - 1, j - 1] + d
                p = 3
            if p == 0:
                raise RuntimeError(
                    "Unexpected value for p=%d at position=%r." % (p, (i, j)))

            dist[i, j] = c
            pred[i, j] = p

    d = dist[len(s1), len(s2)]
    return d
'''
# f2 holds, as plain text, the second variant of ``edit_distance_string``.
# It is the right-hand input of the diff magics below. Compared with the text
# stored in f1:
#   * the border initialisation runs the ``i`` loop before the ``j`` loop;
#   * the main double loop iterates ``i`` in the outer loop and ``j`` inside;
#   * after computing ``d`` it walks back through ``pred`` from
#     (len(s1), len(s2)) until it reaches the -1 marker at (0, 0), recording
#     the index pair (i - 1, j - 1) for every diagonal step (p == 3), and
#     returns ``d`` together with those pairs in forward order.
f2 = '''
def edit_distance_string(s1, s2):
    """
    Computes the edit distance between strings *s1* and *s2*.

    :param s1: first string
    :param s2: second string
    :return: dist, list of tuples of aligned characters
    """
    n1 = len(s1) + 1
    n2 = len(s2) + 1
    dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
    pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

    for i in range(0, n1):
        dist[i, 0] = i
        pred[i, 0] = 1
    for j in range(1, n2):
        dist[0, j] = j
        pred[0, j] = 2
    pred[0, 0] = -1

    for i in range(1, n1):
        for j in range(1, n2):
            c = dist[i, j]

            p = 0
            if dist[i - 1, j] + 1 < c:
                c = dist[i - 1, j] + 1
                p = 1
            if dist[i, j - 1] + 1 < c:
                c = dist[i, j - 1] + 1
                p = 2
            d = 0 if s1[i - 1] == s2[j - 1] else 1
            if dist[i - 1, j - 1] + d < c:
                c = dist[i - 1, j - 1] + d
                p = 3
            if p == 0:
                raise RuntimeError(
                    "Unexpected value for p=%d at position=%r." % (p, (i, j)))

            dist[i, j] = c
            pred[i, j] = p

    d = dist[len(s1), len(s2)]
    equals = []
    i, j = len(s1), len(s2)
    p = pred[i, j]
    while p != -1:
        if p == 3:
            equals.append((i - 1, j - 1))
            i -= 1
            j -= 1
        elif p == 2:
            j -= 1
        elif p == 1:
            i -= 1
        else:
            raise RuntimeError(
                "Unexpected value for p=%d at position=%r." % (p, (i, j)))
        p = pred[i, j]
    return d, list(reversed(equals))
'''

Visual differences: codediff

%load_ext pyquickhelper
%%html
<style>
table td, table th, table tr {text-align:left !important; white-space: pre;}
</style>

This is slow due to the edit distance computation. It could be improved by a C++ implementation.

%codediff f1 f2 --verbose 1
100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 23.05it/s]
00
11def edit_distance_string(s1, s2):
22 """
33 Computes the edit distance between strings *s1* and *s2*.
44
55 :param s1: first string
66 :param s2: second string
77 :return: dist, list of tuples of aligned characters
88 """
99 n1 = len(s1) + 1
1010 n2 = len(s2) + 1
1111 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
1212 pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
1313
1414 for j in range(1, n2):
for i in range(0, n1):
1515 dist[0, j] = j
dist[i, 0] = i
1616 pred[0, j] = 2
pred[i, 0] = 1
1717 for i in range(0, n1):
for j in range(1, n2):
1818 dist[i, 0] = i
dist[0, j] = j
1919 pred[i, 0] = 1
pred[0, j] = 2
2020 pred[0, 0] = -1
2121
2222 for j in range(1, n2):
for i in range(1, n1):
2323 for i in range(1, n1):
for j in range(1, n2):
2424 c = dist[i, j]
2525
2626 p = 0
2727 if dist[i - 1, j] + 1 < c:
2828 c = dist[i - 1, j] + 1
2929 p = 1
3030 if dist[i, j - 1] + 1 < c:
3131 c = dist[i, j - 1] + 1
3232 p = 2
3333 d = 0 if s1[i - 1] == s2[j - 1] else 1
3434 if dist[i - 1, j - 1] + d < c:
3535 c = dist[i - 1, j - 1] + d
3636 p = 3
3737 if p == 0:
3838 raise RuntimeError(
3939 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
4040
4141 dist[i, j] = c
4242 pred[i, j] = p
4343
4444 d = dist[len(s1), len(s2)]
45 return d
45 equals = []
46 i, j = len(s1), len(s2)
47 p = pred[i, j]
48 while p != -1:
49 if p == 3:
50 equals.append((i - 1, j - 1))
51 i -= 1
52 j -= 1
53 elif p == 2:
54 j -= 1
55 elif p == 1:
56 i -= 1
57 else:
58 raise RuntimeError(
59 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
60 p = pred[i, j]
61 return d, list(reversed(equals))
4662
%codediff f1 f2 --verbose 1 --two 1
100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 22.99it/s]
00
11def edit_distance_string(s1, s2):def edit_distance_string(s1, s2):
22 """ """
33 Computes the edit distance between strings *s1* and *s2*. Computes the edit distance between strings *s1* and *s2*.
44
55 :param s1: first string :param s1: first string
66 :param s2: second string :param s2: second string
77 :return: dist, list of tuples of aligned characters :return: dist, list of tuples of aligned characters
88 """ """
99 n1 = len(s1) + 1 n1 = len(s1) + 1
1010 n2 = len(s2) + 1 n2 = len(s2) + 1
1111 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
1212 pred = numpy.full(dist.shape, 0, dtype=numpy.int32) pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
1313
1414 for j in range(1, n2): for i in range(0, n1):
1515 dist[0, j] = j dist[i, 0] = i
1616 pred[0, j] = 2 pred[i, 0] = 1
1717 for i in range(0, n1): for j in range(1, n2):
1818 dist[i, 0] = i dist[0, j] = j
1919 pred[i, 0] = 1 pred[0, j] = 2
2020 pred[0, 0] = -1 pred[0, 0] = -1
2121
2222 for j in range(1, n2): for i in range(1, n1):
2323 for i in range(1, n1): for j in range(1, n2):
2424 c = dist[i, j] c = dist[i, j]
2525
2626 p = 0 p = 0
2727 if dist[i - 1, j] + 1 < c: if dist[i - 1, j] + 1 < c:
2828 c = dist[i - 1, j] + 1 c = dist[i - 1, j] + 1
2929 p = 1 p = 1
3030 if dist[i, j - 1] + 1 < c: if dist[i, j - 1] + 1 < c:
3131 c = dist[i, j - 1] + 1 c = dist[i, j - 1] + 1
3232 p = 2 p = 2
3333 d = 0 if s1[i - 1] == s2[j - 1] else 1 d = 0 if s1[i - 1] == s2[j - 1] else 1
3434 if dist[i - 1, j - 1] + d < c: if dist[i - 1, j - 1] + d < c:
3535 c = dist[i - 1, j - 1] + d c = dist[i - 1, j - 1] + d
3636 p = 3 p = 3
3737 if p == 0: if p == 0:
3838 raise RuntimeError( raise RuntimeError(
3939 "Unexpected value for p=%d at position=%r." % (p, (i, j))) "Unexpected value for p=%d at position=%r." % (p, (i, j)))
4040
4141 dist[i, j] = c dist[i, j] = c
4242 pred[i, j] = p pred[i, j] = p
4343
4444 d = dist[len(s1), len(s2)] d = dist[len(s1), len(s2)]
45 return d
45 equals = []
46 i, j = len(s1), len(s2)
47 p = pred[i, j]
48 while p != -1:
49 if p == 3:
50 equals.append((i - 1, j - 1))
51 i -= 1
52 j -= 1
53 elif p == 2:
54 j -= 1
55 elif p == 1:
56 i -= 1
57 else:
58 raise RuntimeError(
59 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
60 p = pred[i, j]
61 return d, list(reversed(equals))
4662

strdiff

%strdiff f1 f2

def edit_distance_string(s1, s2):

def edit_distance_string(s1, s2):

"""

"""

Computes the edit distance between strings *s1* and *s2*.

Computes the edit distance between strings *s1* and *s2*.

:param s1: first string

:param s1: first string

:param s2: second string

:param s2: second string

:return: dist, list of tuples of aligned characters

:return: dist, list of tuples of aligned characters

"""

"""

n1 = len(s1) + 1

n1 = len(s1) + 1

n2 = len(s2) + 1

n2 = len(s2) + 1

dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)

dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)

pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

for i in range(0, n1):

dist[i, 0] = i

pred[i, 0] = 1

for j in range(1, n2):

for j in range(1, n2):

dist[0, j] = j

dist[0, j] = j

pred[0, j] = 2

pred[0, j] = 2

for i in range(0, n1):

dist[i, 0] = i

pred[i, 0] = 1

pred[0, 0] = -1

pred[0, 0] = -1

for j in range(1, n2):

for i in range(1, n1):

for i in range(1, n1):

for j in range(1, n2):

c = dist[i, j]

c = dist[i, j]

p = 0

p = 0

if dist[i - 1, j] + 1 < c:

if dist[i - 1, j] + 1 < c:

c = dist[i - 1, j] + 1

c = dist[i - 1, j] + 1

p = 1

p = 1

if dist[i, j - 1] + 1 < c:

if dist[i, j - 1] + 1 < c:

c = dist[i, j - 1] + 1

c = dist[i, j - 1] + 1

p = 2

p = 2

d = 0 if s1[i - 1] == s2[j - 1] else 1

d = 0 if s1[i - 1] == s2[j - 1] else 1

if dist[i - 1, j - 1] + d < c:

if dist[i - 1, j - 1] + d < c:

c = dist[i - 1, j - 1] + d

c = dist[i - 1, j - 1] + d

p = 3

p = 3

if p == 0:

if p == 0:

raise RuntimeError(

raise RuntimeError(

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

dist[i, j] = c

dist[i, j] = c

pred[i, j] = p

pred[i, j] = p

d = dist[len(s1), len(s2)]

d = dist[len(s1), len(s2)]

return d

equals = []

i, j = len(s1), len(s2)

p = pred[i, j]

while p != -1:

if p == 3:

equals.append((i - 1, j - 1))

i -= 1

j -= 1

elif p == 2:

j -= 1

elif p == 1:

i -= 1

else:

raise RuntimeError(

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

p = pred[i, j]

return d, list(reversed(equals))

textdiff

%textdiff f1 f2
populating...
<IPython.core.display.Javascript object>