KMeans with norm L1#

Links: notebook, html, PDF, python, slides, GitHub

This notebook demonstrates how the results of a k-means algorithm change when it uses the L1 norm instead of the usual L2 (Euclidean) norm.

from jyquickhelper import add_notebook_menu
add_notebook_menu()
%matplotlib inline
import matplotlib.pyplot as plt

Simple datasets#

import numpy
import numpy.random as rnd
# Number of points in each of the two halves of the dataset.
N = 1000
X = numpy.zeros((N * 2, 2), dtype=numpy.float64)
# Both halves are filled with independent draws from rnd.rand(N, 2).
X[:N] = rnd.rand(N, 2)
X[N:] = rnd.rand(N, 2)
# Disabled: would additionally shift the second half by 0.75 along the
# first coordinate.
#X[N:, 0] += 0.75
# Shift the second half by +1 along the second coordinate so the two
# halves no longer cover the same range on that coordinate.
X[N:, 1] += 1
# Shift the first N//10 points of the first half by -2 along the first
# coordinate, away from the rest of the points.
X[:N//10, 0] -= 2
X.shape
(2000, 2)
fig, ax = plt.subplots(1, 1)
ax.plot(X[:, 0], X[:, 1], '.')
ax.set_title("Two squares");
../_images/kmeans_l1_6_0.png

Classic KMeans#

The classic k-means implementation uses the Euclidean (L2) distance.

from sklearn.cluster import KMeans
km = KMeans(2)
km.fit(X)
KMeans(n_clusters=2)
km.cluster_centers_
array([[0.27360385, 0.50114694],
       [0.49920054, 1.50108811]])
def plot_clusters(km_, X, ax):
    """Draw the points of *X* grouped by predicted cluster, plus the centers.

    :param km_: fitted clustering model exposing ``predict`` and
        ``cluster_centers_``
    :param X: array of points; the first two columns are used as the
        plotted coordinates
    :param ax: axes-like object providing ``plot`` and ``legend``

    One series is drawn per row of ``km_.cluster_centers_`` (labelled
    ``c=<index>``), then the centers themselves as large round markers,
    and finally the legend is added.
    """
    labels = km_.predict(X)
    n_clusters = km_.cluster_centers_.shape[0]
    for idx in range(n_clusters):
        # Keep only the points assigned to the current cluster.
        members = X[labels == idx]
        xs, ys = members[:, 0], members[:, 1]
        ax.plot(xs, ys, '.', label='c=%d' % idx)
    centers = km_.cluster_centers_
    ax.plot(centers[:, 0], centers[:, 1], 'o', ms=15, label="centers")
    ax.legend()

fig, ax = plt.subplots(1, 1)
plot_clusters(km, X, ax)
ax.set_title("L2 KMeans");
../_images/kmeans_l1_10_0.png

KMeans with L1 norm#

from mlinsights.mlmodel import KMeansL1L2
kml1 = KMeansL1L2(2, norm='L1')
kml1.fit(X)
KMeansL1L2(n_clusters=2, norm='l1')
kml1.cluster_centers_
array([[0.5812874 , 1.49145705],
       [0.33319472, 0.4959633 ]])
fig, ax = plt.subplots(1, 1)
plot_clusters(kml1, X, ax)
ax.set_title("L1 KMeans");
../_images/kmeans_l1_14_0.png

When clusters are completely different#

# Number of points in each of the two halves of the dataset.
N = 1000
X = numpy.zeros((N * 2, 2), dtype=numpy.float64)
# Both halves are filled with independent draws from rnd.rand(N, 2).
X[:N] = rnd.rand(N, 2)
X[N:] = rnd.rand(N, 2)
# Disabled: would additionally shift the second half by 0.75 along the
# first coordinate.
#X[N:, 0] += 0.75
# Shift the second half by +1 along the second coordinate.
X[N:, 1] += 1
# Shift the first N//10 points by -4 along the first coordinate, which
# places that small group far from all the other points.
X[:N//10, 0] -= 4
X.shape
(2000, 2)
km = KMeans(2)
km.fit(X)
KMeans(n_clusters=2)
kml1 = KMeansL1L2(2, norm='L1')
kml1.fit(X)
KMeansL1L2(n_clusters=2, norm='l1')
# One row, two columns of axes: the model stored in `km` goes on the
# left, the model stored in `kml1` on the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
plot_clusters(km, X, ax[0])
plot_clusters(kml1, X, ax[1])
ax[0].set_title("L2 KMeans")
ax[1].set_title("L1 KMeans");
../_images/kmeans_l1_19_0.png