Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Defines @see cl SkCustomKnn
4"""
5import numpy
6import pandas
7from mlinsights.sklapi import SkBaseClassifier, SkException
class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors <http://en.wikipedia.org/
    wiki/K-nearest_neighbors_algorithm>`_ as an example.
    """

    def __init__(self, k=1):
        """
        constructor

        @param      k       number of neighbors to consider
        """
        SkBaseClassifier.__init__(self, k=k)

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a k-NN model. There is not much to do except storing the
        training examples.

        @param      X               Training data, numpy array or sparse matrix of
                                    shape [n_samples,n_features]
        @param      y               Target values, numpy array of shape
                                    [n_samples, n_targets] (optional)
        @param      sample_weight   Weight values, numpy array of shape
                                    [n_samples, n_targets] (optional)
        @return     self : returns an instance of self.
        """
        if sample_weight is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weight must be None")
        if len(X) < self.P.k:
            raise SkException(  # pragma: no cover
                "number of samples cannot be smaller than k={0}".format(
                    self.P.k))
        if y is None:
            # previously crashed with a bare TypeError on len(None)
            raise SkException(  # pragma: no cover
                "y cannot be None, a k-NN classifier needs labels")
        if isinstance(X, pandas.DataFrame):
            # DataFrame.as_matrix was removed from pandas,
            # .values returns the underlying numpy array
            X = X.values
        if isinstance(y, pandas.DataFrame):
            y = y.values
        if len(X) != len(y):
            raise SkException(  # pragma: no cover
                "X and y should have the same dimension not: {0} != {1}".format(
                    len(X),
                    len(y)))
        if min(y) < 0:
            raise SkException(  # pragma: no cover
                "class should be positive or null integer")
        self._TrainingX = X
        self._Trainingy = y
        # classes are assumed to be 0..max(y), so max(y)+1 of them
        self._nbclass = max(y) + 1
        return self

    def predict(self, X):
        """
        Predicts, usually, it calls the
        :meth:`decision_function <papierstat.mltricks.sklearn_example_classifier.
        SkCustomKnn.decision_function>` method.

        @param      X   Samples, {array-like, sparse matrix},
                        shape = (n_samples, n_features)
        @return     predicted class indices, one per sample
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # binary case: positive score means class 1
            # (numpy.int was removed in numpy 1.24, use an explicit dtype)
            indices = (scores > 0).astype(numpy.int64)
        else:
            # multi-class: pick the class with the highest score
            indices = scores.argmax(axis=1)
        return indices

    def decision_function(self, X):
        """
        Computes the output of the model in case of a regressor,
        matrix with a score for each class and each sample
        for a classifier.

        @param      X   Samples, {array-like, sparse matrix},
                        *shape = (n_samples, n_features)*
        @return     array, shape = (n_samples,.), Returns predicted values.
        """
        nb = len(X)
        # nearest neighbors of every sample: list of (distance**2, index)
        res = [self.knn_search(X[i, :]) for i in range(0, nb)]
        y = self._Trainingy
        # append the label of each neighbor: (distance**2, index, label)
        res = [[el + (y[el[-1]],) for el in m] for m in res]
        mk = numpy.zeros((len(X), self._nbclass))
        for i, row in enumerate(res):
            for el in row:
                # each neighbor votes for its class, weighted by distance
                w = self.distance2weight(el[0])
                mk[i, el[-1]] += w
        return mk

    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to weight.

        @param      d   distance
        @return     weight (1/(d+1))
        """
        return 1.0 / (1.0 + d)

    def knn_search(self, x):
        """
        Finds the *k* nearest neighbors for x.

        @param      x   vector
        @return     k-nearest neighbors list( (distance**2, index) )
        """
        X = self._TrainingX
        ones = numpy.ones((len(X), len(x)))
        # broadcast x over every training row, then take row-wise
        # squared euclidean distances
        po = x * ones
        X_x = X - po
        prod = sorted([((X_x[i, :] ** 2).sum(), i) for i in range(0, len(X))])
        return prod[:self.P.k]