Coverage for src/papierstat/mltricks/sklearn_example_classifier.py: 93%
45 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 06:49 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 06:49 +0200
1"""
2@file
3@brief Defines @see cl SkCustomKnn
4"""
5import numpy
6import pandas
7from mlinsights.sklapi import SkBaseClassifier, SkException
class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors <http://en.wikipedia.org/
    wiki/K-nearest_neighbors_algorithm>`_ as an example.

    Training only stores the samples. A prediction looks for the *k*
    closest training samples (squared euclidean distance) and every
    neighbor votes for its own class with the weight ``1 / (1 + d)``.
    """

    def __init__(self, k=1):
        """
        constructor

        @param      k       number of neighbors to consider
        """
        SkBaseClassifier.__init__(self, k=k)

    @staticmethod
    def _to_numpy(data):
        """
        Returns the underlying numpy array of a pandas container,
        any other object is returned unchanged.

        @param      data    DataFrame, Series or anything else
        @return             numpy array for pandas inputs, *data* otherwise
        """
        if isinstance(data, (pandas.DataFrame, pandas.Series)):
            # ``.values`` exists in every pandas version, unlike the
            # removed ``as_matrix`` method.
            return data.values
        return data

    def fit(self, X, y=None, sample_weight=None):
        """
        Train a k-NN model. There is not much to do except storing the training
        examples.

        @param      X               Training data, numpy array or dataframe of
                                    shape [n_samples, n_features]
        @param      y               Target values, non negative integers,
                                    shape [n_samples] (a single column
                                    [n_samples, 1] is flattened)
        @param      sample_weight   not supported, must be None
        @return                     self : returns an instance of self.

        Raises *NotImplementedError* if *sample_weight* is specified,
        *TypeError* if *y* is None, *SkException* if there are fewer
        samples than *k*, if *X* and *y* do not have the same length
        or if one label is negative.
        """
        if sample_weight is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weight must be None")
        if len(X) < self.P.k:
            raise SkException(  # pragma: no cover
                f"number of samples cannot be smaller than k={self.P.k}")
        if y is None:
            # Same exception type as the former ``len(None)`` failure,
            # with an explicit message.
            raise TypeError(  # pragma: no cover
                "y must be specified to train a classifier")
        X = self._to_numpy(X)
        y = numpy.asarray(self._to_numpy(y))
        if y.ndim == 2 and y.shape[1] == 1:
            # A single column dataframe becomes a plain vector of labels,
            # labels are later used as column indices.
            y = y.ravel()
        if len(X) != len(y):
            raise SkException(  # pragma: no cover
                f"X and y should have the same dimension not: {len(X)} != {len(y)}")
        if y.min() < 0:
            raise SkException(  # pragma: no cover
                "class should be positive or null integer")
        self._TrainingX = X
        self._Trainingy = y
        # Labels are expected to be 0..max, one score column per label.
        self._nbclass = y.max() + 1
        return self

    def predict(self, X):
        """
        Predicts, usually, it calls the
        :meth:`decision_function <papierstat.mltricks.sklearn_example_classifier.
        SkCustomKnn.decision_function>` method.

        @param      X   Samples, {array-like, dataframe},
                        shape = (n_samples, n_features)
        @return         predicted class for every sample,
                        array of shape (n_samples,)
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # Binary case with a single score: the sign gives the class.
            # ``numpy.int`` was an alias of ``int`` and was removed
            # in numpy 1.24.
            return (scores > 0).astype(int)
        return scores.argmax(axis=1)

    def decision_function(self, X):
        """
        Computes the output of the model in case of a regressor,
        matrix with a score for each class and each sample
        for a classifier.

        @param      X   Samples, {array-like, dataframe},
                        *shape = (n_samples, n_features)*
        @return         array, shape = (n_samples, n_classes), the score
                        of a class is the sum of the weights of the
                        neighbors which belong to this class
        """
        # fit accepts dataframes, the prediction does as well.
        X = self._to_numpy(X)
        labels = self._Trainingy
        scores = numpy.zeros((len(X), self._nbclass))
        for i, row in enumerate(X):
            for dist, ind in self.knn_search(row):
                scores[i, labels[ind]] += self.distance2weight(dist)
        return scores

    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to weight.

        @param      d       distance
        @return             weight (1/(d+1))
        """
        return 1.0 / (1.0 + d)

    def knn_search(self, x):
        """
        Finds the *k* nearest neighbors for x.

        @param      x       vector
        @return             k-nearest neighbors list( (distance**2, index) ),
                            sorted by increasing distance, ties are sorted
                            by increasing index
        """
        # Broadcasting subtracts x from every training sample at once.
        # asarray prevents ``** 2`` from being a matrix power
        # if a numpy.matrix was stored.
        delta = numpy.asarray(
            numpy.asarray(self._TrainingX) - numpy.asarray(x))
        dist = (delta ** 2).sum(axis=1)
        # A stable sort keeps the lowest index first for equal distances,
        # the same order as sorting the pairs (distance, index).
        nearest = numpy.argsort(dist, kind="stable")[:self.P.k]
        return [(dist[i], int(i)) for i in nearest]