Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Defines @see cl SkCustomKnn 

4""" 

5import numpy 

6import pandas 

7from mlinsights.sklapi import SkBaseClassifier, SkException 

8 

9 

class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors <http://en.wikipedia.org/
    wiki/K-nearest_neighbors_algorithm>`_ as an example.

    Training only stores the samples. Scoring a sample looks up its *k*
    closest training points (squared Euclidean distance) and adds, for
    each of them, a weight ``1 / (1 + distance)`` to the score of that
    neighbour's class. The predicted class is the one with the highest
    score.
    """

    def __init__(self, k=1):
        """
        constructor

        @param      k       number of neighbors to consider
        """
        # The parameter is handed to the base class and later read back
        # through ``self.P.k``.
        SkBaseClassifier.__init__(self, k=k)

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a k-NN model. There is not much to do except storing the
        training examples.

        @param      X               Training data, numpy array or
                                    :class:`pandas.DataFrame` of shape
                                    [n_samples, n_features]
        @param      y               Target values, non-negative integer class
                                    labels, one per sample. The default is
                                    *None* but the labels are required:
                                    their length is compared to the one
                                    of *X*.
        @param      sample_weight   not supported, must be *None*
        @return                     self : returns an instance of self.

        Raises *NotImplementedError* if *sample_weight* is specified and
        *SkException* if there are fewer samples than *k*, if *X* and *y*
        have different lengths or if a label is negative.
        """
        if sample_weight is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weight must be None")
        if len(X) < self.P.k:
            raise SkException(  # pragma: no cover
                "number of samples cannot be smaller than k={0}".format(
                    self.P.k))
        # ``DataFrame.asmatrix`` does not exist (and ``as_matrix`` was removed
        # from pandas), ``.values`` returns the underlying numpy array.
        if isinstance(X, pandas.DataFrame):
            X = X.values
        # A Series is converted too: labels are later fetched by position
        # (``y[index]``), which a Series would interpret as a label lookup.
        if isinstance(y, (pandas.DataFrame, pandas.Series)):
            y = y.values
        if len(X) != len(y):
            raise SkException(  # pragma: no cover
                "X and y should have the same dimension not: {0} != {1}".format(
                    len(X),
                    len(y)))
        if min(y) < 0:
            raise SkException(  # pragma: no cover
                "class should be positive or null integer")
        self._TrainingX = X
        self._Trainingy = y
        # Labels are used as column indices of the score matrix, hence
        # the number of columns is the highest label plus one.
        self._nbclass = max(y) + 1
        return self

    def predict(self, X):
        """
        Predicts, it calls method :meth:`decision_function` and turns
        the scores into class indices.

        @param      X   Samples, array-like,
                        shape = (n_samples, n_features)
        @return         array of predicted class indices, one per sample
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # One score per sample: the sign of the score is the class.
            # ``numpy.int`` was an alias of ``int`` and was removed from numpy.
            indices = (scores > 0).astype(int)
        else:
            # One column per class: the best score wins.
            indices = scores.argmax(axis=1)
        return indices

    def decision_function(self, X):
        """
        Computes the output of the model: a matrix with a score
        for each class and each sample.

        @param      X   Samples, numpy array or :class:`pandas.DataFrame`,
                        *shape = (n_samples, n_features)*
        @return         array, shape = (n_samples, n_classes),
                        the score of a class is the sum of the weights of the
                        neighbors which belong to this class
        """
        # Rows are accessed with ``X[i, :]`` which a DataFrame does not
        # support, same conversion as in method *fit*.
        if isinstance(X, pandas.DataFrame):
            X = X.values
        nb = len(X)
        labels = self._Trainingy
        scores = numpy.zeros((nb, self._nbclass))
        for i in range(nb):
            for dist, index in self.knn_search(X[i, :]):
                scores[i, labels[index]] += self.distance2weight(dist)
        return scores

    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to weight.

        @param      d       distance
        @return             weight (1/(d+1))
        """
        return 1.0 / (1.0 + d)

    def knn_search(self, x):
        """
        Finds the *k* nearest neighbors for x.

        @param      x       vector
        @return             k-nearest neighbors list( (distance**2, index) ),
                            sorted by increasing distance,
                            ties are broken by increasing index
        """
        X = self._TrainingX
        # Broadcasting subtracts *x* from every training row, the conversion
        # to float keeps the distances floating point numbers.
        delta = X - numpy.asarray(x, dtype=numpy.float64)
        neighbors = sorted(((delta[i, :] ** 2).sum(), i)
                           for i in range(len(X)))
        return neighbors[:self.P.k]