Coverage for src/papierstat/mltricks/sklearn_example_classifier.py: 93%

45 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-30 06:49 +0200

1""" 

2@file 

3@brief Defines @see cl SkCustomKnn 

4""" 

5import numpy 

6import pandas 

7from mlinsights.sklapi import SkBaseClassifier, SkException 

8 

9 

class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors <http://en.wikipedia.org/
    wiki/K-nearest_neighbors_algorithm>`_ as an example.

    Training only stores the samples; every prediction scans the whole
    training set. The number of neighbors is read back as ``self.P.k``.
    """

    def __init__(self, k=1):
        """
        constructor

        @param      k       number of neighbors to consider
        """
        SkBaseClassifier.__init__(self, k=k)

    def fit(self, X, y=None, sample_weight=None):
        """
        Train a k-NN model. There is not much to do except storing the training
        examples.

        @param      X               Training data, numpy array or DataFrame of
                                    shape [n_samples, n_features]
        @param      y               Target values (non-negative integer classes),
                                    one per sample; required even though
                                    the signature defaults it to None
        @param      sample_weight   not supported, must be None
        @return                     self : returns an instance of self.

        Raises *NotImplementedError* if *sample_weight* is given and
        *SkException* if the inputs are inconsistent.
        """
        if sample_weight is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weight must be None")
        if y is None:
            # Fail early with a clear message instead of a TypeError on len(None).
            raise SkException(  # pragma: no cover
                "y must be specified to train a classifier")
        # DataFrame.asmatrix() never existed and as_matrix() was removed in
        # pandas 1.0; .values is available in every pandas version.
        if isinstance(X, pandas.DataFrame):
            X = X.values
        if isinstance(y, (pandas.DataFrame, pandas.Series)):
            # Drop the pandas index so that labels are later looked up by
            # position, not by index label.
            y = y.values
            if y.ndim == 2 and y.shape[1] == 1:
                # A single-column frame is a plain vector of labels.
                y = y.ravel()
        if len(X) < self.P.k:
            raise SkException(  # pragma: no cover
                f"number of samples cannot be smaller than k={self.P.k}")
        if len(X) != len(y):
            raise SkException(  # pragma: no cover
                f"X and y should have the same dimension not: {len(X)} != {len(y)}")
        if min(y) < 0:
            raise SkException(  # pragma: no cover
                "class should be positive or null integer")
        self._TrainingX = X
        self._Trainingy = y
        # Labels are used as column indices, hence max label + 1 columns.
        self._nbclass = max(y) + 1
        return self

    def predict(self, X):
        """
        Predicts, usually, it calls the
        :meth:`decision_function <papierstat.mltricks.sklearn_example_classifier.
        SkCustomKnn.decision_function>` method.

        @param      X   Samples, array-like or DataFrame,
                        shape = (n_samples, n_features)
        @return         array of predicted class indices, one per sample
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # One score per sample: the sign decides between class 0 and 1.
            # numpy.int was removed in NumPy 1.24, the builtin int is the
            # type it used to alias.
            return (scores > 0).astype(int)
        # One column per class: pick the class with the highest score.
        return scores.argmax(axis=1)

    def decision_function(self, X):
        """
        Computes a matrix with a score for each class and each sample.
        The score of a class is the sum of the weights
        (see *distance2weight*) of the neighbors belonging to that class.

        @param      X   Samples, array-like or DataFrame,
                        *shape = (n_samples, n_features)*
        @return         array, shape = (n_samples, n_classes)
        """
        if isinstance(X, pandas.DataFrame):
            # Same conversion as in fit, X[i, :] does not work on a DataFrame.
            X = X.values
        nb = len(X)
        labels = self._Trainingy
        mk = numpy.zeros((nb, self._nbclass))
        for i in range(nb):
            for dist2, index in self.knn_search(X[i, :]):
                mk[i, labels[index]] += self.distance2weight(dist2)
        return mk

    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to weight.

        @param      d   distance (*decision_function* passes the squared
                        distance returned by *knn_search*)
        @return         weight (1/(d+1))
        """
        return 1.0 / (1.0 + d)

    def knn_search(self, x):
        """
        Finds the *k* nearest neighbors for x.

        @param      x   vector
        @return         k-nearest neighbors list( (distance**2, index) ),
                        sorted by increasing distance, ties are broken
                        by increasing index
        """
        X = self._TrainingX
        # Broadcasting replaces the explicit matrix of ones; the float
        # conversion keeps the computation in floating point as before.
        delta = X - numpy.asarray(x, dtype=float)
        dist2 = (delta ** 2).sum(axis=1)
        # A stable sort keeps equal distances ordered by index, which is
        # the order obtained when sorting (distance, index) tuples.
        order = numpy.argsort(dist2, kind="stable")[:self.P.k]
        return [(dist2[i], int(i)) for i in order]

119 X_x = X - po 

120 prod = sorted([((X_x[i, :] ** 2).sum(), i) for i in range(0, len(X))]) 

121 return prod[:self.P.k]