Coverage for mlinsights/search_rank/search_engine_predictions_images.py: 86%

42 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-28 08:46 +0100

1""" 

2@file 

3@brief Implements a way to get close examples based 

4on the output of a machine learned model. 

5""" 

6import numpy 

7from .search_engine_predictions import SearchEnginePredictions 

8 

9 

class SearchEnginePredictionImages(SearchEnginePredictions):
    """
    Extends class @see cl SearchEnginePredictions.
    Vectors are coming from images. The metadata must contain
    information about path names. We assume all images can hold
    in memory. An example can be found in notebook
    :ref:`searchimageskerasrst` or :ref:`searchimagestorchrst`.
    Another example can be found there:
    `search_images_dogcat.py
    <https://github.com/sdpython/ensae_projects/blob/master/src/
    ensae_projects/restapi/search_images_dogcat.py>`_.
    """

    def _prepare_fit(self, data=None, features=None, metadata=None,
                     transform=None, n=None, fLOG=None):
        """
        Stores data in the class itself.

        @param      data        source of images, either an object whose type
                                name contains ``torch`` (it must support
                                ``len`` and expose ``samples``) or a *keras*
                                iterator with ``batch_size == 1`` and
                                an attribute ``filenames``
        @param      features    not used by this implementation
        @param      metadata    not used by this implementation
        @param      transform   transform each vector before using it
        @param      n           takes *n* images (or ``len(data)``)
        @param      fLOG        logging function

        The method sets ``module_`` (``'torch'`` or ``'keras'``) and
        ``iter_images_`` and raises ``TypeError`` if *data* is neither
        a *torch* nor a *keras* object.
        """
        if "torch" in str(type(data)):
            self.module_ = "torch"
            from torch.utils.data import DataLoader  # pylint: disable=E0401,C0415,E0611
            dataloader = DataLoader(
                data, batch_size=1, shuffle=False, num_workers=0)
            # Every batch is paired with the matching entry of data.samples,
            # the image name is later read from that entry.
            self.iter_images_ = iter_images = iter(
                zip(dataloader, data.samples))
            if n is None:
                n = len(data)
        elif "keras" in str(type(data)):  # pragma: no cover
            self.module_ = "keras"
            iter_images = data
            # We delay the import as keras backend is not necessarily installed.
            from keras.preprocessing.image import Iterator  # pylint: disable=E0401,C0415,E0611
            from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator  # pylint: disable=E0401,C0415
            if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
                raise NotImplementedError(  # pragma: no cover
                    f"iter_images must be a keras Iterator. "
                    f"No option implemented for type {type(iter_images)}.")
            if iter_images.batch_size != 1:
                raise ValueError(  # pragma: no cover
                    f"batch_size must be 1 not {iter_images.batch_size}")
            self.iter_images_ = iter_images
            if n is None:
                n = len(iter_images)
            if not hasattr(iter_images, "filenames"):
                raise NotImplementedError(  # pragma: no cover
                    "Iterator does not iterate on images but numpy arrays (not implemented).")
        else:
            raise TypeError(  # pragma: no cover
                f"Unexpected data type {type(data)}.")

        def get_current_index(flow):
            "get current index"
            # batch_index was already moved forward when the batch was
            # produced, hence the step back (modulo the number of samples).
            return flow.index_array[(flow.batch_index + flow.n - 1) % flow.n]

        def iterator_feature_meta():
            "iterators on metadata"
            if hasattr(iter_images, 'filenames'):
                # keras: the name comes from the iterator itself
                def acc(ite):
                    return ite, iter_images.filenames[get_current_index(iter_images)]
            else:
                # torch: items are pairs (batch, sample), sample[0] is the name
                def acc(ite):
                    return ite[0], ite[1][0]

            for i, it in zip(range(n), iter_images):
                im, name = acc(it)
                if not isinstance(name, str):
                    raise TypeError(  # pragma: no cover
                        f"name should be a string, not {type(name)}")
                yield im[0], dict(name=name, i=i)
                if fLOG and i % 10000 == 0:
                    fLOG(
                        f'[SearchEnginePredictionImages.fit] i={i}/{n} - {name}')

        super()._prepare_fit(data=iterator_feature_meta(), transform=transform)

    def fit(self, iter_images, n=None, fLOG=None):  # pylint: disable=W0237
        """
        Processes images through the model and fits a *k-nn*.

        @param      iter_images `Iterator <https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py#L719>`_
        @param      n           takes *n* images (or ``len(iter_images)``)
        @param      fLOG        logging function
        @return                 the result of ``self._fit_knn()``
        """
        self._prepare_fit(data=iter_images, transform=self.fct, n=n, fLOG=fLOG)
        return self._fit_knn()

    def kneighbors(self, iter_images, n_neighbors=None):  # pylint: disable=W0237
        """
        Searches for neighbors close to the first image
        returned by *iter_images*. It returns the neighbors
        only for the first image.

        @param      iter_images `Iterator <https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py#L719>`_,
                                a numpy array (one image), a torch object
                                or a list of any of these
        @param      n_neighbors number of neighbors, given to the parent class
        @return                 score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points in the population matrix,
        *meta* is the metadata.

        If *iter_images* is a list, every element is processed separately
        and the three outputs are stacked with ``numpy.vstack``.
        """
        if isinstance(iter_images, numpy.ndarray):
            if self.module_ == "keras":  # pragma: no cover
                raise NotImplementedError("Not yet implemented for Keras.")
            if self.module_ == "torch":
                from torch import from_numpy  # pylint: disable=E0611,E0401,C0415
                # A leading axis is added to get a batch of one image.
                X = from_numpy(iter_images[numpy.newaxis, :, :, :])
                return super().kneighbors(X, n_neighbors=n_neighbors)
            raise RuntimeError(  # pragma: no cover
                f"Unknown module '{self.module_}'.")

        # The dispatch relies on the type name only, never on str(iter_images):
        # that would format the content of a tensor or of a list and could
        # match 'keras' or 'torch' by accident.
        type_name = str(type(iter_images))

        if "keras" in type_name:  # pragma: no cover
            if self.module_ != "keras":
                raise RuntimeError(  # pragma: no cover
                    f"Keras object but {self.module_} was used to train the KNN.")
            # We delay the import as keras backend is not necessarily installed.
            # keras, it expects an iterator.
            from keras.preprocessing.image import Iterator  # pylint: disable=E0401,C0415,E0611
            from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator  # pylint: disable=E0401,C0415,E0611
            if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
                raise NotImplementedError(  # pragma: no cover
                    f"iter_images must be a keras Iterator. No option implemented for type {type(iter_images)}.")
            if iter_images.batch_size != 1:
                raise ValueError(  # pragma: no cover
                    f"batch_size must be 1 not {iter_images.batch_size}")
            # Only the first element produced by the iterator is used.
            for img in iter_images:
                X = img[0]
                break
            else:
                raise ValueError(  # pragma: no cover
                    "iter_images is empty, there is no image to search for.")
            return super().kneighbors(X, n_neighbors=n_neighbors)

        if "torch" in type_name:
            if self.module_ != "torch":
                raise RuntimeError(  # pragma: no cover
                    f"Torch object but {self.module_} was used to train the KNN.")
            # torch: it expects a tensor
            return super().kneighbors(iter_images, n_neighbors=n_neighbors)

        if isinstance(iter_images, list):
            res = [self.kneighbors(it, n_neighbors=n_neighbors)
                   for it in iter_images]
            return (numpy.vstack([r[0] for r in res]),
                    numpy.vstack([r[1] for r in res]),
                    numpy.vstack([r[2] for r in res]))

        raise TypeError(  # pragma: no cover
            f"Unexpected type {type(iter_images)} in "
            f"SearchEnginePredictionImages.kneighbors")