Coverage for mlinsights/search_rank/search_engine_predictions_images.py: 86%
42 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-28 08:46 +0100
1"""
2@file
3@brief Implements a way to get close examples based
4on the output of a machine learned model.
5"""
6import numpy
7from .search_engine_predictions import SearchEnginePredictions
class SearchEnginePredictionImages(SearchEnginePredictions):
    """
    Extends class @see cl SearchEnginePredictions.
    Vectors are coming from images. The metadata must contain
    information about path names. We assume all images can hold
    in memory. An example can be found in notebook
    :ref:`searchimageskerasrst` or :ref:`searchimagestorchrst`.
    Another example can be found there:
    `search_images_dogcat.py
    <https://github.com/sdpython/ensae_projects/blob/master/src/
    ensae_projects/restapi/search_images_dogcat.py>`_.
    """

    def _prepare_fit(self, data=None, features=None, metadata=None,
                     transform=None, n=None, fLOG=None):
        """
        Stores data in the class itself.

        @param      data        a dataframe or None if the
                                features and the metadata
                                are specified with an array and a
                                dictionary
        @param      features    features columns or an array
        @param      metadata    data
        @param      transform   transform each vector before using it
        @param      n           takes *n* images (or ``len(iter_images)``)
        @param      fLOG        logging function
        """
        if "torch" in str(type(data)):
            self.module_ = "torch"
            # We delay the import as torch is not necessarily installed.
            from torch.utils.data import DataLoader  # pylint: disable=E0401,C0415,E0611
            # batch_size=1 so every iteration yields exactly one image,
            # which is paired with its (path, class) entry from data.samples.
            dataloader = DataLoader(
                data, batch_size=1, shuffle=False, num_workers=0)
            self.iter_images_ = iter_images = iter(
                zip(dataloader, data.samples))
            if n is None:
                n = len(data)
        elif "keras" in str(type(data)):  # pragma: no cover
            self.module_ = "keras"
            iter_images = data
            # We delay the import as keras backend is not necessarily installed.
            from keras.preprocessing.image import Iterator  # pylint: disable=E0401,C0415,E0611
            from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator  # pylint: disable=E0401,C0415
            if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
                raise NotImplementedError(  # pragma: no cover
                    f"iter_images must be a keras Iterator. "
                    f"No option implemented for type {type(iter_images)}.")
            if iter_images.batch_size != 1:
                # The pairing below assumes one image per batch.
                raise ValueError(  # pragma: no cover
                    f"batch_size must be 1 not {iter_images.batch_size}")
            self.iter_images_ = iter_images
            if n is None:
                n = len(iter_images)
            if not hasattr(iter_images, "filenames"):
                raise NotImplementedError(  # pragma: no cover
                    "Iterator does not iterate on images but numpy arrays (not implemented).")
        else:
            raise TypeError(  # pragma: no cover
                f"Unexpected data type {type(data)}.")

        def get_current_index(flow):
            "get current index"
            # NOTE(review): a keras flow advances batch_index before yielding,
            # hence the shift by -1 modulo flow.n to recover the index of the
            # sample just returned — confirm against the keras version in use.
            return flow.index_array[(flow.batch_index + flow.n - 1) % flow.n]

        def iterator_feature_meta():
            "iterators on metadata"
            def accessor(iter_images):
                if hasattr(iter_images, 'filenames'):
                    # keras: the image is the yielded item, the name comes
                    # from the flow's filename list.
                    return (lambda i, ite: (ite, iter_images.filenames[get_current_index(iter_images)]))
                else:
                    # torch: each item is ((tensor, label), (path, class)).
                    return (lambda i, ite: (ite[0], ite[1][0]))
            acc = accessor(iter_images)

            for i, it in zip(range(n), iter_images):
                im, name = acc(i, it)
                if not isinstance(name, str):
                    raise TypeError(  # pragma: no cover
                        f"name should be a string, not {type(name)}")
                # im[0] drops the batch dimension added by batch_size=1.
                yield im[0], dict(name=name, i=i)
                if fLOG and i % 10000 == 0:
                    fLOG(
                        f'[SearchEnginePredictionImages.fit] i={i}/{n} - {name}')
        super()._prepare_fit(data=iterator_feature_meta(), transform=transform)

    def fit(self, iter_images, n=None, fLOG=None):  # pylint: disable=W0237
        """
        Processes images through the model and fits a *k-nn*.

        @param      iter_images `Iterator <https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py#L719>`_
        @param      n           takes *n* images (or ``len(iter_images)``)
        @param      fLOG        logging function
        """
        self._prepare_fit(data=iter_images, transform=self.fct, n=n, fLOG=fLOG)
        return self._fit_knn()

    def kneighbors(self, iter_images, n_neighbors=None):  # pylint: disable=W0237
        """
        Searches for neighbors close to the first image
        returned by *iter_images*. It returns the neighbors
        only for the first image.

        @param      iter_images `Iterator <https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py#L719>`_
        @return     score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points in the population matrix,
        *meta* is the metadata.
        """
        if isinstance(iter_images, numpy.ndarray):
            if self.module_ == "keras":  # pragma: no cover
                raise NotImplementedError("Not yet implemented for Keras.")
            elif self.module_ == "torch":
                # We delay the import as torch is not necessarily installed.
                from torch import from_numpy  # pylint: disable=E0611,E0401,C0415
                # Add a batch dimension before querying the knn.
                X = from_numpy(iter_images[numpy.newaxis, :, :, :])
                return super().kneighbors(X, n_neighbors=n_neighbors)
            raise RuntimeError(  # pragma: no cover
                f"Unknown module '{self.module_}'.")
        elif "keras" in str(iter_images):  # pragma: no cover
            if self.module_ != "keras":
                raise RuntimeError(  # pragma: no cover
                    f"Keras object but {self.module_} was used to train the KNN.")
            # We delay the import as keras backend is not necessarily installed.
            # keras, it expects an iterator.
            from keras.preprocessing.image import Iterator  # pylint: disable=E0401,C0415,E0611
            from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator  # pylint: disable=E0401,C0415,E0611
            if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
                raise NotImplementedError(  # pragma: no cover
                    f"iter_images must be a keras Iterator. No option implemented for type {type(iter_images)}.")
            if iter_images.batch_size != 1:
                raise ValueError(  # pragma: no cover
                    f"batch_size must be 1 not {iter_images.batch_size}")
            # Only the first image yielded by the iterator is used.
            for img in iter_images:
                X = img[0]
                break
            return super().kneighbors(X, n_neighbors=n_neighbors)
        elif "torch" in str(type(iter_images)):
            if self.module_ != "torch":
                raise RuntimeError(  # pragma: no cover
                    f"Torch object but {self.module_} was used to train the KNN.")
            # torch: it expects a tensor
            X = iter_images
            return super().kneighbors(X, n_neighbors=n_neighbors)
        elif isinstance(iter_images, list):
            # Recurse on each element and stack the per-image results.
            res = [self.kneighbors(it, n_neighbors=n_neighbors)
                   for it in iter_images]
            return (numpy.vstack([_[0] for _ in res]),
                    numpy.vstack([_[1] for _ in res]),
                    numpy.vstack([_[2] for _ in res]))
        else:
            raise TypeError(  # pragma: no cover
                f"Unexpected type {type(iter_images)} in "
                f"SearchEnginePredictionImages.kneighbors")