Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief About Voronoi Diagram
5"""
6import warnings
7import numpy
8from sklearn.linear_model import LinearRegression
9from sklearn.metrics.pairwise import pairwise_distances
10from mlinsights.mlmodel import QuantileLinearRegression
13class VoronoiEstimationError(Exception):
14 """
15 Raised when the algorithm failed.
16 """
17 pass
20def voronoi_estimation_from_lr(L, B, C=None, D=None, cl=0, qr=True, max_iter=None, verbose=False):
21 """
22 Determines a Voronoi diagram close to a convex
23 partition defined by a logistic regression in *n* classes.
24 :math:`M \\in \\mathbb{M}_{nd}` a row matrix :math:`(L_1, ..., L_n)`.
25 Every border between two classes *i* and *j* is defined by:
26 :math:`\\scal{L_i}{X} + B = \\scal{L_j}{X} + B`.
28 The function looks for a set of points from which the Voronoi
29 diagram can be inferred. It is done through a linear regression
30 with norm *L1*. See :ref:`l-lrvor-connection`.
32 @param L matrix
33 @param B vector
34 @param C additional conditions (see below)
35 @param D addition condictions (see below)
36 @param cl class on which the additional conditions applies
37 @param qr use quantile regression
38 @param max_iter number of condition to remove until convergence
39 @param verbose display information while training
40 @return matrix :math:`P \\in \\mathbb{M}_{nd}`
42 The function solves the linear system:
44 .. math::
46 \\begin{array}{rcl}
47 & \\Longrightarrow & \\left\\{\\begin{array}{l}\\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{P_i + P_j} +
48 2 \\frac{B_i - B_j}{\\norm{L_i-L_j}} = 0 \\\\
49 \\scal{P_i- P_j}{u_{ij}} - \\scal{P_i - P_j}{\\frac{L_i-L_j}{\\norm{L_i-L_j}}} \\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{u_{ij}}=0
50 \\end{array} \\right.
51 \\end{array}
53 If the number of dimension is big and
54 the number of classes small, the system has
55 multiple solution. Addition condition must be added
56 such as :math:`CP_i=D` where *i=cl*, :math:`P_i`
57 is the Voronoï point attached to class *cl*.
58 `Quantile regression <https://fr.wikipedia.org/wiki/R%C3%A9gression_quantile>`_
59 is not implemented in :epkg:`scikit-learn`.
60 We use `QuantileLinearRegression <http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/quantile_regression.html>`_.
62 After the first iteration, the function determines
63 the furthest pair of points and removes it from the list
64 of equations. If *max_iter* is None, the system goes until
65 the number of equations is equal to the number of points * 2,
66 otherwise it stops after *max_iter* removals. This is not the
67 optimal pair to remove as they could still be neighbors but it
68 should be a good heuristic.
69 """
70 labels_inv = {}
71 nb_constraints = numpy.zeros((L.shape[0],))
72 matL = []
73 matB = []
74 for i in range(0, L.shape[0]):
75 for j in range(i + 1, L.shape[0]):
76 li = L[i, :]
77 lj = L[j, :]
78 c = (li - lj)
79 nc = (c.T @ c) ** 0.5
81 # first condition
82 mat = numpy.zeros((L.shape))
83 mat[i, :] = c
84 mat[j, :] = c
85 d = -2 * (B[i] - B[j])
86 matB.append(d)
87 matL.append(mat.ravel())
88 labels_inv[i, j, 'eq1'] = len(matL) - 1
89 nb_constraints[i] += 1
90 nb_constraints[j] += 1
92 # condition 2 - hides multiple equation
93 # we pick one
94 coor = 0
95 found = False
96 while not found and coor < len(c):
97 if c[coor] == 0:
98 coor += 1
99 continue
100 if c[coor] == nc:
101 coor += 1
102 continue
103 found = True
104 if not found:
105 raise ValueError( # pragma: no cover
106 "Matrix L has two similar rows {0} and {1}. "
107 "Problem cannot be solved.".format(i, j))
109 c /= nc
110 c2 = c * c[coor]
111 mat = numpy.zeros((L.shape))
112 mat[i, :] = -c2
113 mat[j, :] = c2
115 mat[i, coor] += 1
116 mat[j, coor] -= 1
117 matB.append(0)
118 matL.append(mat.ravel())
119 labels_inv[i, j, 'eq2'] = len(matL) - 1
120 nb_constraints[i] += 1
121 nb_constraints[j] += 1
123 nbeq = (L.shape[0] * (L.shape[0] - 1)) // 2
124 matL = numpy.array(matL)
125 matB = numpy.array(matB)
127 if max_iter is None:
128 max_iter = matL.shape[0] - matL.shape[1]
130 if nbeq * 2 <= L.shape[0] * L.shape[1]:
131 if C is None and D is None:
132 warnings.warn( # pragma: no cover
133 "[voronoi_estimation_from_lr] Additional condition are required.")
134 if C is not None and D is not None:
135 matL = numpy.vstack([matL, numpy.zeros((1, matL.shape[1]))])
136 a = cl * L.shape[1]
137 b = a + L.shape[1]
138 matL[-1, a:b] = C
139 if not isinstance(D, float):
140 raise TypeError("D must be a float not {0}".format(type(D)))
141 matB = numpy.hstack([matB, [D]])
142 elif C is None and D is None:
143 pass # pragma: no cover
144 else:
145 raise ValueError(
146 "C and D must be None together or not None together.")
148 sample_weight = numpy.ones((matL.shape[0],))
149 tol = numpy.abs(matL.ravel()).max() * 1e-8 / matL.shape[0]
150 order_removed = []
151 removed = set()
152 for it in range(0, max(max_iter, 1)):
154 if qr:
155 clr = QuantileLinearRegression(
156 fit_intercept=False, max_iter=max(matL.shape))
157 else:
158 clr = LinearRegression(fit_intercept=False)
160 clr.fit(matL, matB, sample_weight=sample_weight)
161 score = clr.score(matL, matB, sample_weight)
163 res = clr.coef_
164 res = res.reshape(L.shape)
166 # early stopping
167 if score < tol:
168 if verbose:
169 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2} tol={3}'.format(
170 it + 1, max_iter, score, tol))
171 break
173 # defines the best pair of points to remove
174 dist2 = pairwise_distances(res, res)
175 dist = [(d, n // dist2.shape[0], n % dist2.shape[1])
176 for n, d in enumerate(dist2.ravel())]
177 dist = [_ for _ in dist if _[1] < _[2]]
178 dist.sort(reverse=True)
180 # test equal points
181 if dist[-1][0] < tol:
182 _, i, j = dist[-1]
183 eq1 = labels_inv[i, j, 'eq1']
184 eq2 = labels_inv[i, j, 'eq2']
185 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0:
186 sample_weight[eq1] = 1
187 sample_weight[eq2] = 1
188 nb_constraints[i] += 1
189 nb_constraints[j] += 1
190 else:
191 keep = (i, j)
192 pos = len(order_removed) - 1
193 while pos >= 0:
194 i, j = order_removed[pos]
195 if i in keep or j in keep:
196 eq1 = labels_inv[i, j, 'eq1']
197 eq2 = labels_inv[i, j, 'eq2']
198 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0:
199 sample_weight[eq1] = 1
200 sample_weight[eq2] = 1
201 nb_constraints[i] += 1
202 nb_constraints[j] += 1
203 break
204 pos -= 1
205 if pos < 0:
206 raise VoronoiEstimationError( # pragma: no cover
207 'Two classes have been merged in a single Voronoi point '
208 '(dist={0} < {1}). max_iter should be lower than '
209 '{2}'.format(dist[-1][0], tol, it))
211 dmax, i, j = dist[0]
212 pos = 0
213 while (i, j) in removed or nb_constraints[i] == 0 or nb_constraints[j] == 0:
214 pos += 1
215 if pos == len(dist):
216 break
217 dmax, i, j = dist[pos]
218 if pos == len(dist):
219 break
220 removed.add((i, j))
221 order_removed.append((i, j))
222 eq1 = labels_inv[i, j, 'eq1']
223 eq2 = labels_inv[i, j, 'eq2']
224 sample_weight[eq1] = 0
225 sample_weight[eq2] = 0
226 nb_constraints[i] -= 1
227 nb_constraints[j] -= 1
229 if verbose:
230 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2:.3g} tol={3:.3g} del P{4},{5} d={6:.3g}'.format(
231 it + 1, max_iter, score, tol, i, j, dmax))
233 return res