Coverage for src/mlstatpy/graph/graph_distance.py: 97%

534 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-13 20:42 +0200

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief First approach for a edit distance between two graphs. 

5 

6See :ref:`l-graph_distance`. 

7""" 

8import copy 

9import re 

10 

11 

12class _Vertex: 

13 

14 __slots__ = ('nb', 'label', 'weight') 

15 

16 def __init__(self, nb, label, weight): 

17 self.nb = nb # kind of id 

18 self.label = label # label 

19 self.weight = weight 

20 

21 

22class Vertex(_Vertex): 

23 """ 

24 Defines a vertex of a graph. 

25 """ 

26 

27 def __init__(self, nb, label, weight): 

28 """ 

29 constructor 

30 @param nb (int) index of the vertex 

31 @param label (str) label 

32 @para weight (float) 

33 """ 

34 _Vertex.__init__(self, nb, label, weight) 

35 self.pair = (None, None) 

36 self.edges = {} 

37 self.predE = {} 

38 self.succE = {} 

39 

40 def __str__(self): 

41 """ 

42 usual 

43 """ 

44 return '{}'.format(self.Label) 

45 

46 def __repr__(self): 

47 """ 

48 usual 

49 """ 

50 return "Vertex({}, {}, {})".format(repr(self.nb), repr(self.Label), self.weight) 

51 

52 def is_vertex(self): 

53 """ 

54 returns True 

55 """ 

56 return True 

57 

58 def is_edge(self): 

59 """ 

60 returns False 

61 """ 

62 return False 

63 

64 @property 

65 def Label(self): 

66 """ 

67 returns the label 

68 """ 

69 return self.label 

70 

71 

72class _Edge: 

73 

74 __slots__ = ('from_', 'to', 'label', 'weight', 'nb') 

75 

76 def __init__(self, from_, to, label, weight): 

77 self.from_, self.to = from_, to 

78 self.nb = from_, to # pylint: disable=E1101 

79 self.label = label 

80 

81 

82class Edge(_Edge): 

83 """ 

84 Defines an edge of a graph. 

85 """ 

86 

87 def __init__(self, from_, to, label, weight): 

88 """ 

89 @param from_ (int) 

90 @param to (int) 

91 @param label (str) 

92 @param weight (float) 

93 

94 ``'00'`` means the beginning of a graph, ``'11'`` the end. 

95 """ 

96 _Edge.__init__(self, from_, to, label, weight) 

97 self.pair = (None, None) 

98 self.weight = weight 

99 if self.from_ == "00" and self.to == "00": 

100 raise AssertionError("should not happen") # pragma: no cover 

101 if self.from_ == "11" and self.to == "11": 

102 raise AssertionError("should not happen") # pragma: no cover 

103 

104 def __str__(self): 

105 """ 

106 usual 

107 """ 

108 return "{} -> {} [{}]".format( 

109 self.nb[0], self.nb[1], self.Label) # pylint: disable=E1101 

110 

111 def __repr__(self): 

112 """ 

113 usual 

114 """ 

115 return "Edge({}, {}, {}, {})".format( 

116 repr(self.nb[0]), repr(self.nb[1]), # pylint: disable=E1101 

117 repr(self.Label), self.weight) # pylint: disable=E1101 

118 

119 def is_vertex(self): 

120 """ 

121 returns False 

122 """ 

123 return False 

124 

125 def is_edge(self): 

126 """ 

127 returns True 

128 """ 

129 return True 

130 

131 @property 

132 def Label(self): 

133 """ 

134 returns the label 

135 """ 

136 return self.label 

137 

138 

139class GraphDistance: 

140 """ 

141 Defines a graph to compute a distance between two graphs. 

142 

143 .. exref:: 

144 :title: Compute a distance between two graphs. 

145 

146 See :ref:`l-graph_distance`. 

147 

148 .. runpython:: 

149 :showcode: 

150 

151 import copy 

152 from mlstatpy.graph import GraphDistance 

153 

154 # We define two graphs as list of edges. 

155 graph1 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"), 

156 ("c", "d"), ("d", "e"), ("0", "b")] 

157 graph2 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"), 

158 ("c", "t"), ("t", "d"), ("d", "e"), ("d", "g")] 

159 

160 # We convert them into objects GraphDistance. 

161 graph1 = GraphDistance(graph1) 

162 graph2 = GraphDistance(graph2) 

163 

164 distance, graph = graph1.distance_matching_graphs_paths(graph2, use_min=False) 

165 

166 print("distance", distance) 

167 print("common paths:", graph) 

168 """ 

169 

170 # graph is a dictionary 

171 @staticmethod 

172 def get_list_of_vertices(graph): 

173 edges = [edge[:2] for edge in graph] 

174 unique = {} 

175 for i, j in edges: 

176 unique[i] = unique[j] = 1 

177 vertices = list(unique.keys()) 

178 vertices.sort() 

179 return vertices 

180 

181 def __init__(self, edge_list, vertex_label=None, add_loop=False, 

182 weight_vertex=1., weight_edge=1.): 

183 """ 

184 constructor 

185 

186 @param edge_list list of edges 

187 @param add_loop automatically add a loop on each vertex (an edge from a vertex to itself) 

188 @param weight_vertex weight for every vertex 

189 @param weight_edge weight for every edge 

190 """ 

191 if vertex_label is None: 

192 vertex_label = {} 

193 if isinstance(edge_list, str): 

194 self.load_from_file(edge_list, add_loop) 

195 else: 

196 self.vertices = {} 

197 self.edges = {} 

198 self.labelBegin = "00" 

199 self.labelEnd = "11" 

200 vid = GraphDistance.get_list_of_vertices(edge_list) 

201 for u in vid: 

202 self.vertices[u] = Vertex( 

203 u, vertex_label.get(u, str(u)), weight_vertex) 

204 for e in edge_list: 

205 i, j = e[:2] 

206 ls = "" if len(e) < 3 else e[2] 

207 self.edges[i, j] = Edge(i, j, str(ls), weight_edge) 

208 self._private__init__(add_loop, weight_vertex, weight_edge) 

209 

210 def __getitem__(self, index): 

211 """ 

212 returns a vertex or an edge if no vertex with the given index was found 

213 @param index id (index) to look for 

214 @return Vertex or Edge 

215 """ 

216 if isinstance(index, str): 

217 return self.vertices[index] 

218 if isinstance(index, tuple): 

219 return self.edges[index] 

220 raise KeyError( # pragma: no cover 

221 "unable to get element " + str(index)) 

222 

223 @staticmethod 

224 def load_from_file(filename, add_loop): 

225 """ 

226 loads a graph from a file 

227 @param filename file name 

228 @param add_loop @see me __init__ 

229 """ 

230 lines = open(filename, "r").readlines() # pylint: disable=R1732,W1514 

231 regV = re.compile("\\\"?([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]") 

232 regE = re.compile("\\\"?([a-z0-9_]+)\\\"? *-> *\\\"?" + 

233 "([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]") 

234 edge_list = [] 

235 vertex_label = {} 

236 for line in lines: 

237 line = line.strip("\r\n ;") 

238 ed = regE.search(line) 

239 ve = regV.search(line) 

240 if ed: 

241 g = ed.groups() 

242 edge_list.append((g[0], g[1], g[2])) 

243 elif ve: 

244 g = ve.groups() 

245 vertex_label[g[0]] = g[1] 

246 if len(vertex_label) == 0 or len(edge_list) == 0: 

247 raise OSError( # pragma: no cover 

248 "Unable to parse file %r." % filename) 

249 return GraphDistance(edge_list, vertex_label, add_loop) 

250 

251 def _private__init__(self, add_loop, weight_vertex, weight_edge): 

252 if add_loop: 

253 for k in self.vertices: 

254 if k not in (self.labelBegin, self.labelEnd): 

255 self.edges[k, k] = Edge(k, k, "", weight_edge) 

256 self.connect_root_and_leave(weight_vertex, weight_edge) 

257 self.compute_predecessor() 

258 self.compute_successor() 

259 

260 def connect_root_and_leave(self, weight_vertex, weight_edge): 

261 order = self.get_order_vertices() 

262 roots = [v for v, k in order.items() if k == 0] 

263 vert = {} 

264 for o in order: 

265 vert[o] = 0 

266 for k in self.edges: 

267 if k[0] != k[1]: 

268 vert[k[0]] += 1 

269 for r in roots: 

270 if self.labelBegin not in self.vertices: 

271 self.vertices[self.labelBegin] = Vertex( 

272 self.labelBegin, self.labelBegin, weight_vertex) 

273 if r != self.labelBegin: 

274 self.edges[self.labelBegin, r] = Edge( 

275 self.labelBegin, r, "", weight_edge) 

276 

277 leaves = [k for k, v in vert.items() if v == 0] 

278 for r in leaves: 

279 if self.labelEnd not in self.vertices: 

280 self.vertices[self.labelEnd] = Vertex( 

281 self.labelEnd, self.labelEnd, weight_vertex) 

282 if r != self.labelEnd: 

283 self.edges[r, self.labelEnd] = Edge( 

284 r, self.labelEnd, "", weight_edge) 

285 

286 def get_order_vertices(self): 

287 edges = self.edges 

288 order = {} 

289 for k in edges: 

290 order[k[0]] = 0 

291 order[k[1]] = 0 

292 

293 modif = 1 

294 while modif > 0: 

295 modif = 0 

296 for k in edges: 

297 i, j = k 

298 if i != j and order[j] <= order[i]: 

299 order[j] = order[i] + 1 

300 modif += 1 

301 

302 return order 

303 

304 def __str__(self): 

305 """ 

306 usual 

307 """ 

308 li = [] 

309 for v in self.vertices.values(): 

310 li.append(str(v)) 

311 for k, e in self.edges.items(): 

312 li.append(str(e)) 

313 return "\n".join(li) 

314 

315 def __repr__(self): 

316 """ 

317 usual 

318 """ 

319 edges = ", ".join(repr(v) for _, v in sorted(self.edges.items())) 

320 vertices = ", ".join("'{}': {}".format(k, repr(v)) 

321 for k, v in sorted(self.vertices.items())) 

322 return "GraphDistance(\n [{0}],\n {{{1}}})".format(edges, vertices) 

323 

324 def compute_predecessor(self): 

325 """ 

326 usual 

327 """ 

328 pred = {} 

329 for i, j in self.edges: 

330 if j not in pred: 

331 pred[j] = {} 

332 pred[j][i, j] = 0 

333 for k, v in pred.items(): 

334 for n in v: 

335 self.vertices[k].predE[n] = self.edges[n] 

336 

337 def compute_successor(self): 

338 succ = {} 

339 for i, j in self.edges: 

340 if i not in succ: 

341 succ[i] = {} 

342 succ[i][i, j] = i, j 

343 for k, v in succ.items(): 

344 for n in v: 

345 self.vertices[k].succE[n] = self.edges[n] 

346 

347 def get_matching_functions(self, function_mach_vertices, function_match_edges, 

348 cost=False): 

349 """ 

350 returns default matching functions between two vertices and two edges 

351 @param function_mach_vertices if not None, this function is returned, othewise, it returns a new fonction. 

352 See below. 

353 @param function_match_edges if not None, this function is returned, othewise, it returns a new fonction. 

354 See below. 

355 @param cost if True, the returned function should return a float, otherwise a boolean 

356 @return a pair of functions 

357 

358 Example for * if cost is False: 

359 

360 :: 

361 

362 lambda v1,v2,g1,g2,w1,w2 : v1.label == v2.label 

363 

364 Example for *function_mach_vertices* if cost is True: 

365 

366 :: 

367 

368 def tempF1 (v1,v2,g1,g2,w1,w2) : 

369 if v1 is not None and not v1.is_vertex() : raise TypeError("should be a vertex") 

370 if v2 is not None and not v2.is_vertex() : raise TypeError("should be a vertex") 

371 if v1 is None and v2 is None : return 0 

372 elif v1 is None or v2 is None : 

373 return v2.weight*w2 if v1 is None else v1.weight*w1 

374 else : 

375 return 0 if v1.label == v2.label else 0.5*(v1.weight*w1 + v2.weight*w2) 

376 

377 Example for *function_match_edges* if cost is False: 

378 

379 :: 

380 

381 lambda e1,e2,g1,g2,w1,w2 : e1.label == e2.label and 

382 (e1.from_ != e1.to or e2.from_ != e2.to) and 

383 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and 

384 (e1.from_ != self.labelEnd or e1.to != self.labelEnd) 

385 

386 Example if cost is True: 

387 

388 :: 

389 

390 def tempF2 (e1,e2,g1,g2,w1,w2) : 

391 if e1 is not None and not e1.is_edge() : raise TypeError("should be an edge") 

392 if e2 is not None and not e2.is_edge() : raise TypeError("should be an edge") 

393 if e1 is None and e2 is None : return 0 

394 elif e1 is None or e2 is None : 

395 return e2.weight*w2 if e1 is None else e1.weight*w1 

396 elif e1.label != e2.label : return 0.5*(e1.weight*w1 + e2.weight*w2) 

397 else : 

398 lab1 = g1.vertices [e1.from_].label == g2.vertices [e2.from_].label 

399 lab2 = g1.vertices [e1.to].label == g2.vertices [e2.to].label 

400 if lab1 and lab2 : return 0 

401 else : return e1.weight*w1 + e2.weight*w2 

402 

403 """ 

404 if cost: 

405 

406 if function_mach_vertices is None: 

407 def tempF1_vertex(v1, v2, g1, g2, w1, w2): 

408 if v1 is None: 

409 if v2 is None: 

410 return 0. 

411 if not v2.is_vertex(): 

412 raise TypeError( # pragma: no cover 

413 "v2 should be a vertex") 

414 return v2.weight * w2 

415 elif v2 is None: 

416 if not v1.is_vertex(): 

417 raise TypeError( # pragma: no cover 

418 "v1 should be a vertex") 

419 if not v1.is_vertex(): 

420 raise TypeError( # pragma: no cover 

421 "v1 should be a vertex") 

422 return v1.weight * w1 

423 else: 

424 if not v1.is_vertex(): 

425 raise TypeError( # pragma: no cover 

426 "v1 should be a vertex") 

427 if not v2.is_vertex(): 

428 raise TypeError( # pragma: no cover 

429 "v2 should be a vertex") 

430 return ( 

431 0 if v1.label == v2.label 

432 else 0.5 * (v1.weight * w1 + v2.weight * w2)) 

433 

434 function_mach_vertices = tempF1_vertex 

435 

436 if function_match_edges is None: 

437 def tempF2_edge(e1, e2, g1, g2, w1, w2): 

438 if e1 is not None and not e1.is_edge(): 

439 raise TypeError("should be an edge") 

440 if e2 is not None and not e2.is_edge(): 

441 raise TypeError("should be an edge") 

442 if e1 is None and e2 is None: 

443 return 0 

444 elif e1 is None or e2 is None: 

445 return e2.weight * w2 if e1 is None else e1.weight * w1 

446 elif e1.label != e2.label: 

447 return 0.5 * (e1.weight * w1 + e2.weight * w2) 

448 else: 

449 lab1 = g1.vertices[e1.from_].label == g2.vertices[e2.from_].label 

450 lab2 = g1.vertices[e1.to].label == g2.vertices[e2.to].label 

451 if lab1 and lab2: 

452 return 0 

453 else: 

454 return e1.weight * w1 + e2.weight * w2 

455 

456 function_match_edges = tempF2_edge 

457 else: 

458 if function_mach_vertices is None: 

459 function_mach_vertices = \ 

460 lambda v1, v2, g1, g2, w1, w2: \ 

461 v1.label == v2.label 

462 if function_match_edges is None: 

463 function_match_edges = \ 

464 lambda e1, e2, g1, g2, w1, w2: \ 

465 e1.label == e2.label and \ 

466 (e1.from_ != e1.to or e2.from_ != e2.to) and \ 

467 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and \ 

468 (e1.from_ != self.labelEnd or e1.to != self.labelEnd) 

469 return function_mach_vertices, function_match_edges 

470 

471 def common_paths(self, graph2, 

472 function_mach_vertices=None, 

473 function_match_edges=None, 

474 noClean=False): 

475 function_mach_vertices, function_match_edges = \ 

476 self.get_matching_functions( 

477 function_mach_vertices, function_match_edges) 

478 g = GraphDistance([]) 

479 vfirst = Vertex(self.labelBegin, "%s-%s" % (self.labelBegin, self.labelBegin), 

480 (self.vertices[self.labelBegin].weight + 

481 graph2.vertices[self.labelBegin].weight) / 2) 

482 g.vertices[self.labelBegin] = vfirst 

483 vfirst.pair = self.vertices[ 

484 self.labelBegin], graph2.vertices[self.labelBegin] 

485 

486 modif = 1 

487 while modif > 0: 

488 modif = 0 

489 add = {} 

490 for k, v in g.vertices.items(): 

491 

492 v1, v2 = v.pair 

493 if len(v.succE) == 0: 

494 for e1 in v1.succE: 

495 for e2 in v2.succE: 

496 oe1 = self.edges[e1] 

497 oe2 = graph2.edges[e2] 

498 if function_match_edges(oe1, oe2, self, graph2, 1., 1.): 

499 tv1 = self.vertices[oe1.to] 

500 tv2 = graph2.vertices[oe2.to] 

501 if function_mach_vertices(tv1, tv2, self, graph2, 1., 1.): 

502 # we have a match 

503 ii = "%s-%s" % (tv1.nb, tv2.nb) 

504 if tv1.nb == self.labelEnd and tv2.nb == self.labelEnd: 

505 ii = self.labelEnd 

506 lab = "%s-%s" % (tv1.label, tv2.label) \ 

507 if tv1.label != tv2.label else tv1.label 

508 tv = Vertex( 

509 ii, lab, (tv1.weight + tv2.weight) / 2) 

510 lab = "%s-%s" % (oe1.label, oe2.label) \ 

511 if oe1.label != oe2.label else oe1.label 

512 ne = Edge(v.nb, tv.nb, lab, 

513 (oe1.weight + oe2.weight) / 2) 

514 add[tv.nb] = tv 

515 g.edges[ne.from_, ne.to] = ne 

516 ne.pair = oe1, oe2 

517 tv.pair = tv1, tv2 

518 v.succE[ne.from_, ne.to] = ne 

519 modif += 1 

520 for k, v in add.items(): 

521 g.vertices[k] = v 

522 

523 if not noClean: 

524 # g.connect_root_and_leave() 

525 g.compute_predecessor() 

526 g.clean_dead_ends() 

527 return g 

528 

529 def clean_dead_ends(self): 

530 edgesToKeep = {} 

531 verticesToKeep = {} 

532 if self.labelEnd in self.vertices: 

533 v = self.vertices[self.labelEnd] 

534 verticesToKeep[v.nb] = False 

535 

536 modif = 1 

537 while modif > 0: 

538 modif = 0 

539 add = {} 

540 for k, v in verticesToKeep.items(): 

541 if v: 

542 continue 

543 modif += 1 

544 verticesToKeep[k] = True 

545 for pred, vv in self.vertices[k].predE.items(): 

546 edgesToKeep[pred] = True 

547 add[vv.from_] = verticesToKeep.get(vv.from_, False) 

548 for k, v in add.items(): 

549 verticesToKeep[k] = v 

550 

551 remove = {} 

552 for k in self.vertices: 

553 if k not in verticesToKeep: 

554 remove[k] = True 

555 for k in remove: 

556 del self.vertices[k] 

557 

558 remove = {} 

559 for k in self.edges: 

560 if k not in edgesToKeep: 

561 remove[k] = True 

562 for k in remove: 

563 del self.edges[k] 

564 else: 

565 self.vertices = {} 

566 self.edges = {} 

567 

568 def enumerate_all_paths(self, edges_and_vertices, begin=None): 

569 if begin is None: 

570 begin = [] 

571 if len(self.vertices) > 0 and len(self.edges) > 0: 

572 if edges_and_vertices: 

573 last = begin[-1] if len(begin) > 0 \ 

574 else self.vertices[self.labelBegin] 

575 else: 

576 last = self.vertices[begin[-1].to] if len(begin) > 0 \ 

577 else self.vertices[self.labelBegin] 

578 

579 if edges_and_vertices and len(begin) == 0: 

580 begin = [last] 

581 

582 for ef in last.succE: 

583 e = self.edges[ef] 

584 path = copy.copy(begin) 

585 v = self.vertices[e.to] 

586 if e.to == e.from_: 

587 # cycle 

588 continue 

589 path.append(e) 

590 if edges_and_vertices: 

591 path.append(v) 

592 if v.label == self.labelEnd: 

593 yield path 

594 else: 

595 for p in self.enumerate_all_paths(edges_and_vertices, path): 

596 yield p 

597 

598 def edit_distance_path(self, p1, p2, g1, g2, 

599 function_mach_vertices=None, 

600 function_match_edges=None, 

601 use_min=False, 

602 debug=False, 

603 cache=None): 

604 """ 

605 Tries to align two paths from two graphs. 

606 

607 @param p1 path 1 (from g1) 

608 @param p2 path 2 (from g2) 

609 @param g1 graph 1 

610 @param g2 graph 2 

611 @param function_mach_vertices function which gives a distance bewteen two vertices, 

612 if None, it take the output of @see me get_matching_functions 

613 @param function_match_edges function which gives a distance bewteen two edges, 

614 if None, it take the output of @see me get_matching_functions 

615 @param use_min the returned is based on a edit distance, if this parameter is True, the returned value will be: 

616 

617 :: 

618 

619 if use_min : 

620 n = min (len(p1), len(p2)) 

621 d = d*1.0 / n if n > 0 else 0 

622 

623 @param debug unused 

624 @param cache to cache the costs 

625 @return 2-uple: distance, aligned path 

626 """ 

627 def edge_vertex_match(x, y, g1, g2, w1, w2): 

628 if x is None: 

629 if y is None: 

630 raise RuntimeError( 

631 "Both x and y are None.") 

632 return y.weight * w2 

633 elif y is None: 

634 return x.weight * w1 

635 return (x.weight * w1 + y.weight * w2) / 2 

636 

637 def cache_cost(func, a, b, g1, g2, w1, w2): 

638 if cache is None: 

639 return func(a, b, g1, g2, w1, w2) 

640 key = (id(a), id(b), w1, w2) 

641 if key in cache: 

642 return cache[key] 

643 cost = func(a, b, g1, g2, w1, w2) 

644 cache[key] = cost 

645 return cost 

646 

647 function_mach_vertices, function_match_edges = ( 

648 self.get_matching_functions( 

649 function_mach_vertices, function_match_edges, True)) 

650 dist = {(-1, -1): (0, None, None)} 

651 

652 if use_min: 

653 w1 = 1.0 / len(p1) 

654 w2 = 1.0 / len(p2) 

655 else: 

656 w1 = 1. 

657 w2 = 1. 

658 

659 p2l = list(enumerate(p2)) 

660 for i1, eorv1 in enumerate(p1): 

661 ed1 = eorv1.is_edge() 

662 ve1 = eorv1.is_vertex() 

663 for i2, eorv2 in p2l: 

664 np = i1, i2 

665 posit = [((i1 - 1, i2 - 1), (eorv1, eorv2)), 

666 ((i1 - 1, i2), (eorv1, None)), 

667 ((i1, i2 - 1), (None, eorv2))] 

668 

669 if ed1 and eorv2.is_edge(): 

670 func = function_match_edges 

671 elif ve1 and eorv2.is_vertex(): 

672 func = function_mach_vertices 

673 else: 

674 func = edge_vertex_match 

675 

676 for p, co in posit: 

677 if p in dist: 

678 c0 = dist[p][0] 

679 c1 = cache_cost(func, co[0], co[1], g1, g2, w1, w2) 

680 c = c0 + c1 

681 if np not in dist or c < dist[np][0]: 

682 dist[np] = (c, p, co, (c0, c1)) 

683 

684 last = dist[len(p1) - 1, len(p2) - 1] 

685 path = [] 

686 while last[1] is not None: 

687 path.append(last) 

688 last = dist[last[1]] 

689 

690 path.reverse() 

691 

692 d = dist[len(p1) - 1, len(p2) - 1][0] 

693 if use_min: 

694 n = min(len(p1), len(p2)) 

695 d = d * 1.0 / n if n > 0 else 0 

696 return d, path 

697 

698 def private_count_left_right(self, valuesInList): 

699 countLeft = {} 

700 countRight = {} 

701 for k, v in valuesInList: 

702 i, j = v 

703 if i not in countRight: 

704 countRight[i] = {} 

705 countRight[i][j] = countRight[i].get(j, 0) + 1 

706 if j not in countLeft: 

707 countLeft[j] = {} 

708 countLeft[j][i] = countLeft[j].get(i, 0) + 1 

709 return countLeft, countRight 

710 

711 def private_kruskal_matrix(self, matrix, reverse): 

712 countLeft, countRight = self.private_count_left_right(matrix) 

713 cleft, cright = len(countLeft), len(countRight) 

714 matrix.sort(reverse=reverse) 

715 count = max(max(sum(_.values()) for _ in countRight.values()), 

716 max(sum(_.values()) for _ in countLeft.values())) 

717 while count > 1: 

718 k, v = matrix.pop() 

719 i, j = v 

720 countRight[i][j] -= 1 

721 countLeft[j][i] -= 1 

722 count = max(max(max(_.values()) for _ in countRight.values()), 

723 max(max(_.values()) for _ in countLeft.values())) 

724 

725 mini = min(cleft, cright) 

726 if len(matrix) < mini: 

727 raise Exception("impossible: the smallest set should get all" + 

728 "its element associated to at least one coming from the other set") 

729 

730 def _private_string_path_matching(self, path, skipEdge=False): 

731 temp = [] 

732 for p in path: 

733 u, v = p[2] 

734 if skipEdge and ((u is not None and u.is_edge()) or 

735 (v is not None and v.is_edge())): 

736 continue 

737 su = "-" if u is None else str(u.nb) 

738 sv = "-" if v is None else str(v.nb) 

739 s = "(%s,%s)" % (su, sv) 

740 temp.append(s) 

741 return " ".join(temp) 

742 

743 def distance_matching_graphs_paths(self, graph2, 

744 function_mach_vertices=None, 

745 function_match_edges=None, 

746 noClean=False, 

747 store=None, 

748 use_min=True, 

749 weight_vertex=1., 

750 weight_edge=1., 

751 verbose=0, fLOG=print): 

752 """ 

753 Computes an alignment between two graphs. 

754 

755 @param graph2 the other graph 

756 @param function_mach_vertices function which gives a distance bewteen two vertices, 

757 if None, it take the output of @see me get_matching_functions 

758 @param function_match_edges function which gives a distance bewteen two edges, 

759 if None, it take the output of @see me get_matching_functions 

760 @param noClean if True, clean unmatched vertices and edges 

761 @param store if None, does nothing, if it is a dictionary, the function will store here various 

762 information about how th matching was operated 

763 @param use_min @see me edit_distance_path 

764 @param weight_vertex a weight for every vertex 

765 @param weight_edge a weight for every edge 

766 @param verbose display some progress with :epkg:`tqdm` 

767 @param fLOG logging functino 

768 @return 2 tuple (a distance, a graph containing the aligned paths between the two graphs) 

769 

770 See :ref:`l-graph_distance`. 

771 """ 

772 

773 function_mach_vertices, function_match_edges = ( 

774 self.get_matching_functions( 

775 function_mach_vertices, function_match_edges, True)) 

776 

777 paths1 = list(self.enumerate_all_paths(True)) 

778 paths2 = list(graph2.enumerate_all_paths(True)) 

779 

780 if store is not None: 

781 store["nbpath1"] = len(paths1) 

782 store["nbpath2"] = len(paths2) 

783 

784 matrix_distance = {} 

785 if verbose > 0 and fLOG is not None: 

786 fLOG("[distance_matching_graphs_paths] builds matrix_distance") 

787 from tqdm import tqdm 

788 loop1 = tqdm(list(enumerate(paths1))) 

789 else: 

790 loop1 = enumerate(paths1) 

791 loop2 = list(enumerate(paths2)) 

792 if verbose > 0 and fLOG is not None: 

793 fLOG("[distance_matching_graphs_paths] len(loop1)=%d" % len( 

794 list(enumerate(paths1)))) 

795 fLOG("[distance_matching_graphs_paths] len(loop2)=%d" % len(loop2)) 

796 cache = {} 

797 for i1, p1 in loop1: 

798 for i2, p2 in loop2: 

799 matrix_distance[i1, i2] = self.edit_distance_path( 

800 p1, p2, self, graph2, function_mach_vertices, 

801 function_match_edges, use_min=use_min, 

802 cache=cache) 

803 if verbose > 0 and fLOG is not None: 

804 fLOG("[distance_matching_graphs_paths] len(cache)=%d" % len(cache)) 

805 

806 if store is not None: 

807 store["matrix_distance"] = matrix_distance 

808 reduction = [(v[0], k) for k, v in matrix_distance.items()] 

809 if store is not None: 

810 store["path_mat1"] = copy.deepcopy(reduction) 

811 if verbose > 0 and fLOG is not None: 

812 fLOG("[distance_matching_graphs_paths] private_kruskal_matrix") 

813 self.private_kruskal_matrix(reduction, False) 

814 if store is not None: 

815 store["path_mat2"] = copy.deepcopy(reduction) 

816 

817 if verbose > 0 and fLOG is not None: 

818 fLOG("[distance_matching_graphs_paths] pair_count_vertex") 

819 pair_count_edge = {} 

820 pair_count_vertex = {} 

821 for k, v in reduction: 

822 path = matrix_distance[v][1] 

823 for el in path: 

824 n1, n2 = el[2] 

825 if n1 is not None and n2 is not None: 

826 if n1.is_edge() and n2.is_edge(): 

827 add = n1.nb, n2.nb 

828 pair_count_edge[add] = pair_count_edge.get(add, 0) + 1 

829 elif n1.is_vertex() and n2.is_vertex(): 

830 add = n1.nb, n2.nb 

831 pair_count_vertex[ 

832 add] = pair_count_vertex.get(add, 0) + 1 

833 

834 if store is not None: 

835 store["pair_count_vertex"] = pair_count_vertex 

836 store["pair_count_edge"] = pair_count_edge 

837 

838 reduction_edge = [(v, k) for k, v in pair_count_edge.items()] 

839 if store is not None: 

840 store["edge_mat1"] = copy.copy(reduction_edge) 

841 self.private_kruskal_matrix(reduction_edge, True) 

842 if store is not None: 

843 store["edge_mat2"] = copy.copy(reduction_edge) 

844 

845 reduction_vertex = [(v, k) for k, v in pair_count_vertex.items()] 

846 if store is not None: 

847 store["vertex_mat1"] = copy.copy(reduction_vertex) 

848 self.private_kruskal_matrix(reduction_vertex, True) 

849 if store is not None: 

850 store["vertex_mat2"] = copy.copy(reduction_vertex) 

851 

852 if verbose > 0 and fLOG is not None: 

853 fLOG("[distance_matching_graphs_paths] private_count_left_right") 

854 count_edge_left, count_edge_right = self.private_count_left_right( 

855 reduction_edge) 

856 count_vertex_left, count_vertex_right = self.private_count_left_right( 

857 reduction_vertex) 

858 

859 res_graph = GraphDistance([]) 

860 doneVertex = {} 

861 done_edge = {} 

862 

863 if verbose > 0 and fLOG is not None: 

864 fLOG("[distance_matching_graphs_paths] builds merged graph") 

865 for k, v in self.vertices.items(): 

866 newv = Vertex(v.nb, v.label, weight_vertex) 

867 res_graph.vertices[k] = newv 

868 if v.nb in count_vertex_right: 

869 ind = list(count_vertex_right[v.nb].keys())[0] 

870 newv.pair = (v, graph2.vertices[ind]) 

871 doneVertex[ind] = newv 

872 if newv.pair[0].label != newv.pair[1].label: 

873 newv.label = "%s|%s" % ( 

874 newv.pair[0].label, newv.pair[1].label) 

875 else: 

876 newv.pair = (v, None) 

877 

878 for k, v in graph2.vertices.items(): 

879 if k in doneVertex: 

880 continue 

881 newv = Vertex("2a.%s" % v.nb, v.label, weight_vertex) 

882 res_graph.vertices[newv.nb] = newv 

883 newv.pair = (None, v) 

884 

885 for k, e in self.edges.items(): 

886 newe = Edge(e.from_, e.to, e.label, weight_edge) 

887 res_graph.edges[k] = newe 

888 if e.nb in count_edge_right: 

889 ind = list(count_edge_right[e.nb].keys())[0] 

890 newe.pair = (e, graph2.edges[ind]) 

891 done_edge[ind] = newe 

892 else: 

893 newe.pair = (e, None) 

894 

895 for k, e in graph2.edges.items(): 

896 if k in done_edge: 

897 continue 

898 from_ = list(count_vertex_left[e.from_].keys())[0] if e.from_ in count_vertex_left \ 

899 else "2a.%s" % e.from_ 

900 to = list(count_vertex_left[e.to].keys())[0] if e.to in count_vertex_left \ 

901 else "2a.%s" % e.to 

902 if from_ not in res_graph.vertices: 

903 raise RuntimeError("should not happen " + 

904 from_) # pragma: no cover 

905 if to not in res_graph.vertices: 

906 raise RuntimeError("should not happen " + 

907 to) # pragma: no cover 

908 newe = Edge(from_, to, e.label, weight_edge) 

909 res_graph.edges[newe.nb] = newe # pylint: disable=E1101 

910 newe.pair = (None, e) 

911 

912 if verbose > 0 and fLOG is not None: 

913 fLOG( 

914 "[distance_matching_graphs_paths] compute_predecessor, compute_successor") 

915 res_graph.compute_predecessor() 

916 res_graph.compute_successor() 

917 

918 allPaths = list(res_graph.enumerate_all_paths(True)) 

919 

920 temp = [sum(0 if None in _.pair else 1 for _ in p) * 1.0 / len(p) 

921 for p in allPaths] 

922 distance = 1.0 - 1.0 * sum(temp) / len(allPaths) 

923 

924 return distance, res_graph 

925 

926 def draw_vertices_edges(self): 

927 vertices = [] 

928 edges = [] 

929 for k, v in self.vertices.items(): 

930 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None): 

931 vertices.append((k, v.label)) 

932 elif v.pair[1] is None: 

933 vertices.append((k, "-" + v.label, "red")) 

934 elif v.pair[0] is None: 

935 vertices.append((k, "+" + v.label, "green")) 

936 else: 

937 raise RuntimeError("?") # pragma: no cover 

938 

939 for k, v in self.edges.items(): 

940 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None): 

941 edges.append((v.from_, v.to, v.label)) 

942 elif v.pair[1] is None: 

943 edges.append((v.from_, v.to, "-" + v.label, "red")) 

944 elif v.pair[0] is None: 

945 edges.append((v.from_, v.to, "+" + v.label, "green")) 

946 else: 

947 raise RuntimeError("?") # pragma: no cover 

948 

949 return vertices, edges