Scippy

GCG

Branch-and-Price & Column Generation for Everyone

graphalgorithms_def.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program */
4 /* GCG --- Generic Column Generation */
5 /* a Dantzig-Wolfe decomposition based extension */
6 /* of the branch-cut-and-price framework */
7 /* SCIP --- Solving Constraint Integer Programs */
8 /* */
9 /* Copyright (C) 2010-2021 Operations Research, RWTH Aachen University */
10 /* Zuse Institute Berlin (ZIB) */
11 /* */
12 /* This program is free software; you can redistribute it and/or */
13 /* modify it under the terms of the GNU Lesser General Public License */
14 /* as published by the Free Software Foundation; either version 3 */
15 /* of the License, or (at your option) any later version. */
16 /* */
17 /* This program is distributed in the hope that it will be useful, */
18 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
19 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
20 /* GNU Lesser General Public License for more details. */
21 /* */
22 /* You should have received a copy of the GNU Lesser General Public License */
23 /* along with this program; if not, write to the Free Software */
24 /* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.*/
25 /* */
26 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
27 
28 /**@file graphalgorithms_def.h
29  * @brief several metrics for graphs
30  * @author Martin Bergner
31  */
32 
33 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
34 
35 #ifndef GCG_GRAPHALGORITHMS_DEF_H_
36 #define GCG_GRAPHALGORITHMS_DEF_H_
37 
38 #include "graph/graphalgorithms.h"
39 #include <vector>
40 #include <algorithm>
41 #include <map>
42 #include <iostream>
43 #include "stdlib.h"
44 #include "graph/graph_tclique.h"
45 #include "graph/graph_gcg.h"
46 
47 
48 using std::vector;
49 
50 
51 namespace gcg {
52 
53 template <typename T>
54 double GraphAlgorithms<T>::cutoff = 0.0;
55 
56 
57 /** compute weighted sum of external degrees */
58 template<class T>
60  Hypergraph<T>& graph /**< the hypergraph */
61 )
62 {
63  SCIP_Real soed = 0.0;
64  size_t nedges = graph.getNHyperedges();
65  vector<int> partition = vector<int>(graph.getPartition());
66 
67  for( size_t i = 0; i < nedges; ++i )
68  {
69  vector<int> nodes = graph.getHyperedgeNodes(i);
70  for( auto &it : nodes)
71  {
72  it = partition[it];
73  }
74  auto end = std::unique(nodes.begin(), nodes.end());
75  if( end - nodes.begin() > 1)
76  soed += ( end - nodes.begin())*graph.getHyperedgeWeight(i);
77  }
78  return soed;
79 }
80 
81 /** compute minimum hyperedge cut */
82 template<class T>
84  Hypergraph<T>& graph /**< the hypergraph */
85 )
86 {
87  SCIP_Real mincut = 0.0;
88  size_t nedges = graph.getNHyperedges();
89  vector<int> partition = vector<int>(graph.getPartition());
90 
91  for( size_t i = 0; i < nedges; ++i )
92  {
93  vector<int> nodes = graph.getHyperedgeNodes(i);
94  for( auto &it : nodes)
95  {
96  it = partition[it];
97  }
98  auto end = std::unique(nodes.begin(), nodes.end());
99 
100  if( end - nodes.begin() > 1)
101  mincut += graph.getHyperedgeWeight(i);
102  }
103 
104  return mincut;
105 }
106 
107 /** compute k-1 metric */
108 template<class T>
110  Hypergraph<T>& graph /**< the hypergraph */
111 )
112 {
113  SCIP_Real kmetric = 0.0;
114  size_t nedges = graph.getNHyperedges();
115  vector<int> partition = vector<int>(graph.getPartition());
116 
117  for( size_t i = 0; i < nedges; ++i )
118  {
119  vector<int> nodes = graph.getHyperedgeNodes(i);
120  for( auto &it : nodes)
121  {
122  it = partition[it];
123  }
124  auto end = std::unique(nodes.begin(), nodes.end());
125 
126  kmetric += ( end - nodes.begin() -1)*graph.getHyperedgeWeight(i);
127  }
128 
129  return kmetric;
130 }
131 
132 
133 /** run DBSCAN on the distance graph */
134 template<class T>
135 std::vector<int> GraphAlgorithms<T>::dbscan(
136  Graph<GraphGCG>& graph, /**< the graph with weighted edges */
137  double eps, /**< radius in which we search for the neighbors */
138  int minPts /**< minimum number of neighbors needed to define a core point (can be fixed to 4 as stated in the paper) */
139 )
140 {
141 
142  int n_nodes = graph.getNNodes();
143  std::vector<bool> visited(n_nodes, false);
144  std::vector<bool> is_core(n_nodes, false);
145  std::vector<int> labels(n_nodes, -1);
146  int curr_cluster = -1;
147 
148  // visit all the points and check for the core points
149  for(int i = 0; i < n_nodes; i++)
150  {
151  if(visited[i])
152  {
153  continue;
154  }
155  visited[i] = true;
156 
157  // check if i is a core point
158  std::vector<std::pair<int, double> > NeighborPts_row = graph.getNeighborWeights(i); // gets ALL connected points
159  std::vector<int> NeighborPts; // holds ONLY the neighbors within the radius
160  for(int j = 0; j < (int)NeighborPts_row.size(); j++)
161  {
162  if(NeighborPts_row[j].second <= eps)
163  {
164  NeighborPts.push_back(NeighborPts_row[j].first);
165  }
166 
167  }
168  if((int)NeighborPts.size() < minPts)
169  {
170  labels[i] = -1; // mark point as noise
171  }
172  else {
173  curr_cluster++;
174  expandCluster(graph, visited, is_core, labels, i, NeighborPts, curr_cluster, eps, minPts);
175  }
176  }
177 
178  // Add border points also. We assign the border point the same label as the first core point. This is basically arbitrary, but not random.
179  for(int i = 0; i < n_nodes; i++)
180  {
181  if(is_core[i])
182  {
183  continue;
184  }
185  std::vector<std::pair<int, double> > NeighborPts_row = graph.getNeighborWeights(i); // gets ALL connected points // holds ONLY the neighbors within the radius
186  for(int j = 0; j < (int)NeighborPts_row.size(); j++)
187  {
188  if(NeighborPts_row[j].second <= eps && is_core[NeighborPts_row[j].first])
189  {
190  labels[i] = labels[NeighborPts_row[j].first];
191  break;
192  }
193  }
194  }
195 
196  return labels;
197 }
198 
199 /** help function for DBSCAN */
200 template<class T>
202  Graph<T>& graph,
203  std::vector<bool>& visited,
204  std::vector<bool>& is_core,
205  std::vector<int>& labels,
206  int point,
207  std::vector<int>& NeighborPts,
208  int curr_cluster,
209  double eps,
210  int minPts
211 )
212 {
213  // add P to cluster C
214  labels[point] = curr_cluster;
215  is_core[point] = true;
216 
217  // for each point P' in NeighborPts
218  for(int i = 0; i < (int)NeighborPts.size(); i++)
219  {
220  int neighbor = NeighborPts[i];
221  // if P' is not visited {
222  if(!visited[neighbor])
223  {
224  // mark P' as visited
225  visited[neighbor] = true;
226  // NeighborPts' = regionQuery(P', eps)
227  std::vector<std::pair<int, double> > NeighborPts_tmp_row = graph.getNeighborWeights(neighbor); // gets ALL connected points
228  std::vector<int> NeighborPts_tmp; // holds ONLY the neighbors within the radius
229  for(int j = 0; j < (int)NeighborPts_tmp_row.size(); j++)
230  {
231  if(NeighborPts_tmp_row[j].second <= eps)
232  {
233  NeighborPts_tmp.push_back(NeighborPts_tmp_row[j].first);
234  }
235 
236  }
237  if((int)NeighborPts_tmp.size() >= minPts)
238  {
239  // NeighborPts = NeighborPts joined with NeighborPts'
240  NeighborPts.insert(NeighborPts.end(), NeighborPts_tmp.begin(), NeighborPts_tmp.end());
241  }
242 
243  }
244 
245  // if P' is not yet member of any cluster
246  // add P' to cluster C
247  if(labels[neighbor] < 0)
248  {
249  labels[neighbor] = curr_cluster;
250  is_core[neighbor] = true;
251  }
252  }
253 }
254 
255 /*
256  * template<class T>
257  * std::vector<int> GraphAlgorithms<T>::mst(
258  */
259 template<class T>
260 std::vector<int> GraphAlgorithms<T>::mst(
261  Graph<GraphGCG>& graph, /**< the graph with weighted edges */
262  double _cutoff, /**< threshold below which we cut the edges */
263  int minPts /**< minimum number of points needed in a cluster */
264 )
265 {
266  cutoff = _cutoff;
267  // Step 1: find a minimum spanning tree using Kruskal's algorithm
268 
269  int nnodes = graph.getNNodes();
270  std::vector<void*> edges(graph.getNEdges());
271  graph.getEdges(edges);
272 
273  vector<EdgeGCG> resultMST(nnodes-1);
274  int e = 0; // An index variable, used for resultMST
275  unsigned int j = 0; // An index variable, used for sorted edges
276 
277  // Step 1: Sort all the edges in non-decreasing order of their weight
278  // If we are not allowed to change the given graph, we can create a copy of
279  // array of edges
280  sort(edges.begin(), edges.end(), weightComp);
281 
282  // Create V subsets (one for each node)
283  vector<subset> subsetsmst(nnodes);
284 
285  // Create V subsets with single elements
286  for (int v = 0; v < nnodes; ++v)
287  {
288  subsetsmst[v].parent = v;
289  subsetsmst[v].rank = 0;
290  }
291 
292  // Number of edges to be taken is equal to V-1
293  while (e < nnodes - 1)
294  {
295  // Step 2: Pick the smallest edge. And increment the index
296  // for next iteration
297  if(j == edges.size()) break;
298  EdgeGCG next_edge = *(EdgeGCG *)(edges[j++]);
299  assert(next_edge.src < graph.getNNodes());
300  assert(next_edge.dest < graph.getNNodes());
301 
302  int x = mstfind(subsetsmst, next_edge.src); // O(tree height)
303  int y = mstfind(subsetsmst, next_edge.dest);
304 
305  // If including this edge does't cause cycle, include it
306  // in result and increment the index of result for next edge
307  if (x != y)
308  {
309  resultMST[e++] = next_edge;
310  mstunion(subsetsmst, x, y);
311  }
312  // Else discard the next_edge
313  }
314 
315  resultMST.resize(e);
316 
317  // Step 2: remove all the edges from the MST that are greater or equal to cutoff
318 
319  //std::vector<EdgeGCG> resultDBG = resultMST;
320  resultMST.erase(std::remove_if(resultMST.begin(), resultMST.end(), cutoffif), resultMST.end());
321  // Step 3: use find-union to find the graph components
322 
323  // saves the component labels (i.e. cluster labels)
324  std::vector<int> labels(nnodes, -1);
325 
326  // Create V subsets (one for each node)
327  std::vector<subset> subsetscomp(nnodes);
328 
329  // Create V subsets with single elements
330  for (int v = 0; v < nnodes; ++v)
331  {
332  subsetscomp[v].parent = v;
333  subsetscomp[v].rank = 0;
334  }
335 
336 
337  // iterate all the edges and assign its nodes to the root node of the set
338  for(unsigned int edge_it = 0; edge_it < resultMST.size(); edge_it++)
339  {
340  auto edge = resultMST[edge_it];
341  /*if(!(edge.src < graph.getNNodes()) || !(edge.dest < graph.getNNodes())){
342  cout << "DEBUG ME!!!" << endl;
343  }*/
344  assert(edge.src < graph.getNNodes());
345  assert(edge.dest < graph.getNNodes());
346  // if the nodes are directly connected, put them in the same set
347  mstunion(subsetscomp, edge.src, edge.dest);
348  }
349 
350  for(int i = 0; i < nnodes; i++)
351  {
352  labels[i] = mstfind(subsetscomp, i);
353  }
354 
355 
356  // remove the clusters that are smaller than minPts
357  std::map<int,int> labelcount;
358  for(int i = 0; i < nnodes; i++)
359  {
360  labelcount[labels[i]]++;
361  }
362 
363  for(int i = 0; i < nnodes; i++)
364  {
365  if(labelcount[labels[i]] < minPts)
366  labels[i] = -1;
367  }
368 
369 
370  // reassign the labels so that they start from 0 (actually this is done in postProcess, so we can skip this part here)
371 
372 
373  return labels;
374 }
375 
376 
377 template<class T>
379 {
380  return a.weight > cutoff;
381 }
382 
383 
384 // Compare two edges according to their weights.
385 // Used in sort() for sorting an array of edges
386 template<class T>
387 int GraphAlgorithms<T>::weightComp(const void* a, const void* b)
388 {
389  const EdgeGCG* a1 = static_cast<const EdgeGCG*>(a);
390  const EdgeGCG* b1 = static_cast<const EdgeGCG*>(b);
391  return a1->weight < b1->weight;
392 }
393 
394 
395 // A utility function to find set of an element i
396 // (uses path compression technique)
397 template<class T>
398 int GraphAlgorithms<T>::mstfind(std::vector<subset>& subsets, int i)
399 {
400  // find root and make root as parent of i (path compression)
401  if (subsets[i].parent != i)
402  subsets[i].parent = mstfind(subsets, subsets[i].parent);
403 
404  return subsets[i].parent;
405 }
406 
407 
408 // A function that does union of two sets of x and y
409 // (uses union by rank)
410 template<class T>
411 void GraphAlgorithms<T>::mstunion(std::vector<subset>& subsets, int x, int y)
412 {
413  int xroot = mstfind(subsets, x);
414  int yroot = mstfind(subsets, y);
415 
416  // Attach smaller rank tree under root of high rank tree
417  // (Union by Rank)
418  if (subsets[xroot].rank < subsets[yroot].rank)
419  subsets[xroot].parent = yroot;
420  else if (subsets[xroot].rank > subsets[yroot].rank)
421  subsets[yroot].parent = xroot;
422 
423  // If ranks are same, then make one as root and increment
424  // its rank by one
425  else
426  {
427  subsets[yroot].parent = xroot;
428  subsets[xroot].rank++;
429  }
430 }
431 
432 
433 template<class T>
434 std::vector<int> GraphAlgorithms<T>::mcl(
435  Graph<GraphGCG>& graph, /**< the graph with weighted edges */
436  int& stoppedAfter, /**< number of iterations after which the clustering terminated */
437  double inflatefac, /**< inflate factor */
438  int maxiters, /**< max number of iterations, set to 25 per default */
439  int expandfac /**< expand factor, should be always set to 2 */
440 )
441 {
442 #ifdef WITH_GSL
443  graph.initMCL();
444  graph.colL1Norm();
445  graph.prune();
446 
447  int i = 0;
448  for(; i < maxiters; i++)
449  {
450  graph.inflate(inflatefac);
451  graph.expand(expandfac);
452  graph.prune();
453 
454  if(graph.stopMCL(i))
455  {
456  break;
457  }
458  }
459  assert(i > 6);
460  stoppedAfter = i;
461 
462  std::vector<int> res = graph.getClustersMCL();
463  graph.clearMCL();
464  return res;
465 #else
466  return vector<int>();
467 #endif
468 }
469 
470 
471 
472 
473 } /* namespace gcg */
474 #endif
int getHyperedgeWeight(int i)
static bool cutoffif(EdgeGCG &a)
static std::vector< int > dbscan(Graph< GraphGCG > &graph, double eps, int minPts=4)
static void expandCluster(Graph< T > &graph, std::vector< bool > &visited, std::vector< bool > &is_core, std::vector< int > &labels, int point, std::vector< int > &NeighborPts, int curr_cluster, double eps, int minPts)
static SCIP_Real computeMincut(Hypergraph< T > &graph)
static void mstunion(std::vector< subset > &subsets, int x, int y)
static std::vector< int > mcl(Graph< GraphGCG > &graph, int &stoppedAfter, double inflatefac, int maxiters=25, int expandfac=2)
static SCIP_RETCODE partition(SCIP *scip, SCIP_VAR **J, int *Jsize, SCIP_Longint *priority, SCIP_VAR **F, int Fsize, SCIP_VAR **origvar, SCIP_Real *median)
double weight
Definition: graph_gcg.h:52
std::vector< std::pair< int, double > > getNeighborWeights(int i)
Definition: graph_def.h:270
virtual std::vector< int > getPartition() const
interface to the SCIP tclique graph library
static std::vector< int > mst(Graph< GraphGCG > &graph, double cutoff, int minPts=4)
std::vector< int > getHyperedgeNodes(int i)
int getNEdges()
Definition: graph_def.h:79
static int mstfind(std::vector< subset > &subsets, int i)
static SCIP_Real computeSoed(Hypergraph< T > &graph)
Implementation of the graph which supports both node and edge weights.
several metrics for graphs
SCIP_RETCODE getEdges(std::vector< void * > &edges)
Definition: graph_def.h:84
static int weightComp(const void *a, const void *b)
static SCIP_Real computekMetric(Hypergraph< T > &graph)
int getNNodes()
Definition: graph_def.h:74