/// <summary>
/// Reads clustering edges from a text file and builds a <see cref="Heap.SubmodularHeap"/> from them.
/// </summary>
/// <remarks>
/// Every non-blank line must contain five space-separated fields. They are passed, in file order, to the
/// <see cref="Graph.ClusteringEdge"/> constructor as (int, int, double, int, double). Numbers are parsed
/// with <see cref="Convert"/>, i.e. using the current culture.
/// </remarks>
/// <param name="filename">Path of the heap file to read.</param>
/// <returns>A heap built from all edges parsed from the file.</returns>
public static Heap.SubmodularHeap ReadSubmodularHeap(string filename)
{
    List<Graph.ClusteringEdge> data = new List<Graph.ClusteringEdge>();

    // 'using' closes the reader even when a line fails to parse and an exception propagates.
    using (System.IO.StreamReader srHeap = new System.IO.StreamReader(filename))
    {
        string content;

        // ReadLine() returns null at end of stream; this avoids relying on Peek() to detect the end.
        while ((content = srHeap.ReadLine()) != null)
        {
            // Tolerate blank lines (e.g. a trailing empty line) instead of failing on them.
            if (content.Trim().Length == 0)
            {
                continue;
            }

            // RemoveEmptyEntries keeps repeated spaces from producing empty tokens.
            string[] items = content.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            Graph.ClusteringEdge ce = new Graph.ClusteringEdge(
                Convert.ToInt32(items[0]),
                Convert.ToInt32(items[1]),
                Convert.ToDouble(items[2]),
                Convert.ToInt32(items[3]),
                Convert.ToDouble(items[4]));
            data.Add(ce);
        }
    }

    return new Heap.SubmodularHeap(data);
}
/// <summary>
/// Greedily clusters the nodes of <paramref name="inGraph"/>: every node starts as its own cluster and the
/// edge with the largest gain is repeatedly extracted from a <see cref="Heap.SubmodularHeap"/>, merging the
/// two clusters it connects, until <paramref name="numClusters"/> clusters remain or the heap runs empty.
/// </summary>
/// <param name="inGraph">Graph to cluster. Its edge weights are normalized and the ClusterID of its nodes is updated in place.</param>
/// <param name="lambda">Weight of the P-gain term; rescaled internally by maxHGain / maxPGain.</param>
/// <param name="gamma">Weight of the L-gain term; rescaled internally by maxHGain / maxLGain.</param>
/// <param name="mu">Weight of the M-gain term; rescaled internally by maxHGain / maxMGain.</param>
/// <param name="numClusters">Number of clusters at which merging stops.</param>
/// <param name="outputDirectory">Currently unused; the file dumps that consumed it are commented out.</param>
/// <param name="entropy">Entropy value reported by <c>Function.CalculateObjective</c>.</param>
/// <param name="purity">Purity value reported by <c>Function.CalculateObjective</c>.</param>
/// <param name="balance">Balance value reported by <c>Function.CalculateObjective</c>.</param>
/// <param name="diversity">Diversity value reported by <c>Function.CalculateObjective</c>.</param>
/// <param name="objective">Objective value returned by <c>Function.CalculateObjective</c> for the final clustering.</param>
/// <returns>
/// The resulting clusters. More than <paramref name="numClusters"/> clusters are returned when the heap
/// becomes empty before the target is reached.
/// </returns>
public static List<Graph.CompositeNode> Clustering(Graph.HyperGraph inGraph, double lambda, double gamma, double mu, int numClusters, string outputDirectory, out double entropy, out double purity, out double balance, out double diversity, out double objective)
{
    #region initialize
    // At the beginning each cluster contains a single node.
    List<Graph.CompositeNode> clusters = new List<Graph.CompositeNode>();
    foreach (var nodei in inGraph.Nodes)
    {
        foreach (var node in nodei)
        {
            List<int> tmp = new List<int>();
            List<int> tmp2 = new List<int>();
            List<int> tmp3 = new List<int>();
            tmp.Add(node.NodeID);
            tmp2.Add(node.Label);
            tmp3.Add(node.Modality);
            clusters.Add(new Graph.CompositeNode(tmp, tmp2, tmp3));
        }
    }

    // For debugging:
    //CoupledDL.LazyGreedy.OutputClusterResult(clusters, inGraph, @"D:\Result\Temporary\Cluster\");

    // Calculate the self loop weight and the total weight of each vertex, then normalize.
    double[] loop = Function.CalculateLoopWeight(inGraph);
    double totalWeight = Function.CalculateTotalWeight(loop);
    Function.NormalizeWeight(ref loop, ref inGraph, totalWeight, 1);

    // Calculate the initial objective gains and decide the trade-off parameters.
    double[] hGain = new double[inGraph.NumEdges];
    double[] pGain = new double[inGraph.NumEdges];
    double[] lGain = new double[inGraph.NumEdges];
    double[] mGain = new double[inGraph.NumEdges];
    double maxHGain = 0, maxPGain = 0, maxLGain = 0, maxMGain = 0;

    for (int i = 0; i < inGraph.NumEdges; i++)
    {
        Console.WriteLine(i + "th edge's gain is initialized.");

        var edge = inGraph.Edges[i];
        hGain[i] = Function.CalculateHGain(
            edge.Weight,
            loop[edge.StartID] - edge.Weight,
            loop[edge.EndID] - edge.Weight);

        int clusterIndex1 = Function.FindIndex(inGraph, edge.StartID);
        int clusterIndex2 = Function.FindIndex(inGraph, edge.EndID);

        // The P/L/M gains stay 0 for an edge whose end points already share a cluster.
        if (clusterIndex1 != clusterIndex2)
        {
            pGain[i] = Function.CalculatePGain(clusters, inGraph.NumLabelNodes, clusterIndex1, clusterIndex2);
            lGain[i] = Function.CalculateLGain(clusters, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, clusterIndex1, clusterIndex2);
            mGain[i] = Function.CalculateMGain(clusters, inGraph.NumEachModality, clusterIndex1, clusterIndex2);
        }

        if (hGain[i] > maxHGain) { maxHGain = hGain[i]; }
        if (pGain[i] > maxPGain) { maxPGain = pGain[i]; }
        if (lGain[i] > maxLGain) { maxLGain = lGain[i]; }
        if (mGain[i] > maxMGain) { maxMGain = mGain[i]; }
    }

    // Rescale each trade-off weight so that its term is comparable to the H gain.
    // The maxima start at 0 and only grow, so "> 0" is the only case in which the division is defined.
    // Without the guard a term with no positive gain yields Infinity/NaN, and Infinity * 0 turns every
    // initial heap key into NaN.
    double adjustedLambda = maxPGain > 0 ? lambda * maxHGain / maxPGain : 0;
    double adjustedGamma = maxLGain > 0 ? gamma * maxHGain / maxLGain : 0;
    double adjustedMu = maxMGain > 0 ? mu * maxHGain / maxMGain : 0;

    List<Graph.ClusteringEdge> edges = new List<Graph.ClusteringEdge>();
    for (int i = 0; i < inGraph.NumEdges; i++)
    {
        // NOTE(review): mGain/adjustedMu are computed above and adjustedMu is passed to the heap update and
        // to the objective, but the initial gain omits adjustedMu * mGain[i] -- confirm this is intended.
        double gain = hGain[i] + adjustedLambda * pGain[i] + adjustedGamma * lGain[i];
        var edge = inGraph.Edges[i];
        edges.Add(new Graph.ClusteringEdge(edge.StartID, edge.EndID, edge.Weight, edge.Type, gain));
    }

    // The per-edge gain arrays are no longer needed once the edges are built.
    hGain = null;
    pGain = null;
    lGain = null;
    mGain = null;
    #endregion

    #region build the heap
    Heap.SubmodularHeap heap = new Heap.SubmodularHeap(edges);
    heap.CheckMaxHeap(); // for debugging
    #endregion

    #region greedily add the edge and perform the clustering
    Graph.ClusteringEdge bestEdge;
    int countCluster = inGraph.NumNodes;
    List<double> addedEdges = new List<double>();

    while (countCluster > numClusters)
    {
        if (heap.IsEmpty())
        {
            // No edges left: stop early and report the objective for the clusters obtained so far.
            Console.WriteLine("Heap is empty!");
            break;
        }

        Console.WriteLine(countCluster + "->" + numClusters + "clusters remained");

        // Find the best edge to add.
        bestEdge = heap.ExtractMax();
        addedEdges.Add(bestEdge.Weight);

        // Merge the clusters which the best edge connects.
        int clusterID1 = Function.FindIndex(inGraph, bestEdge.StartID);
        int clusterID2 = Function.FindIndex(inGraph, bestEdge.EndID);
        if (clusterID1 != clusterID2)
        {
            int remove = Function.MergeTwoClusters(ref clusters, clusterID1, clusterID2);
            int stay = clusterID1 == remove ? clusterID2 : clusterID1;

            // Point every node of the surviving cluster at its index.
            foreach (var c in clusters[stay].SingleNodes)
            {
                inGraph.FindNode(c).ClusterID = stay;
            }

            countCluster--;

            // Shift down the ClusterID of the nodes in every cluster from index 'remove' onwards.
            // NOTE(review): presumably MergeTwoClusters deletes the entry at 'remove' -- verify there.
            for (int c = remove; c < countCluster; c++)
            {
                foreach (var n in clusters[c].SingleNodes)
                {
                    inGraph.FindNode(n).ClusterID--;
                }
            }

            loop[bestEdge.StartID] -= bestEdge.Weight;
            loop[bestEdge.EndID] -= bestEdge.Weight;
        }

        heap.UpdateSubmodularHeap(clusters, inGraph, loop, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, adjustedLambda, adjustedGamma, adjustedMu);
    }

    objective = Function.CalculateObjective(clusters, inGraph, addedEdges, loop, adjustedLambda, adjustedGamma, adjustedMu, out entropy, out purity, out balance, out diversity);

    // Disabled result dumps (the only consumers of outputDirectory):
    //string clusterFilename = Path.Combine(outputDirectory, "cluster.txt");
    //string heapFilename = Path.Combine(outputDirectory, "heap.txt");
    //string paraFilename = Path.Combine(outputDirectory, "parameter.txt");
    //Function.WriteClusters(clusters, clusterFilename);
    //Function.WriteParameters(loop, totalWeight, adjustedLambda, adjustedGamma, paraFilename);
    //heap.PrintHeap(heapFilename);

    return clusters;
    #endregion
}