Beispiel #1
0
        /// <summary>
        /// Reads clustering edges from a text file and builds a <see cref="Heap.SubmodularHeap"/> from them.
        /// </summary>
        /// <remarks>
        /// Every non-blank line must contain at least five space-separated fields, which are converted
        /// to int, int, double, int and double and passed in that order to the
        /// <c>Graph.ClusteringEdge</c> constructor. Numbers are parsed with the current culture
        /// (plain <c>Convert</c> calls), so the file must have been written under a compatible culture.
        /// Blank lines are ignored.
        /// </remarks>
        /// <param name="filename">Path of the edge file to read.</param>
        /// <returns>A heap constructed from all edges found in the file.</returns>
        /// <exception cref="System.FormatException">A field cannot be converted to the expected numeric type.</exception>
        /// <exception cref="System.IndexOutOfRangeException">A non-blank line has fewer than five fields.</exception>
        public static Heap.SubmodularHeap ReadSubmodularHeap(string filename)
        {
            List<Graph.ClusteringEdge> data = new List<Graph.ClusteringEdge>();

            // The using block closes the file even when a malformed line makes the parsing below throw.
            using (System.IO.StreamReader srHeap = new System.IO.StreamReader(filename))
            {
                string content;

                // ReadLine returns null once the end of the stream is reached.
                while ((content = srHeap.ReadLine()) != null)
                {
                    // Tolerate empty lines (e.g. a trailing newline at the end of the file).
                    if (string.IsNullOrWhiteSpace(content))
                    {
                        continue;
                    }

                    string[] items = content.Split(new char[] { ' ' });
                    Graph.ClusteringEdge ce = new Graph.ClusteringEdge(
                        Convert.ToInt32(items[0]),
                        Convert.ToInt32(items[1]),
                        Convert.ToDouble(items[2]),
                        Convert.ToInt32(items[3]),
                        Convert.ToDouble(items[4]));
                    data.Add(ce);
                }
            }

            return new Heap.SubmodularHeap(data);
        }
        /// <summary>
        /// Greedily merges the nodes of <paramref name="inGraph"/> into clusters by repeatedly taking the
        /// highest-gain edge from a <c>Heap.SubmodularHeap</c> and merging the two clusters it connects.
        /// </summary>
        /// <remarks>
        /// The key of each edge is the sum of four gain terms computed by <c>Function.CalculateHGain</c>,
        /// <c>CalculatePGain</c>, <c>CalculateLGain</c> and <c>CalculateMGain</c> (presumably the entropy,
        /// purity, balance and diversity terms reported through the out parameters — confirm against
        /// <c>Function</c>). The P, L and M terms are weighted by <paramref name="lambda"/>,
        /// <paramref name="gamma"/> and <paramref name="mu"/>, each rescaled by the ratio of the largest
        /// initial H gain to the largest initial gain of that term.
        /// <para>
        /// Side effects: <paramref name="inGraph"/> is handed to <c>Function.NormalizeWeight</c> by reference,
        /// and the <c>ClusterID</c> of its nodes is rewritten as clusters are merged. Progress messages are
        /// written to the console.
        /// </para>
        /// </remarks>
        /// <param name="inGraph">Graph whose nodes start out as singleton clusters.</param>
        /// <param name="lambda">Relative weight of the P gain term before rescaling.</param>
        /// <param name="gamma">Relative weight of the L gain term before rescaling.</param>
        /// <param name="mu">Relative weight of the M gain term before rescaling.</param>
        /// <param name="numClusters">Merging stops once the cluster count is no longer greater than this value.</param>
        /// <param name="outputDirectory">Not used by the active code; only referenced by the commented-out dump code at the end.</param>
        /// <param name="entropy">Set by <c>Function.CalculateObjective</c> for the final clustering.</param>
        /// <param name="purity">Set by <c>Function.CalculateObjective</c> for the final clustering.</param>
        /// <param name="balance">Set by <c>Function.CalculateObjective</c> for the final clustering.</param>
        /// <param name="diversity">Set by <c>Function.CalculateObjective</c> for the final clustering.</param>
        /// <param name="objective">Value returned by <c>Function.CalculateObjective</c> for the final clustering.</param>
        /// <returns>
        /// The resulting clusters. If the heap runs out of edges first, more than
        /// <paramref name="numClusters"/> clusters may be returned.
        /// </returns>
        public static List<Graph.CompositeNode> Clustering(Graph.HyperGraph inGraph, double lambda, double gamma, double mu, int numClusters, string outputDirectory, out double entropy, out double purity, out double balance, out double diversity, out double objective)
        {
            #region initialize
            // At the beginning every cluster contains exactly one node.
            List<Graph.CompositeNode> clusters = new List<Graph.CompositeNode>();
            foreach (var nodeGroup in inGraph.Nodes)
            {
                foreach (var node in nodeGroup)
                {
                    clusters.Add(new Graph.CompositeNode(
                        new List<int> { node.NodeID },
                        new List<int> { node.Label },
                        new List<int> { node.Modality }));
                }
            }

            //for debugging
            //CoupledDL.LazyGreedy.OutputClusterResult(clusters, inGraph, @"D:\Result\Temporary\Cluster\");

            // Per-vertex loop weights and their total, then normalisation of loop and graph by that total.
            double[] loop        = Function.CalculateLoopWeight(inGraph);
            double   totalWeight = Function.CalculateTotalWeight(loop);
            Function.NormalizeWeight(ref loop, ref inGraph, totalWeight, 1);

            // Initial gain of every edge for each of the four terms, plus the largest value per term.
            int      numEdges = inGraph.NumEdges;
            double[] hGain    = new double[numEdges];
            double[] pGain    = new double[numEdges];
            double[] lGain    = new double[numEdges];
            double[] mGain    = new double[numEdges];

            double maxHGain = 0, maxPGain = 0, maxLGain = 0, maxMGain = 0;

            for (int i = 0; i < numEdges; i++)
            {
                Console.WriteLine(i + "th edge's gain is initialized.");

                var edge = inGraph.Edges[i];
                hGain[i] = Function.CalculateHGain(edge.Weight, loop[edge.StartID] - edge.Weight, loop[edge.EndID] - edge.Weight);

                int clusterIndex1 = Function.FindIndex(inGraph, edge.StartID);
                int clusterIndex2 = Function.FindIndex(inGraph, edge.EndID);

                // The cluster-dependent terms stay 0 for an edge whose endpoints share a cluster.
                if (clusterIndex1 != clusterIndex2)
                {
                    pGain[i] = Function.CalculatePGain(clusters, inGraph.NumLabelNodes, clusterIndex1, clusterIndex2);
                    lGain[i] = Function.CalculateLGain(clusters, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, clusterIndex1, clusterIndex2);
                    mGain[i] = Function.CalculateMGain(clusters, inGraph.NumEachModality, clusterIndex1, clusterIndex2);
                }

                if (hGain[i] > maxHGain)
                {
                    maxHGain = hGain[i];
                }
                if (pGain[i] > maxPGain)
                {
                    maxPGain = pGain[i];
                }
                if (lGain[i] > maxLGain)
                {
                    maxLGain = lGain[i];
                }
                if (mGain[i] > maxMGain)
                {
                    maxMGain = mGain[i];
                }
            }

            // Rescale each trade-off weight so its term is comparable to the H term.
            // A maximum of 0 means no edge has a positive gain for that term; dividing by it would
            // yield Infinity/NaN and corrupt every heap key, so the term is switched off instead.
            double adjustedLambda = maxPGain > 0 ? lambda * maxHGain / maxPGain : 0.0;
            double adjustedGamma  = maxLGain > 0 ? gamma * maxHGain / maxLGain : 0.0;
            double adjustedMu     = maxMGain > 0 ? mu * maxHGain / maxMGain : 0.0;

            List<Graph.ClusteringEdge> edges = new List<Graph.ClusteringEdge>(numEdges);
            for (int i = 0; i < numEdges; i++)
            {
                var edge = inGraph.Edges[i];

                // The M term is part of the initial key as well, consistent with adjustedMu being
                // passed to UpdateSubmodularHeap and CalculateObjective below.
                double gain = hGain[i] + adjustedLambda * pGain[i] + adjustedGamma * lGain[i] + adjustedMu * mGain[i];
                edges.Add(new Graph.ClusteringEdge(edge.StartID, edge.EndID, edge.Weight, edge.Type, gain));
            }

            // Drop the per-edge scratch arrays before the merge loop starts.
            hGain = null; pGain = null; lGain = null; mGain = null;
            #endregion

            #region build the heap
            Heap.SubmodularHeap heap = new Heap.SubmodularHeap(edges);
            heap.CheckMaxHeap(); // for debugging
            #endregion

            #region greedily add the edge and perform the clustering
            int          countCluster = inGraph.NumNodes;
            List<double> addedEdges   = new List<double>();
            while (countCluster > numClusters)
            {
                if (heap.IsEmpty())
                {
                    // No edges left to add: stop early and report the clustering reached so far.
                    Console.WriteLine("Heap is empty!");
                    break;
                }

                Console.WriteLine(countCluster + "->" + numClusters + "clusters remained");

                // Take the edge with the largest key.
                Graph.ClusteringEdge bestEdge = heap.ExtractMax();
                addedEdges.Add(bestEdge.Weight);

                // Merge the two clusters the edge connects, unless it lies inside a single cluster.
                int clusterID1 = Function.FindIndex(inGraph, bestEdge.StartID);
                int clusterID2 = Function.FindIndex(inGraph, bestEdge.EndID);
                if (clusterID1 != clusterID2)
                {
                    // MergeTwoClusters returns the index of the cluster that no longer exists.
                    int remove = Function.MergeTwoClusters(ref clusters, clusterID1, clusterID2);
                    int stay   = clusterID1 == remove ? clusterID2 : clusterID1;

                    foreach (var nodeId in clusters[stay].SingleNodes)
                    {
                        inGraph.FindNode(nodeId).ClusterID = stay;
                    }

                    countCluster--;

                    // Shift the ClusterID of every node in the clusters from index `remove` onwards down
                    // by one (presumably because the merged-away entry was removed from the list).
                    for (int idx = remove; idx < countCluster; idx++)
                    {
                        foreach (var nodeId in clusters[idx].SingleNodes)
                        {
                            inGraph.FindNode(nodeId).ClusterID--;
                        }
                    }

                    // Subtract the merged edge's weight from the loop weight of both endpoints.
                    loop[bestEdge.StartID] -= bestEdge.Weight;
                    loop[bestEdge.EndID]   -= bestEdge.Weight;
                }

                heap.UpdateSubmodularHeap(clusters, inGraph, loop, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, adjustedLambda, adjustedGamma, adjustedMu);
            }

            objective = Function.CalculateObjective(clusters, inGraph, addedEdges, loop, adjustedLambda, adjustedGamma, adjustedMu, out entropy, out purity, out balance, out diversity);

            //string clusterFilename = Path.Combine(outputDirectory, "cluster.txt");
            //string heapFilename = Path.Combine(outputDirectory, "heap.txt");
            //string paraFilename = Path.Combine(outputDirectory, "parameter.txt");

            //Function.WriteClusters(clusters, clusterFilename);
            //Function.WriteParameters(loop, totalWeight, adjustedLambda, adjustedGamma, paraFilename);
            //heap.PrintHeap(heapFilename);

            return clusters;

            #endregion
        }