예제 #1
0
 /// <summary>
 /// Reads a hyper-graph from <paramref name="graphFilename"/> and runs
 /// <c>CoupledDL.LazyGreedy.Clustering</c> on it.
 /// </summary>
 /// <param name="graphFilename">Path handed to <c>HyperGraph.ReadGraph</c>.</param>
 /// <param name="outputClusterDirectory">Forwarded unchanged to the clustering routine.</param>
 /// <param name="numClusters">Forwarded unchanged to the clustering routine.</param>
 /// <param name="lambda">Forwarded unchanged to the clustering routine.</param>
 /// <param name="gamma">Forwarded unchanged to the clustering routine.</param>
 /// <param name="mu">Forwarded unchanged to the clustering routine.</param>
 /// <param name="entropy">Set by the clustering routine.</param>
 /// <param name="purity">Set by the clustering routine.</param>
 /// <param name="balance">Set by the clustering routine.</param>
 /// <param name="diversity">Set by the clustering routine.</param>
 /// <param name="objective">Set by the clustering routine.</param>
 public static void CoupledDictionaryLearning(string graphFilename, string outputClusterDirectory, int numClusters, double lambda, double gamma, double mu, out double entropy, out double purity, out double balance, out double diversity, out double objective)
 {
     // Load the graph to be partitioned.
     CoupledDL.Graph.HyperGraph loadedGraph = CoupledDL.Graph.HyperGraph.ReadGraph(graphFilename);

     // Cluster it. The quality measures come back through the out parameters;
     // the returned cluster list is not used by this wrapper.
     CoupledDL.LazyGreedy.Clustering(
         loadedGraph,
         lambda,
         gamma,
         mu,
         numClusters,
         outputClusterDirectory,
         out entropy,
         out purity,
         out balance,
         out diversity,
         out objective);
 }
예제 #2
0
        /// <summary>
        /// Builds a hyper-graph from the given input files and has its edges generated
        /// by <c>GraphInput.BuildEdgesOfGraph</c>, which receives <paramref name="outputGraphFilename"/>.
        /// </summary>
        /// <param name="roadFilename">Road file read through <c>IO.ReadRoadNetwork</c>; its min/max points bound the grid.</param>
        /// <param name="roadNetworkFilename">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="poiFilename">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="meterologyDirectory">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="mobilityDirectory">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="aqiDirectory">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="outputGraphFilename">Forwarded to <c>BuildEdgesOfGraph</c>.</param>
        /// <param name="numDays">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="knn">Passed to <c>BuildEdgesOfGraph</c> as its first neighbour count.</param>
        /// <param name="timeSlot">Forwarded to the <c>GraphInput</c> constructor.</param>
        /// <param name="s">Not used by this method.</param>
        public static void ConstructGraph(string roadFilename, string roadNetworkFilename, string poiFilename, string meterologyDirectory, string mobilityDirectory, string aqiDirectory, string outputGraphFilename, int numDays, int knn, int timeSlot, double s)
        {
            // The grid spans the bounding points of the road network.
            RoadNetwork network = IO.ReadRoadNetwork(roadFilename);
            Grid cells = new Grid(network.MinPoint, network.MaxPoint, 0.015);

            CoupledDL.GraphInput input = new CoupledDL.GraphInput(
                timeSlot,
                numDays,
                4,
                roadNetworkFilename,
                poiFilename,
                meterologyDirectory,
                mobilityDirectory,
                aqiDirectory,
                cells);

            CoupledDL.Graph.HyperGraph hyperGraph = input.ReadGraphFromFiles(1);

            // Four sigma values are passed to the edge builder along with fixed arguments 9 and 0.
            input.BuildEdgesOfGraph(hyperGraph, knn, 9, new double[] { 10, 2, 4, 4 }, 0, outputGraphFilename);
        }
예제 #3
0
 /// <summary>
 /// Sums, for every node, the weights of all edges that start or end at that node.
 /// </summary>
 /// <param name="inGraph">Graph supplying <c>NumNodes</c> and the edge collection.</param>
 /// <returns>
 /// An array of length <c>inGraph.NumNodes</c>, indexed by the edges' <c>StartID</c> / <c>EndID</c>,
 /// holding the accumulated weight per node.
 /// </returns>
 public static double[] CalculateLoopWeight(Graph.HyperGraph inGraph)
 {
     double[] weightPerNode = new double[inGraph.NumNodes];

     foreach (var edge in inGraph.Edges)
     {
         // Each edge contributes its weight to both of its endpoints.
         weightPerNode[edge.StartID] += edge.Weight;
         weightPerNode[edge.EndID]   += edge.Weight;
     }

     return weightPerNode;
 }
예제 #4
0
        /// <summary>
        /// Evaluates the clustering objective and reports its four components.
        /// </summary>
        /// <remarks>
        /// The returned value is
        /// <c>entropy + adjustedLambda * purity + adjustedGamma * balance + adjustedMu * diversity</c>.
        /// <paramref name="adjustedMu"/> is overwritten with zero inside the method, so the diversity
        /// component is still reported through <paramref name="diversity"/> but is multiplied by zero
        /// in the returned value.
        /// </remarks>
        /// <param name="clusters">Current clusters.</param>
        /// <param name="inGraph">Graph supplying the node, label and modality counts.</param>
        /// <param name="addedEdges">Weights whose <c>-Xlogx</c> values are accumulated into the entropy.</param>
        /// <param name="loop">Per-node loop weights; entries <c>0 .. NumNodes-1</c> are read.</param>
        /// <param name="adjustedLambda">Weight of the purity component.</param>
        /// <param name="adjustedGamma">Weight of the balance component.</param>
        /// <param name="adjustedMu">Ignored: reset to zero before it is used.</param>
        /// <param name="entropy">Receives the entropy component.</param>
        /// <param name="purity">Receives the purity component.</param>
        /// <param name="balance">Receives the balance component.</param>
        /// <param name="diversity">Receives the modality (diversity) component.</param>
        /// <returns>The weighted sum of the four components.</returns>
        public static double CalculateObjective(List <Graph.CompositeNode> clusters, Graph.HyperGraph inGraph, List <double> addedEdges, double[] loop, double adjustedLambda, double adjustedGamma, double adjustedMu, out double entropy, out double purity, out double balance, out double diversity)
        {
            // Part 1: entropy, over the added edge weights and then every node's loop weight.
            entropy = 0.0;
            foreach (double edgeWeight in addedEdges)
            {
                entropy -= Xlogx(edgeWeight);
            }
            for (int node = 0; node < inGraph.NumNodes; node++)
            {
                entropy -= Xlogx(loop[node]);
            }

            // Part 2: purity, the summed dominant-label counts relative to the number of labelled
            // nodes, reduced by the number of clusters.
            purity = 0.0;
            int clusterCount = clusters.Count();
            foreach (var cluster in clusters)
            {
                purity += cluster.MaxLabelCount;
            }
            purity = purity / inGraph.NumLabelNodes - clusterCount;

            // Part 3: balance, from each cluster's share of the labelled and of the unlabelled nodes.
            balance = 0.0;
            foreach (var cluster in clusters)
            {
                double labelledShare = (double)cluster.LabelCount / inGraph.NumLabelNodes;
                balance -= Xlogx(labelledShare);

                double unlabelledShare = (double)cluster.UnLabelCount / inGraph.NumUnLabelNodes;
                balance -= Xlogx(unlabelledShare);
            }
            balance -= 2 * clusterCount;

            // Part 4: modality term. The weight is forced to zero here, see remarks.
            adjustedMu = 0.0;
            diversity  = 0.0;
            for (int modality = 1; modality <= inGraph.NumModality; modality++)
            {
                double modalityTerm = 0.0;
                foreach (var cluster in clusters)
                {
                    if (!cluster.NumModalities.ContainsKey(modality))
                    {
                        continue;
                    }

                    double share = (double)cluster.NumModalities[modality] / inGraph.NumEachModality[modality];
                    modalityTerm += Xlogx(share);
                }

                // Scale by the fraction of all nodes that belong to this modality.
                modalityTerm *= (double)inGraph.NumEachModality[modality] / inGraph.NumNodes;
                diversity    += modalityTerm;
            }
            diversity -= clusterCount;

            return entropy + adjustedLambda * purity + adjustedGamma * balance + adjustedMu * diversity;
        }
예제 #5
0
 /// <summary>
 /// Divides every edge weight of <paramref name="inGraph"/> by <paramref name="totalWeight"/>
 /// and, when <paramref name="init"/> equals 1, the first <c>NumNodes</c> loop weights as well.
 /// </summary>
 /// <param name="loop">Per-node loop weights; modified in place only when <paramref name="init"/> is 1.</param>
 /// <param name="inGraph">Graph whose edges are rescaled in place.</param>
 /// <param name="totalWeight">Divisor applied to the weights.</param>
 /// <param name="init">Pass 1 to rescale <paramref name="loop"/> too; any other value leaves it untouched.</param>
 public static void NormalizeWeight(ref double[] loop, ref Graph.HyperGraph inGraph, double totalWeight, int init)
 {
     bool rescaleLoopWeights = init == 1;
     if (rescaleLoopWeights)
     {
         int nodeTotal = 0;
         while (nodeTotal < inGraph.NumNodes)
         {
             loop[nodeTotal] /= totalWeight;
             nodeTotal++;
         }
     }

     // The edge weights are rescaled regardless of init.
     foreach (var edge in inGraph.Edges)
     {
         edge.Weight /= totalWeight;
     }
 }
예제 #6
0
        /// <summary>
        /// Returns the cluster ID stored on the node with the given ID.
        /// </summary>
        /// <param name="inGraph">Graph whose <c>FindNode</c> lookup resolves the node.</param>
        /// <param name="nodeID">ID of the node to look up.</param>
        /// <returns>The <c>ClusterID</c> of the node returned by <c>inGraph.FindNode(nodeID)</c>.</returns>
        public static int FindIndex(Graph.HyperGraph inGraph, int nodeID)
        {
            // Membership is read straight from the node; the cluster list is not scanned.
            return inGraph.FindNode(nodeID).ClusterID;
        }
예제 #7
0
            /// <summary>
            /// Re-evaluates the root of the heap until its stored gain no longer changes.
            /// </summary>
            /// <remarks>
            /// Only the root is refreshed (lazy evaluation relying on the diminishing-return property
            /// of submodular functions). After a refresh that changed the value, a root whose gain
            /// became zero is extracted; otherwise the heap property is restored from the root.
            /// </remarks>
            /// <param name="clusters">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="inGraph">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="loop">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="numLabelNodes">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="numUnLabelNodes">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="lambda">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="gamma">Forwarded to <c>UpdateHeapValue</c>.</param>
            /// <param name="mu">Forwarded to <c>UpdateHeapValue</c>.</param>
            public void UpdateSubmodularHeap(List <Graph.CompositeNode> clusters, Graph.HyperGraph inGraph, double[] loop, int numLabelNodes, int numUnLabelNodes, double lambda, double gamma, double mu)
            {
                for (;;)
                {
                    int status = UpdateHeapValue(clusters, inGraph, loop, 0, numLabelNodes, numUnLabelNodes, lambda, gamma, mu);
                    if (status != 0)
                    {
                        // The root value is unchanged (or the heap is empty): nothing left to do.
                        return;
                    }

                    if (this.IsEmpty())
                    {
                        return;
                    }

                    bool rootIsWorthless = this.Data[0].Gain == 0;
                    if (rootIsWorthless)
                    {
                        this.ExtractMax();
                    }
                    else
                    {
                        this.MaxHeapify(0);
                    }
                }
            }
예제 #8
0
            /// <summary>
            /// Recomputes the gain stored at heap position <paramref name="i"/> and reports whether it changed.
            /// </summary>
            /// <remarks>
            /// <paramref name="mu"/> is overwritten with zero inside the method, so the modality gain is
            /// computed but multiplied by zero. An entry whose two end nodes already share a cluster
            /// gets a gain of zero.
            /// </remarks>
            /// <param name="clusters">Current clusters, passed to the gain functions.</param>
            /// <param name="inGraph">Graph used to resolve cluster IDs and modality counts.</param>
            /// <param name="loop">Per-node loop weights, indexed by the entry's start and end IDs.</param>
            /// <param name="i">Position in <c>Data</c> of the entry to refresh.</param>
            /// <param name="numLabelNodes">Passed to the purity and balance gain functions.</param>
            /// <param name="numUnLabelNodes">Passed to the balance gain function.</param>
            /// <param name="lambda">Weight of the purity gain.</param>
            /// <param name="gamma">Weight of the balance gain.</param>
            /// <param name="mu">Ignored: reset to zero before it is used.</param>
            /// <returns>1 when the heap is empty or the gain is unchanged; 0 when the gain changed.</returns>
            public int UpdateHeapValue(List <Graph.CompositeNode> clusters, Graph.HyperGraph inGraph, double[] loop, int i, int numLabelNodes, int numUnLabelNodes, double lambda, double gamma, double mu)
            {
                if (this.IsEmpty())
                {
                    return 1;
                }

                // The modality weight is switched off here.
                mu = 0;

                // Remember the stored gain so a change can be detected afterwards.
                double previousGain = this.Data[i].Gain;

                int firstCluster  = Function.FindIndex(inGraph, this.Data[i].StartID);
                int secondCluster = Function.FindIndex(inGraph, this.Data[i].EndID);

                if (firstCluster != secondCluster)
                {
                    // Recompute the four gain components for merging the two clusters.
                    double entropyGain  = Function.CalculateHGain(this.Data[i].Weight, loop[this.Data[i].StartID] - this.Data[i].Weight, loop[this.Data[i].EndID] - this.Data[i].Weight);
                    double purityGain   = Function.CalculatePGain(clusters, numLabelNodes, firstCluster, secondCluster);
                    double balanceGain  = Function.CalculateLGain(clusters, numLabelNodes, numUnLabelNodes, firstCluster, secondCluster);
                    double modalityGain = Function.CalculateMGain(clusters, inGraph.NumEachModality, firstCluster, secondCluster);

                    this.Data[i].Gain = entropyGain + lambda * purityGain + gamma * balanceGain + mu * modalityGain;
                }
                else
                {
                    // Both ends are in the same cluster, so the edge would close a cycle.
                    // Zero-gain entries are removed from the heap by the caller.
                    this.Data[i].Gain = 0;
                }

                return previousGain == this.Data[i].Gain ? 1 : 0;
            }
        /// <summary>
        /// Clusters the nodes of <paramref name="inGraph"/> by repeatedly taking the highest-gain edge
        /// from a heap and merging the two clusters it connects, until
        /// <paramref name="numClusters"/> clusters remain or the heap runs empty.
        /// </summary>
        /// <remarks>
        /// <para>
        /// The trade-off weights are rescaled relative to the largest initial entropy gain:
        /// <c>weight * maxHGain / maxXGain</c>. When no edge has a positive gain of a given kind the
        /// divisor would be zero and the weight would become Infinity or NaN, poisoning every heap
        /// gain. In that case the weight is set to zero instead, which drops the term.
        /// </para>
        /// <para>
        /// The initial heap gains use only the entropy, purity and balance terms; the modality
        /// term is not included.
        /// </para>
        /// <para>
        /// <paramref name="inGraph"/> is modified: the weights are passed through
        /// <c>Function.NormalizeWeight</c> and the nodes' <c>ClusterID</c> values are rewritten
        /// as clusters merge.
        /// </para>
        /// </remarks>
        /// <param name="inGraph">Graph to cluster.</param>
        /// <param name="lambda">Unscaled weight of the purity term.</param>
        /// <param name="gamma">Unscaled weight of the balance term.</param>
        /// <param name="mu">Unscaled weight of the modality term.</param>
        /// <param name="numClusters">Target number of clusters.</param>
        /// <param name="outputDirectory">Not used by the active code of this method.</param>
        /// <param name="entropy">Entropy component reported by <c>Function.CalculateObjective</c>.</param>
        /// <param name="purity">Purity component reported by <c>Function.CalculateObjective</c>.</param>
        /// <param name="balance">Balance component reported by <c>Function.CalculateObjective</c>.</param>
        /// <param name="diversity">Diversity component reported by <c>Function.CalculateObjective</c>.</param>
        /// <param name="objective">Value returned by <c>Function.CalculateObjective</c>.</param>
        /// <returns>The resulting list of clusters.</returns>
        public static List <Graph.CompositeNode> Clustering(Graph.HyperGraph inGraph, double lambda, double gamma, double mu, int numClusters, string outputDirectory, out double entropy, out double purity, out double balance, out double diversity, out double objective)
        {
            #region initialize
            // At the start every cluster contains a single node.
            List <Graph.CompositeNode> clusters = new List <Graph.CompositeNode>();
            foreach (var modalityNodes in inGraph.Nodes)
            {
                foreach (var node in modalityNodes)
                {
                    List <int> nodeIDs    = new List <int> { node.NodeID };
                    List <int> labels     = new List <int> { node.Label };
                    List <int> modalities = new List <int> { node.Modality };
                    clusters.Add(new Graph.CompositeNode(nodeIDs, labels, modalities));
                }
            }

            // Per-node loop weights and their total, then normalise loop and edge weights.
            double[] loop        = Function.CalculateLoopWeight(inGraph);
            double   totalWeight = Function.CalculateTotalWeight(loop);
            Function.NormalizeWeight(ref loop, ref inGraph, totalWeight, 1);

            // Initial gain components per edge; their maxima decide the trade-off scaling.
            double[] hGain = new double[inGraph.NumEdges];
            double[] pGain = new double[inGraph.NumEdges];
            double[] lGain = new double[inGraph.NumEdges];
            double[] mGain = new double[inGraph.NumEdges];

            double maxHGain = 0, maxPGain = 0, maxLGain = 0, maxMGain = 0;

            for (int i = 0; i < inGraph.NumEdges; i++)
            {
                Console.WriteLine(i + "th edge's gain is initialized.");
                var edge = inGraph.Edges[i];
                hGain[i] = Function.CalculateHGain(edge.Weight, loop[edge.StartID] - edge.Weight, loop[edge.EndID] - edge.Weight);
                int clusterIndex1 = Function.FindIndex(inGraph, edge.StartID);
                int clusterIndex2 = Function.FindIndex(inGraph, edge.EndID);
                if (clusterIndex1 != clusterIndex2)
                {
                    pGain[i] = Function.CalculatePGain(clusters, inGraph.NumLabelNodes, clusterIndex1, clusterIndex2);
                    lGain[i] = Function.CalculateLGain(clusters, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, clusterIndex1, clusterIndex2);
                    mGain[i] = Function.CalculateMGain(clusters, inGraph.NumEachModality, clusterIndex1, clusterIndex2);
                }
                if (hGain[i] > maxHGain)
                {
                    maxHGain = hGain[i];
                }
                if (pGain[i] > maxPGain)
                {
                    maxPGain = pGain[i];
                }
                if (lGain[i] > maxLGain)
                {
                    maxLGain = lGain[i];
                }
                if (mGain[i] > maxMGain)
                {
                    maxMGain = mGain[i];
                }
            }

            // The maxima start at 0 and only grow, so a non-positive maximum means no edge had a
            // positive gain of that kind. Dividing by it would give Infinity/NaN; drop the term instead.
            double adjustedLambda = maxPGain > 0 ? lambda * maxHGain / maxPGain : 0.0;
            double adjustedGamma  = maxLGain > 0 ? gamma * maxHGain / maxLGain : 0.0;
            double adjustedMu     = maxMGain > 0 ? mu * maxHGain / maxMGain : 0.0;

            List <Graph.ClusteringEdge> edges = new List <Graph.ClusteringEdge>();
            for (int i = 0; i < inGraph.NumEdges; i++)
            {
                var    edge = inGraph.Edges[i];
                double gain = hGain[i] + adjustedLambda * pGain[i] + adjustedGamma * lGain[i];
                edges.Add(new Graph.ClusteringEdge(edge.StartID, edge.EndID, edge.Weight, edge.Type, gain));
            }
            #endregion

            #region build the heap
            Heap.SubmodularHeap heap = new Heap.SubmodularHeap(edges);
            heap.CheckMaxHeap(); // for debugging
            #endregion

            #region greedily add the edge and perform the clustering
            int           countCluster = inGraph.NumNodes;
            List <double> addedEdges   = new List <double>();
            while (countCluster > numClusters)
            {
                if (heap.IsEmpty())
                {
                    // No edges left: report the objective for the clusters reached so far.
                    Console.WriteLine("Heap is empty!");
                    break;
                }

                Console.WriteLine(countCluster + "->" + numClusters + "clusters remained");

                // Take the best edge.
                Graph.ClusteringEdge bestEdge = heap.ExtractMax();
                addedEdges.Add(bestEdge.Weight);

                // Merge the two clusters the best edge connects.
                int clusterID1 = Function.FindIndex(inGraph, bestEdge.StartID);
                int clusterID2 = Function.FindIndex(inGraph, bestEdge.EndID);
                if (clusterID1 != clusterID2)
                {
                    int remove = Function.MergeTwoClusters(ref clusters, clusterID1, clusterID2);
                    int stay   = clusterID1 == remove ? clusterID2 : clusterID1;

                    // Point every node of the surviving cluster at it.
                    foreach (var c in clusters[stay].SingleNodes)
                    {
                        inGraph.FindNode(c).ClusterID = stay;
                    }
                    countCluster--;

                    // Clusters from position 'remove' onwards get their nodes' IDs lowered by one.
                    for (int c = remove; c < countCluster; c++)
                    {
                        foreach (var n in clusters[c].SingleNodes)
                        {
                            inGraph.FindNode(n).ClusterID--;
                        }
                    }

                    loop[bestEdge.StartID] -= bestEdge.Weight;
                    loop[bestEdge.EndID]   -= bestEdge.Weight;
                }

                heap.UpdateSubmodularHeap(clusters, inGraph, loop, inGraph.NumLabelNodes, inGraph.NumUnLabelNodes, adjustedLambda, adjustedGamma, adjustedMu);
            }

            objective = Function.CalculateObjective(clusters, inGraph, addedEdges, loop, adjustedLambda, adjustedGamma, adjustedMu, out entropy, out purity, out balance, out diversity);
            return clusters;

            #endregion
        }
        /// <summary>
        /// Writes one feature file per modality listing the nodes of sufficiently large clusters.
        /// </summary>
        /// <remarks>
        /// Each emitted row holds the node's data followed by three values: a renumbered cluster ID,
        /// the cluster's dominant label, and the ratio <c>MaxLabelCount / LabelCount</c> (0 when the
        /// cluster has no labelled nodes). Only clusters whose dominant label is in 1..6 and whose
        /// node count exceeds <paramref name="threshold"/> are emitted. Files are named
        /// <c>00.txt</c>, <c>01.txt</c>, … by zero-based modality index.
        /// </remarks>
        /// <param name="clusters">Clusters to report.</param>
        /// <param name="inGraph">Graph supplying the nodes and the modality count.</param>
        /// <param name="outputClusterDirectory">Directory combined with the file names.</param>
        /// <param name="threshold">A cluster is emitted only if it has more nodes than this.</param>
        public static void OutputClusterResult(List <Graph.CompositeNode> clusters, Graph.HyperGraph inGraph, string outputClusterDirectory, int threshold)
        {
            int clusterTotal = clusters.Count();

            // Renumber the clusters so that IDs are grouped by dominant label (1..6),
            // keeping list order inside each group.
            int[] sortedID = new int[clusterTotal];
            int   nextID   = 0;
            for (int label = 1; label <= 6; label++)
            {
                for (int c = 0; c < clusterTotal; c++)
                {
                    if (clusters[c].MaxLabel != label)
                    {
                        continue;
                    }
                    sortedID[c] = nextID;
                    nextID++;
                }
            }

            int modalityTotal = inGraph.NumModality;
            for (int modality = 0; modality < modalityTotal; modality++)
            {
                // One row per node; rows beyond 'used' stay empty.
                List <double>[] rows = new List <double> [inGraph.NumNodes];
                for (int r = 0; r < inGraph.NumNodes; r++)
                {
                    rows[r] = new List <double>();
                }
                int used = 0;

                // Emit rows ordered by the dominant label of the owning cluster.
                for (int label = 1; label <= 6; label++)
                {
                    foreach (var group in inGraph.Nodes)
                    {
                        foreach (var n in group)
                        {
                            // Node modalities are compared against the one-based index.
                            if (n.Modality != modality + 1)
                            {
                                continue;
                            }

                            int clusterID = Function.FindIndex(inGraph, n.NodeID);
                            var owner     = clusters[clusterID];
                            if (owner.SingleNodes.Count() <= threshold || owner.MaxLabel != label)
                            {
                                continue;
                            }

                            List <double> row = rows[used];
                            row.AddRange(n.Data);
                            row.Add(sortedID[clusterID]); // renumbered cluster ID
                            row.Add(owner.MaxLabel);
                            row.Add(owner.LabelCount == 0 ? 0.0 : (double)owner.MaxLabelCount / owner.LabelCount);
                            used++;
                        }
                    }
                }

                string fileName = modality.ToString("D2") + ".txt";
                Preprocess.IO.WriteFeature(rows, System.IO.Path.Combine(outputClusterDirectory, fileName));
            }
        }
        /// <summary>
        /// Adds intra-modality (type 0) and inter-modality (type 1) edges to
        /// <paramref name="inGraph"/>, then calls <c>PrintGraph</c> with
        /// <paramref name="outputGraphFilename"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Intra-modality: for each node the <paramref name="k1"/> nearest neighbours are queried
        /// from an ALGLIB k-d tree built over the modality's node data, and an edge weighted by
        /// <c>Function.CalculateGaussianSimilarity</c> is requested through <c>SetEdge</c>. A pair
        /// that is encountered again (in either direction) is marked; edges that were never marked
        /// are removed from the graph afterwards.
        /// </para>
        /// <para>
        /// Inter-modality: nodes of two different modalities are linked when they are on the same
        /// day (or either has day -1) and either share a grid index (<paramref name="radius"/> &lt;= 0,
        /// weight 1, first match only) or are grid-adjacent within <paramref name="radius"/>
        /// (weight <c>1.0 / radius</c>).
        /// </para>
        /// </remarks>
        /// <param name="inGraph">Graph to which edges are added; its <c>Edges</c> list is replaced.</param>
        /// <param name="k1">Neighbour count passed to the k-d tree query (the first hit is skipped as the node itself).</param>
        /// <param name="k2">Controls when the adjacency search for a node stops (see the loop below).</param>
        /// <param name="sigma">Passed to <c>Function.CalculateGaussianSimilarity</c> together with the modality index.</param>
        /// <param name="radius">Grid adjacency radius; values &lt;= 0 select exact grid-index matching.</param>
        /// <param name="outputGraphFilename">Passed to <c>inGraph.PrintGraph</c>.</param>
        /// <returns>The same <paramref name="inGraph"/> instance.</returns>
        public Graph.HyperGraph BuildEdgesOfGraph(Graph.HyperGraph inGraph, int k1, int k2, double[] sigma, int radius, string outputGraphFilename)
        {
            for (int i = 0; i < inGraph.NumModality; i++)
            {
                List <Graph.SingleNode> mNodes = inGraph.Nodes[i];
                if (mNodes.Count == 0)
                {
                    continue;
                }

                // ---- first type: intra-link, type = 0 ----
                int nodeCount = mNodes.Count;
                int nodeDim   = mNodes[0].Data.Count();

                // Rows of nodes with at most one data value are left at zero.
                double[,] inputData = new double[nodeCount, nodeDim];
                for (int n = 0; n < nodeCount; n++)
                {
                    if (mNodes[n].Data.Count() > 1)
                    {
                        for (int d = 0; d < nodeDim; d++)
                        {
                            inputData[n, d] = mNodes[n].Data[d];
                        }
                    }
                }

                // Tag every point with its position in mNodes so query hits map back to nodes.
                int[] tags = new int[nodeCount];
                for (int t = 0; t < nodeCount; t++)
                {
                    tags[t] = t;
                }

                // Build the k-d tree.
                alglib.kdtree kdt;
                alglib.kdtreebuildtagged(inputData, tags, nodeDim, 0, 2, out kdt);

                // Maps a node pair to the edge weight, or to -1 once the pair has been seen again.
                int countNode = 0;
                Dictionary <Preprocess.Pair <int, int>, double> mutual = new Dictionary <Preprocess.Pair <int, int>, double>();
                foreach (var node in mNodes)
                {
                    Console.WriteLine(i + "th modality: " + countNode++ + "th node");
                    if (node.Data.Count() == 1)
                    {
                        // NOTE(review): this stops processing ALL remaining nodes of the modality,
                        // not just this one - confirm that 'break' rather than 'continue' is intended.
                        break;
                    }

                    double[] x = new double[nodeDim];
                    for (int j = 0; j < nodeDim; j++)
                    {
                        x[j] = node.Data[j];
                    }

                    int k = alglib.kdtreequeryknn(kdt, x, k1);
                    double[] distances = new double[0];
                    alglib.kdtreequeryresultsdistances(kdt, ref distances);
                    int[] index = new int[0];
                    alglib.kdtreequeryresultstags(kdt, ref index);

                    for (int j = 1; j < k; j++) // j = 0 is the query point itself; do not link a node to itself
                    {
                        Graph.SingleNode neighbor = mNodes[index[j]];
                        double weight = Function.CalculateGaussianSimilarity(distances[j], i, sigma);

                        var forward  = new Preprocess.Pair <int, int>(node.NodeID, neighbor.NodeID);
                        var backward = new Preprocess.Pair <int, int>(neighbor.NodeID, node.NodeID);

                        if (mutual.ContainsKey(forward))
                        {
                            mutual[forward] = -1;
                        }
                        else if (mutual.ContainsKey(backward))
                        {
                            mutual[backward] = -1;
                        }
                        else if (inGraph.SetEdge(node, neighbor, weight, 0))
                        {
                            // First sighting of this pair: remember its weight until it is seen again.
                            mutual.Add(forward, weight);
                        }
                    }
                }

                // Mutual k-NN: pairs still holding a positive weight were never seen a second time,
                // so their edges are removed again.
                List <Graph.Edge> delEdges = new List <Graph.Edge>();
                foreach (var m in mutual)
                {
                    if (m.Value > 0)
                    {
                        delEdges.Add(new Graph.Edge(m.Key.Value1, m.Key.Value2, m.Value, 0));
                    }
                }
                inGraph.Edges = inGraph.Edges.Except(delEdges).ToList();

                // ---- second type: inter-link, type = 1 ----
                for (int j = i + 1; j < inGraph.NumModality; j++)
                {
                    List <Graph.SingleNode> nNodes = inGraph.Nodes[j];
                    foreach (var mNode in mNodes)
                    {
                        int countNN = 0;
                        foreach (var nNode in nNodes)
                        {
                            // Do not link different days of a region; a day of -1 matches any day.
                            bool dayConstraint = mNode.Day == nNode.Day || mNode.Day == -1 || nNode.Day == -1;
                            if (radius > 0)
                            {
                                if (this.Grid.IsAdjacent(mNode.GridIndex, nNode.GridIndex, radius) && dayConstraint)
                                {
                                    // Floating-point division: the integer form 1 / radius is 0 for radius >= 2.
                                    inGraph.SetEdge(mNode, nNode, 1.0 / radius, 1);
                                    if (countNN >= k2)
                                    {
                                        break;
                                    }
                                    countNN++;
                                }
                            }
                            else
                            {
                                if (mNode.GridIndex == nNode.GridIndex && dayConstraint)
                                {
                                    inGraph.SetEdge(mNode, nNode, 1, 1);
                                    break;
                                }
                            }
                        }
                    }
                }
            }

            inGraph.PrintGraph(outputGraphFilename);
            return inGraph;
        }