Exemplo n.º 1
0
        /// <summary>
        /// Produces a partition of the dataset aimed at <c>_minK</c> clusters.
        /// GetGPartition differs from GetPartition in two ways:
        /// 1. It does not require a connected graph; the starting clusters are whatever
        ///    <c>Partition.GetPartition</c> returns for the graph.
        /// 2. If the result holds more than <c>_minK</c> clusters, <c>combineClusters</c> is
        ///    called so that the desired number of clusters is returned.
        /// </summary>
        /// <remarks>
        /// Side effects: writes progress to the console, appends diagnostics to <c>meta</c>,
        /// and overwrites <c>_vatNodeRemovalOrder</c> / <c>_vatNumNodesRemoved</c> with the values
        /// of the most recently constructed VAT (captured before any hill climbing).
        /// </remarks>
        /// <returns>A partitioning of the graph</returns>
        public Partition GetGPartition()
        {
            // Resolve a distance matrix for matrix / point-set inputs; it stays null otherwise.
            DistanceMatrix distances = null;
            if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
            {
                distances = (DistanceMatrix)_data;
            }
            else if (_data.Type == AbstractDataset.DataType.PointSet)
            {
                distances = ((PointSet)_data).GetDistanceMatrix();
            }

            // Starting partition (the graph is not necessarily connected).
            // NOTE(review): _data is cast to LightWeightGraph unconditionally here even though
            // non-graph dataset types are handled above and below; confirm that is intended.
            Partition partition = Partition.GetPartition((LightWeightGraph)_data);

            // Per-cluster caches keyed by cluster id: the computed VAT and the node ids
            // that made up the cluster when the VAT was computed.
            var vatByCluster = new Dictionary<int, VAT>();
            var memberIdsByCluster = new Dictionary<int, int[]>();

            while (partition.Clusters.Count < _minK)
            {
                Console.WriteLine("Count = " + partition.Clusters.Count);
                Console.WriteLine("mink = " + _minK);

                // Compute a VAT for every cluster that does not have one cached yet.
                foreach (var cluster in partition.Clusters)
                {
                    if (vatByCluster.ContainsKey(cluster.ClusterId))
                    {
                        continue;
                    }

                    List<int> memberIds = cluster.Points.Select(point => point.Id).ToList();

                    // Build a graph restricted to this cluster's nodes.
                    LightWeightGraph clusterGraph;
                    if (_data.Type == AbstractDataset.DataType.Graph)
                    {
                        // Mark every node as excluded, then re-admit the cluster's members.
                        bool[] excluded = new bool[_data.Count];
                        for (int idx = 0; idx < excluded.Length; idx++)
                        {
                            excluded[idx] = true;
                        }
                        foreach (int id in memberIds)
                        {
                            excluded[id] = false;
                        }

                        clusterGraph = new LightWeightGraph((LightWeightGraph)_data, excluded);
                    }
                    else
                    {
                        // Distance matrix or point set: reduce the matrix and generate a graph from it.
                        Debug.Assert(distances != null, "mat != null");
                        var reduced = distances.GetReducedDataSet(memberIds);
                        clusterGraph = _graphGen.GenerateGraph(reduced.Mat);
                    }

                    memberIdsByCluster.Add(cluster.ClusterId, memberIds.ToArray());
                    clusterGraph.IsWeighted = _weighted;

                    VAT vat = new VAT(clusterGraph, _reassignNodes, _alpha, _beta);
                    _vatNodeRemovalOrder = vat.NodeRemovalOrder;
                    _vatNumNodesRemoved = vat.NumNodesRemoved;
                    if (_hillClimb)
                    {
                        vat.HillClimb();
                    }

                    vatByCluster.Add(cluster.ClusterId, vat);
                    Console.WriteLine("Calculated Vat for cluster " + cluster.ClusterId);
                }

                // Log every cached VAT value and pick the cluster with the smallest one.
                meta.AppendLine("All calculated VATs:");
                int bestClusterId = 0;
                double bestValue = double.MaxValue;
                foreach (KeyValuePair<int, VAT> entry in vatByCluster)
                {
                    meta.AppendFormat("{0} ", entry.Value.MinVat);
                    if (entry.Value.MinVat < bestValue)
                    {
                        bestValue = entry.Value.MinVat;
                        bestClusterId = entry.Key;
                    }
                }
                meta.AppendLine();

                // Merge the chosen cluster's sub-partition back into the overall partition.
                VAT bestVat = vatByCluster[bestClusterId];
                var subPartition = bestVat.GetPartition();
                int[] originalIds = memberIdsByCluster[bestClusterId];

                meta.AppendFormat("Vat: MinVat={0}\r\n", bestVat.MinVat);
                meta.AppendFormat("Removed Count:{0}\r\n", bestVat.NumNodesRemoved);

                // Removed nodes are reported through originalIds, i.e. as ids of the full dataset.
                var removedNodes = bestVat.NodeRemovalOrder
                                          .GetRange(0, bestVat.NumNodesRemoved)
                                          .Select(localIndex => originalIds[localIndex]);
                meta.AppendLine(String.Join(",", removedNodes));

                partition.MergeSubPartition(subPartition, originalIds, bestClusterId);

                // Drop the cached entries for the cluster that was just processed.
                vatByCluster.Remove(bestClusterId);
                memberIdsByCluster.Remove(bestClusterId);

                Console.WriteLine("Found min cluster");
                Console.WriteLine(meta);
            }

            partition.MetaData = meta.ToString();

            // Too many clusters: combine them so that partition.Clusters.Count == _minK.
            if (partition.Clusters.Count > _minK)
            {
                combineClusters(partition, _minK);
            }

            return partition;
        }