/// <summary>
/// Builds a partition of <c>_data</c> by starting from one cluster that holds every point and
/// repeatedly splitting the cluster whose <c>Integrity</c> result has the lowest <c>MinVat</c>,
/// until the partition holds at least <c>_minK</c> clusters.
/// </summary>
/// <remarks>
/// Diagnostic text is appended to <c>meta</c> on every iteration and stored in the returned
/// partition's <c>MetaData</c>. <c>_vatNodeRemovalOrder</c> and <c>_vatNumNodesRemoved</c> are
/// overwritten with the values of the most recently computed <c>Integrity</c>.
/// If no cluster yields a <c>MinVat</c> below <c>double.MaxValue</c>, refinement stops early and
/// the partition built so far is returned.
/// </remarks>
/// <returns>The refined partition.</returns>
/// <exception cref="InvalidOperationException">
/// A cluster has to be evaluated but <c>_data</c> is not a graph and no distance matrix is available.
/// </exception>
public Partition GetPartition()
{
    // Matrix-backed data (DistanceMatrix / PointSet) is clustered through a distance matrix;
    // for graph data this stays null and the graph itself is used instead.
    DistanceMatrix mat = null;
    if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
    {
        mat = (DistanceMatrix)_data;
    }
    else if (_data.Type == AbstractDataset.DataType.PointSet)
    {
        mat = ((PointSet)_data).GetDistanceMatrix();
    }

    // Set up our partition with a single cluster containing all points.
    List<Cluster> clusterList = new List<Cluster>
    {
        new Cluster(0, Enumerable.Range(0, _data.Count).ToList())
    };
    Partition partition = new Partition(clusterList, _data);

    // Integrity result per cluster id; entries survive across iterations so that only
    // clusters without an entry are (re)computed.
    var vatMap = new Dictionary<int, Integrity>();

    // Per cluster id: the point ids that were used to build that cluster's graph.
    var subsetMap = new Dictionary<int, int[]>();

    // Ask the partition itself for its size (as GetGPartition does) instead of relying on
    // the local list being the same instance the partition mutates.
    while (partition.Clusters.Count < _minK)
    {
        // Calculate the Integrity for every cluster that does not have one yet.
        foreach (var cluster in partition.Clusters.Where(candidate => !vatMap.ContainsKey(candidate.ClusterId)))
        {
            // We must calculate a graph for this subset of the data.
            List<int> clusterSubset = cluster.Points.Select(p => p.Id).ToList();

            LightWeightGraph lwg;
            if (_data.Type == AbstractDataset.DataType.Graph)
            {
                // Exclude every node, then re-include the nodes belonging to this cluster.
                bool[] exclusion = new bool[_data.Count];
                for (int i = 0; i < exclusion.Length; i++)
                {
                    exclusion[i] = true;
                }
                foreach (var p in cluster.Points)
                {
                    exclusion[p.Id] = false;
                }
                lwg = new LightWeightGraph((LightWeightGraph)_data, exclusion);
            }
            else // Distance matrix or point set
            {
                // Debug.Assert vanishes in release builds; fail with a clear message instead
                // of a NullReferenceException.
                if (mat == null)
                {
                    throw new InvalidOperationException(
                        "No distance matrix is available; the dataset must be a Graph, DistanceMatrix or PointSet.");
                }

                var subMatrix = mat.GetReducedDataSet(clusterSubset);

                // Generate our graph from the reduced matrix.
                lwg = _graphGen.GenerateGraph(subMatrix.Mat);
            }

            subsetMap.Add(cluster.ClusterId, clusterSubset.ToArray());
            lwg.IsWeighted = _weighted;

            Integrity v = new Integrity(lwg, _reassignNodes, _alpha, _beta);

            // NOTE(review): these are captured before the optional HillClimb below; confirm
            // that the pre-climb values are the ones intended.
            _vatNodeRemovalOrder = v.NodeRemovalOrder;
            _vatNumNodesRemoved = v.NumNodesRemoved;
            if (_hillClimb)
            {
                v.HillClimb();
            }

            vatMap.Add(cluster.ClusterId, v);
        }

        meta.AppendLine("All calculated Integritys:");

        // Find the cluster with the minimum value. A flag is used rather than a default id,
        // because 0 is itself a valid cluster id.
        bool foundMin = false;
        int minVatCluster = 0;
        double minVatValue = double.MaxValue;
        foreach (var entry in vatMap)
        {
            var candidateValue = entry.Value.MinVat;
            meta.AppendFormat("{0} ", candidateValue);
            if (candidateValue < minVatValue)
            {
                foundMin = true;
                minVatCluster = entry.Key;
                minVatValue = candidateValue;
            }
        }
        meta.AppendLine();

        if (!foundMin)
        {
            // Nothing can be split; stop instead of indexing a cluster id that was never selected.
            break;
        }

        // Merge the winning cluster's sub-partition into the overall partition.
        Integrity minVAT = vatMap[minVatCluster];
        var subPartition = minVAT.GetPartition();
        int[] nodeIndexMap = subsetMap[minVatCluster];

        meta.AppendFormat("Integrity: MinIntegrity={0}\r\n", minVAT.MinVat);
        meta.AppendFormat("Removed Count:{0} \r\n", minVAT.NumNodesRemoved);
        // Removal order is expressed in sub-graph indices; map them back through nodeIndexMap.
        meta.AppendLine(String.Join(",",
            minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(n => nodeIndexMap[n])));

        partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);

        // The split cluster's cached results are no longer valid.
        vatMap.Remove(minVatCluster);
        subsetMap.Remove(minVatCluster);
    }

    partition.MetaData = meta.ToString();
    return partition;
}
/// <summary>
/// Variant of <c>GetPartition</c> that does not require a connected graph: for graph data the
/// starting partition is obtained from <c>Partition.GetPartition</c> rather than from a single
/// cluster holding every point. Clusters are then split, lowest <c>MinVat</c> first, until the
/// partition holds at least <c>_minK</c> clusters.
/// </summary>
/// <remarks>
/// Combining surplus clusters down to exactly <c>_minK</c> is NOT performed; that step is
/// currently disabled, so the result may contain more than <c>_minK</c> clusters.
/// For DistanceMatrix / PointSet data there is no graph to pre-partition, so the method starts
/// from a single all-inclusive cluster, exactly as <c>GetPartition</c> does.
/// Diagnostic text is appended to <c>meta</c> and stored in the returned partition's
/// <c>MetaData</c>; progress is also written to the console on every iteration.
/// If no cluster yields a <c>MinVat</c> below <c>double.MaxValue</c>, refinement stops early.
/// </remarks>
/// <returns>A partitioning of the data.</returns>
/// <exception cref="InvalidOperationException">
/// A cluster has to be evaluated but <c>_data</c> is not a graph and no distance matrix is available.
/// </exception>
public Partition GetGPartition()
{
    // Matrix-backed data (DistanceMatrix / PointSet) is clustered through a distance matrix;
    // for graph data this stays null and the graph itself is used instead.
    DistanceMatrix mat = null;
    if (_data.Type == AbstractDataset.DataType.DistanceMatrix)
    {
        mat = (DistanceMatrix)_data;
    }
    else if (_data.Type == AbstractDataset.DataType.PointSet)
    {
        mat = ((PointSet)_data).GetDistanceMatrix();
    }

    // Seed the partition. Only graph data can be cast to LightWeightGraph; casting
    // unconditionally threw InvalidCastException for the matrix-backed types handled above.
    Partition partition;
    if (_data.Type == AbstractDataset.DataType.Graph)
    {
        // Get the actual partition (the graph is not necessarily connected).
        // NOTE(review): presumably one cluster per connected component; confirm in Partition.
        partition = Partition.GetPartition((LightWeightGraph)_data);
    }
    else
    {
        List<Cluster> initialClusters = new List<Cluster>
        {
            new Cluster(0, Enumerable.Range(0, _data.Count).ToList())
        };
        partition = new Partition(initialClusters, _data);
    }

    // Integrity result per cluster id; entries survive across iterations so that only
    // clusters without an entry are (re)computed.
    var vatMap = new Dictionary<int, Integrity>();

    // Per cluster id: the point ids that were used to build that cluster's graph.
    var subsetMap = new Dictionary<int, int[]>();

    while (partition.Clusters.Count < _minK)
    {
        // NOTE(review): progress output goes straight to the console; kept as-is so the
        // observable output is unchanged.
        Console.WriteLine("Count = " + partition.Clusters.Count);
        Console.WriteLine("mink = " + _minK);

        // Calculate the Integrity for every cluster that does not have one yet.
        foreach (var cluster in partition.Clusters.Where(candidate => !vatMap.ContainsKey(candidate.ClusterId)))
        {
            // We must calculate a graph for this subset of the data.
            List<int> clusterSubset = cluster.Points.Select(p => p.Id).ToList();

            LightWeightGraph lwg;
            if (_data.Type == AbstractDataset.DataType.Graph)
            {
                // Exclude every node, then re-include the nodes belonging to this cluster.
                bool[] exclusion = new bool[_data.Count];
                for (int i = 0; i < exclusion.Length; i++)
                {
                    exclusion[i] = true;
                }
                foreach (var p in cluster.Points)
                {
                    exclusion[p.Id] = false;
                }
                lwg = new LightWeightGraph((LightWeightGraph)_data, exclusion);
            }
            else // Distance matrix or point set
            {
                // Debug.Assert vanishes in release builds; fail with a clear message instead
                // of a NullReferenceException.
                if (mat == null)
                {
                    throw new InvalidOperationException(
                        "No distance matrix is available; the dataset must be a Graph, DistanceMatrix or PointSet.");
                }

                var subMatrix = mat.GetReducedDataSet(clusterSubset);

                // Generate our graph from the reduced matrix.
                lwg = _graphGen.GenerateGraph(subMatrix.Mat);
            }

            subsetMap.Add(cluster.ClusterId, clusterSubset.ToArray());
            lwg.IsWeighted = _weighted;

            Integrity v = new Integrity(lwg, _reassignNodes, _alpha, _beta);

            // NOTE(review): these are captured before the optional HillClimb below; confirm
            // that the pre-climb values are the ones intended.
            _vatNodeRemovalOrder = v.NodeRemovalOrder;
            _vatNumNodesRemoved = v.NumNodesRemoved;
            if (_hillClimb)
            {
                v.HillClimb();
            }

            vatMap.Add(cluster.ClusterId, v);
        }

        meta.AppendLine("All calculated Integritys:");

        // Find the cluster with the minimum value. A flag is used rather than a default id,
        // because 0 is itself a valid cluster id.
        bool foundMin = false;
        int minVatCluster = 0;
        double minVatValue = double.MaxValue;
        foreach (var entry in vatMap)
        {
            var candidateValue = entry.Value.MinVat;
            meta.AppendFormat("{0} ", candidateValue);
            if (candidateValue < minVatValue)
            {
                foundMin = true;
                minVatCluster = entry.Key;
                minVatValue = candidateValue;
            }
        }
        meta.AppendLine();

        if (!foundMin)
        {
            // Nothing can be split; stop instead of indexing a cluster id that was never selected.
            break;
        }

        // Merge the winning cluster's sub-partition into the overall partition.
        Integrity minVAT = vatMap[minVatCluster];
        var subPartition = minVAT.GetPartition();
        int[] nodeIndexMap = subsetMap[minVatCluster];

        meta.AppendFormat("Integrity: MinIntegrity={0}\r\n", minVAT.MinVat);
        meta.AppendFormat("Removed Count:{0} \r\n", minVAT.NumNodesRemoved);
        // Removal order is expressed in sub-graph indices; map them back through nodeIndexMap.
        meta.AppendLine(String.Join(",",
            minVAT.NodeRemovalOrder.GetRange(0, minVAT.NumNodesRemoved).Select(n => nodeIndexMap[n])));

        partition.MergeSubPartition(subPartition, nodeIndexMap, minVatCluster);

        // The split cluster's cached results are no longer valid.
        vatMap.Remove(minVatCluster);
        subsetMap.Remove(minVatCluster);
    }

    partition.MetaData = meta.ToString();

    // TODO: when partition.Clusters.Count > _minK, combine clusters so that exactly _minK
    // remain (a combineClusters(partition, _minK) step was planned here but is disabled).
    return partition;
}