// Runs the agglomerative clustering and returns the resulting clustering.
//
// strategy: linkage strategy forwarded to the hierarchical merge step.
// k:        number of clusters at which merging stops; must be at least 1.
//
// Returns the internal clustering (_clusters) after merging.
// Throws System.ArgumentOutOfRangeException when k is less than 1.
public Clusters Cluster(ClusterDistanceStrategy strategy, int k) {
    if (k < 1) {
        throw new System.ArgumentOutOfRangeException(nameof(k), k, "The number of clusters must be at least 1.");
    }

    // Start from a clustering that contains only singleton clusters.
    this._BuildSingletonCluster();

    // Build the dissimilarity matrix for the current clustering.
    this._BuildDissimilarityMatrixParallel();

    // Only merge when there are more clusters than requested; otherwise the
    // singleton clustering is already the answer and no pair must be merged.
    int initialClusterCount = _clusters.Count();
    if (initialClusterCount > k) {
        // The current cluster count is used as the id of the first merged cluster.
        this.BuildHierarchicalClustering(initialClusterCount, strategy, k);
    }

    return _clusters;
}
// Refreshes the dissimilarity matrix after two clusters have been merged into
// newCluster: for every cluster currently in _clusters it stores the distance
// to newCluster and drops the entries that referred to the two merged
// sub-clusters, then drops the entry for the merged pair itself.
//
// newCluster: the merged cluster; its sub-clusters 0 and 1 are the merged pair.
// strategie:  linkage strategy passed through to ComputeDistance.
private void _UpdateDissimilarityMatrix(Cluster newCluster, ClusterDistanceStrategy strategie) {
    var firstMerged = newCluster.GetSubCluster(0);
    var secondMerged = newCluster.GetSubCluster(1);

    int remainingCount = _clusters.Count();
    for (int index = 0; index < remainingCount; index++) {
        var other = _clusters.GetCluster(index);

        // Compute and store the distance first: ComputeDistance is handed the
        // matrix, so the old sub-cluster entries must still be present here.
        double distanceToNew = ComputeDistance(other, newCluster, _dissimilarityMatrix, strategie);
        _dissimilarityMatrix.AddClusterPairAndDistance(new ClusterPair(newCluster, other), distanceToNew);

        // The distances from the merged sub-clusters to this cluster are now obsolete.
        _dissimilarityMatrix.RemoveClusterPair(new ClusterPair(firstMerged, other));
        _dissimilarityMatrix.RemoveClusterPair(new ClusterPair(secondMerged, other));
    }

    // Finally, forget the distance between the two sub-clusters that were merged.
    _dissimilarityMatrix.RemoveClusterPair(new ClusterPair(firstMerged, secondMerged));
}
// Repeatedly merges the closest pair of clusters until at most k clusters
// remain.
//
// indexNewCluster: id assigned to the first merged cluster; each further
//                  merged cluster gets the next consecutive id.
// strategy:        linkage strategy used when updating the dissimilarity matrix.
// k:               number of clusters at which merging stops.
//
// Implemented as a loop rather than one recursive call per merge so that the
// stack depth does not grow with the number of merges, and the stop condition
// is checked BEFORE each merge so no merge happens when the clustering already
// has k or fewer clusters.
private void BuildHierarchicalClustering(int indexNewCluster, ClusterDistanceStrategy strategy, int k) {
    // A merge needs two clusters, so never try to go below a single cluster.
    int targetCount = k < 1 ? 1 : k;
    int nextClusterId = indexNewCluster;

    while (_clusters.Count() > targetCount) {
        ClusterPair closestClusterPair = this._GetClosestClusterPairInDissimilarityMatrix();

        // Create a new cluster by merging the closest cluster pair.
        Cluster newCluster = new Cluster();
        newCluster.AddSubCluster(closestClusterPair.Cluster1);
        newCluster.AddSubCluster(closestClusterPair.Cluster2);
        newCluster.Id = nextClusterId++;

        // The total quantity of patterns is used by the UPGMA clustering strategy.
        newCluster.UpdateTotalQuantityOfPatterns();

        // The merged clusters leave the clustering before the matrix update, so
        // the update only iterates over the clusters that remain.
        _clusters.RemoveCluster(closestClusterPair.Cluster1);
        _clusters.RemoveCluster(closestClusterPair.Cluster2);

        _UpdateDissimilarityMatrix(newCluster, strategy);

        // Add the new cluster to the clustering.
        _clusters.AddCluster(newCluster);
    }
}
// Clusters the given iris samples into 3 clusters with AgnesClustering and
// prints the result.
//
// irisData:                  samples to cluster; each sample contributes its
//                            petal length, petal width, sepal length and sepal
//                            width (in that order) as one pattern.
// calculateDistanceFunction: distance function handed to AgnesClustering.
// strategy:                  linkage strategy handed to Cluster.
private static void AgnesClusteringPrint(IrisData[] irisData, DistanceDelegate calculateDistanceFunction, ClusterDistanceStrategy strategy) {
    var irisDataHier = irisData
        .Select(d => new double[] { d.PetalLength, d.PetalWidth, d.SepalLength, d.SepalWidth })
        .ToHashSet();

    var hc = new AgnesClustering(calculateDistanceFunction, irisDataHier);
    var clusters = hc.Cluster(strategy, 3);

    Console.ForegroundColor = ConsoleColor.DarkYellow;
    Console.WriteLine($"Agnes {calculateDistanceFunction.Method.Name} {strategy.ToString()} Predicted:");
    Console.ForegroundColor = ConsoleColor.Gray;

    var res = new IrisData[clusters.Count()][];
    var i = 0;
    foreach (Cluster cl in clusters) {
        // Map every clustered pattern back to the sample it was built from.
        // The lookup goes through the irisData parameter (the data that was
        // actually clustered) instead of the static _irisData field, so the
        // method no longer depends on state outside its arguments.
        res[i++] = cl.GetAllPatterns()
            .Select(p => p.GetAttributes())
            .Select(p => irisData.First(x => x.PetalLength == p[0]
                                             && x.PetalWidth == p[1]
                                             && x.SepalLength == p[2]
                                             && x.SepalWidth == p[3]))
            .ToArray();
    }

    PrintResult(res, irisData.Length);
    Console.WriteLine();
}
// Computes the distance between cluster1 and the merged cluster cluster2 from
// the stored distances between cluster1 and each of cluster2's two
// sub-clusters.
//
// cluster1:            an existing cluster.
// cluster2:            the merged cluster; sub-clusters 0 and 1 are read.
// dissimilarityMatrix: matrix that still holds the distances from cluster1 to
//                      both sub-clusters of cluster2.
// strategy:            linkage strategy that combines the two distances.
//
// Returns the combined distance.
// Throws System.ArgumentOutOfRangeException for a strategy that is not handled,
// instead of silently reporting a distance of 0.
private double ComputeDistance(Cluster cluster1, Cluster cluster2, DissimilarityMatrix dissimilarityMatrix, ClusterDistanceStrategy strategy) {
    var firstSubCluster = cluster2.GetSubCluster(0);
    var secondSubCluster = cluster2.GetSubCluster(1);

    var distance1 = dissimilarityMatrix.ReturnClusterPairDistance(new ClusterPair(cluster1, firstSubCluster));
    var distance2 = dissimilarityMatrix.ReturnClusterPairDistance(new ClusterPair(cluster1, secondSubCluster));

    switch (strategy) {
        case ClusterDistanceStrategy.SingleLinkage:
            // Smaller of the two distances.
            return distance1 < distance2 ? distance1 : distance2;

        case ClusterDistanceStrategy.CompleteLinkage:
            // Larger of the two distances.
            return distance1 > distance2 ? distance1 : distance2;

        case ClusterDistanceStrategy.AverageWeightedPairGroupMethodArithmeticMean:
            // Plain mean of the two distances.
            return (distance1 + distance2) / 2;

        case ClusterDistanceStrategy.AverageUnweightedPairGroupMethodArithmeticMean:
            // Mean weighted by each sub-cluster's share of the merged cluster's patterns.
            return ((firstSubCluster.TotalQuantityOfPatterns * distance1) / cluster2.TotalQuantityOfPatterns)
                 + ((secondSubCluster.TotalQuantityOfPatterns * distance2) / cluster2.TotalQuantityOfPatterns);

        default:
            throw new System.ArgumentOutOfRangeException(nameof(strategy), strategy, "Unsupported cluster distance strategy.");
    }
}