// Repeatedly merges the closest pair of clusters into a new cluster until the
// clustering holds no more than k clusters.
//
// indexNewCluster: Id given to the first merged cluster; every further merge uses the next integer.
// strategy:        cluster-distance strategy, passed through to _UpdateDissimilarityMatrix.
// k:               merging stops as soon as _clusters.Count() is no longer greater than k.
//
// One merge is always performed before k is checked. The loop is iterative on purpose:
// a recursive formulation needs one stack frame per merge, which can exhaust the stack
// on large inputs.
private void BuildHierarchicalClustering(int indexNewCluster, ClusterDistance.Strategy strategy, int k)
{
    do
    {
        ClusterPair closestClusterPair = this._GetClosestClusterPairInDissimilarityMatrix();

        // create a new cluster by merging the closest cluster pair
        Cluster newCluster = new Cluster();
        newCluster.AddSubCluster(closestClusterPair.Cluster1);
        newCluster.AddSubCluster(closestClusterPair.Cluster2);
        newCluster.Id = indexNewCluster;
        // update the total quantity of patterns of the new cluster
        // (this quantity is used by the UPGMA clustering strategy)
        newCluster.UpdateTotalQuantityOfPatterns();

        // remove the merged pair from the clustering data structure, refresh the
        // dissimilarity matrix for the new cluster, then add the new cluster
        _clusters.RemoveClusterPair(closestClusterPair);
        _UpdateDissimilarityMatrix(newCluster, strategy);
        _clusters.AddCluster(newCluster);

        indexNewCluster++;
    }
    while (_clusters.Count() > k);
}
// Writes the distance matrix of the singleton clusters to a CSV file at the given path.
// This is useful to inspect the computed distances and to feed them into other
// clustering implementations (R or Python).
//
// path: destination file; any existing file at this path is deleted first.
//
// Layout: a header row ("AggloCluster" followed by one column per cluster), then one row
// per cluster. Within a row, distances are written until the first zero distance; that
// cell is written as "0" and every later non-zero cell is left empty.
// NOTE(review): the zero is presumably the diagonal (cluster against itself), giving a
// lower-triangular matrix; a zero distance between two distinct clusters would start the
// blanking early - confirm this is intended.
public void CreateCSVMatrixFile(string path)
{
    File.Delete(path);
    this._BuildSingletonCluster();

    StringBuilder matrix = new StringBuilder();

    // header row
    matrix.Append("AggloCluster");
    foreach (Cluster cluster in _clusters)
    {
        matrix.Append(", Cluster").Append(cluster.Id);
    }

    int clusterCount = _clusters.Count();
    for (int i = 0; i < clusterCount; i++)
    {
        var rowCluster = _clusters.GetCluster(i);

        matrix.Append("\r\n");
        matrix.Append("Cluster").Append(rowCluster.Id);

        bool writeBlank = false;
        for (int j = 0; j < clusterCount; j++)
        {
            double distanceBetweenTwoClusters = ClusterDistance.ComputeDistance(rowCluster, _clusters.GetCluster(j));

            if (distanceBetweenTwoClusters == 0)
            {
                writeBlank = true;
                matrix.Append(",0");
            }
            else if (writeBlank)
            {
                // empty cell
                matrix.Append(',');
            }
            else
            {
                // Invariant culture: the decimal separator must be '.', otherwise a
                // comma-decimal locale would split the value across two CSV columns.
                matrix.Append(',');
                matrix.Append(distanceBetweenTwoClusters.ToString(System.Globalization.CultureInfo.InvariantCulture));
            }
        }
    }

    File.AppendAllText(path, matrix.ToString());
}
// Removes the entry for the given cluster pair from the distance matrix.
// The pair is tried as given first; if no entry is removed, the reversed pair
// (Cluster2, Cluster1) is tried. Nothing happens when neither orientation is present.
public void RemoveClusterPair(ClusterPair clusterPair)
{
    // TryRemove already reports whether the key was present, so a separate
    // ContainsKey lookup is unnecessary.
    double removedDistance;
    if (!_distanceMatrix.TryRemove(clusterPair, out removedDistance))
    {
        _distanceMatrix.TryRemove(new ClusterPair(clusterPair.Cluster2, clusterPair.Cluster1), out removedDistance);
    }
}
// Gets the distance stored for a cluster pair.
// THIS METHOD DEPENDS ON THE EqualityComparer IMPLEMENTATION IN ClusterPair CLASS.
//
// The pair is looked up as given first; if there is no such entry, the reversed pair
// (Cluster2, Cluster1) is looked up through the indexer, because the distance matrix
// is symmetric and only one orientation is stored.
// NOTE(review): when neither orientation is present the indexer presumably throws
// (KeyNotFoundException for the standard dictionary types) - confirm against the
// declared type of _distanceMatrix.
public double ReturnClusterPairDistance(ClusterPair clusterPair)
{
    double clusterPairDistance;
    if (_distanceMatrix.TryGetValue(clusterPair, out clusterPairDistance))
    {
        return clusterPairDistance;
    }

    return _distanceMatrix[new ClusterPair(clusterPair.Cluster2, clusterPair.Cluster1)];
}
// Gets the lowest distance stored in the distance matrix.
// Returns double.MaxValue when no entry has a smaller value (for example when the
// matrix is empty).
public double GetLowestDistance()
{
    double minDistance = double.MaxValue;

    foreach (var item in _distanceMatrix)
    {
        if (item.Value < minDistance)
        {
            minDistance = item.Value;
        }
    }

    return minDistance;
}
// Finds the cluster pair with the minimum distance in the distance matrix.
// When several entries share the minimum, the first one enumerated wins. If no entry
// is below double.MaxValue (for example the matrix is empty), a newly constructed
// ClusterPair is returned.
public ClusterPair GetClosestClusterPair()
{
    double bestDistance = double.MaxValue;
    ClusterPair bestPair = new ClusterPair();

    foreach (var entry in _distanceMatrix)
    {
        // Written as a negated '<' so that the comparison is exactly the strict
        // "smaller than" test, including for NaN values.
        if (!(entry.Value < bestDistance))
        {
            continue;
        }

        bestDistance = entry.Value;
        bestPair = entry.Key;
    }

    return bestPair;
}
// Lazily enumerates every unordered pair of clusters in _clusters: for each index
// 'first', the cluster at that index is paired with each cluster at a higher index.
// A new ClusterPair is created for every pair yielded.
private IEnumerable<ClusterPair> _ClusterPairCollection()
{
    for (int first = 0; first < _clusters.Count(); first++)
    {
        for (int second = first + 1; second < _clusters.Count(); second++)
        {
            yield return new ClusterPair
            {
                Cluster1 = _clusters.GetCluster(first),
                Cluster2 = _clusters.GetCluster(second)
            };
        }
    }
}
// Computes the distance between every pair of clusters and stores each result in a
// newly created dissimilarity matrix (_dissimilarityMatrix). Each unordered pair is
// visited once: index 'first' is combined only with higher indices.
private void CreateDissimilarityMatrix()
{
    _dissimilarityMatrix = new DissimilarityMatrix();

    for (int first = 0; first < _clusters.Count(); first++)
    {
        for (int second = first + 1; second < _clusters.Count(); second++)
        {
            ClusterPair pair = new ClusterPair
            {
                Cluster1 = _clusters.GetCluster(first),
                Cluster2 = _clusters.GetCluster(second)
            };

            double pairDistance = ClusterDistance.ComputeDistance(pair.Cluster1, pair.Cluster2);
            _dissimilarityMatrix.AddClusterPairAndDistance(pair, pairDistance);
        }
    }
}
// Removes both members of a cluster pair from the clustering data structure:
// first Cluster1, then Cluster2, each through RemoveCluster.
public void RemoveClusterPair(ClusterPair clusterPair)
{
    RemoveCluster(clusterPair.Cluster1);
    RemoveCluster(clusterPair.Cluster2);
}
// Stores the distance for a cluster pair in the distance matrix.
// The result of TryAdd is ignored, so an unsuccessful add (presumably because the
// pair is already present) is a silent no-op.
public void AddClusterPairAndDistance(ClusterPair clusterPair, double distance)
{
    this._distanceMatrix.TryAdd(clusterPair, distance);
}
// Records a cluster pair together with its distance in the dendrogram.
// NOTE(review): if _dendrogram is a standard dictionary, Add throws when the pair is
// already present - confirm against its declared type.
public void AddClusterPairAndDistance(ClusterPair clusterPair, double distance)
{
    this._dendrogram.Add(clusterPair, distance);
}