// Writes the cluster-to-cluster distance matrix to a CSV file at the given path. This is useful
// for inspecting the computed distances and as input for other clustering implementations
// (R or Python).
//
// path: destination file; any existing file at that location is overwritten.
//
// Layout: a header line ("AggloCluster, Cluster<id>, ...") followed by one row per cluster. Within
// a row, distances are written until the first zero distance (normally the cluster compared with
// itself); every later non-zero cell on that row is left empty.
// NOTE(review): a zero distance between two different clusters also triggers the blanking, so the
// non-zero cells after it are dropped for that row - confirm this is intended.
public void CreateCSVMatrixFile(string path)
{
    this._BuildSingletonCluster();

    StringBuilder matrix = new StringBuilder();

    // Build the header directly in the builder instead of re-concatenating a growing string.
    matrix.Append("AggloCluster");
    foreach (Cluster cluster in _clusters)
    {
        matrix.Append(", Cluster" + cluster.Id);
    }

    int clusterCount = _clusters.Count();
    for (int i = 0; i < clusterCount; i++)
    {
        var rowCluster = _clusters.GetCluster(i);

        matrix.Append("\r\n");
        matrix.Append("Cluster" + rowCluster.Id);

        bool writeBlank = false;
        for (int j = 0; j < clusterCount; j++)
        {
            double distanceBetweenTwoClusters = ClusterDistance.ComputeDistance(rowCluster, _clusters.GetCluster(j));

            if (distanceBetweenTwoClusters == 0)
            {
                writeBlank = true;
                matrix.Append(",0");
            }
            else if (writeBlank)
            {
                matrix.Append(",");
            }
            else
            {
                // Format with the invariant culture: with a decimal-comma culture (e.g. pt-BR)
                // the default formatting would emit "1,5" and split the value into two CSV fields.
                matrix.Append(",");
                matrix.Append(distanceBetweenTwoClusters.ToString(System.Globalization.CultureInfo.InvariantCulture));
            }
        }
    }

    // WriteAllText creates the file or replaces an existing one, so no separate delete is needed.
    File.WriteAllText(path, matrix.ToString());
}
// Computes the distance between every pair of clusters returned by _ClusterPairCollection() and
// stores it in a fresh dissimilarity matrix. The pairs are processed with Parallel.ForEach to
// improve performance.
// NOTE(review): AddClusterPairAndDistance is called concurrently from several threads; this
// assumes DissimilarityMatrix tolerates concurrent additions - confirm against its implementation.
private void _BuildDissimilarityMatrixParallel()
{
    _dissimilarityMatrix = new DissimilarityMatrix();

    Parallel.ForEach(_ClusterPairCollection(), clusterPair =>
    {
        // The distance must be local to the lambda: a variable declared outside would be captured
        // and shared by all worker threads, so one thread could overwrite it between another
        // thread's computation and its insertion, storing a wrong distance for a pair.
        double distanceBetweenTwoClusters = ClusterDistance.ComputeDistance(clusterPair.Cluster1, clusterPair.Cluster2);
        _dissimilarityMatrix.AddClusterPairAndDistance(clusterPair, distanceBetweenTwoClusters);
    });
}
// Updates the dissimilarity matrix after a new cluster has been formed from two sub-clusters.
// For every cluster currently in _clusters, the distance to the new cluster is added and the
// entries pairing that cluster with each of the two sub-clusters are removed. Finally the entry
// for the sub-cluster pair itself is removed.
//
// newCluster: the newly formed cluster; its sub-clusters 0 and 1 are the clusters it replaces.
// strategie:  strategy forwarded to ClusterDistance.ComputeDistance.
private void _UpdateDissimilarityMatrix(Cluster newCluster, ClusterDistance.Strategy strategie)
{
    for (int index = 0; index < _clusters.Count(); index++)
    {
        var existingCluster = _clusters.GetCluster(index);

        // The distance is computed before the sub-cluster entries are removed below; the matrix
        // is handed to ComputeDistance, so this order is kept as is.
        double distanceToNewCluster = ClusterDistance.ComputeDistance(existingCluster, newCluster, _dissimilarityMatrix, strategie);

        // Register the distance between the new cluster and the existing one.
        ClusterPair pairWithNewCluster = new ClusterPair(newCluster, existingCluster);
        _dissimilarityMatrix.AddClusterPairAndDistance(pairWithNewCluster, distanceToNewCluster);

        // Drop the entries that paired the existing cluster with each replaced sub-cluster.
        ClusterPair pairWithFirstSubCluster = new ClusterPair(newCluster.GetSubCluster(0), existingCluster);
        _dissimilarityMatrix.RemoveClusterPair(pairWithFirstSubCluster);

        ClusterPair pairWithSecondSubCluster = new ClusterPair(newCluster.GetSubCluster(1), existingCluster);
        _dissimilarityMatrix.RemoveClusterPair(pairWithSecondSubCluster);
    }

    // Lastly, drop the entry for the two sub-clusters that were merged.
    ClusterPair mergedSubClusters = new ClusterPair(newCluster.GetSubCluster(0), newCluster.GetSubCluster(1));
    _dissimilarityMatrix.RemoveClusterPair(mergedSubClusters);
}
// Computes the distance between every pair of clusters and stores it in a new dissimilarity
// matrix. Each unordered pair (first < second) is visited exactly once.
private void CreateDissimilarityMatrix()
{
    _dissimilarityMatrix = new DissimilarityMatrix();

    for (int first = 0; first < _clusters.Count(); first++)
    {
        for (int second = first + 1; second < _clusters.Count(); second++)
        {
            ClusterPair pair = new ClusterPair
            {
                Cluster1 = _clusters.GetCluster(first),
                Cluster2 = _clusters.GetCluster(second)
            };

            double pairDistance = ClusterDistance.ComputeDistance(pair.Cluster1, pair.Cluster2);
            _dissimilarityMatrix.AddClusterPairAndDistance(pair, pairDistance);
        }
    }
}