/// <summary>
        /// Repeatedly merges the closest pair of clusters until the number of clusters
        /// is no longer greater than <paramref name="k"/>.
        /// </summary>
        /// <param name="indexNewCluster">Id given to the first merged cluster; every following merge uses the next integer.</param>
        /// <param name="strategy">Strategy forwarded to the dissimilarity-matrix update after each merge.</param>
        /// <param name="k">Merging stops as soon as the cluster count is not greater than this value.</param>
        /// <remarks>
        /// Written as a loop instead of self-recursion: one stack frame per merge would make the
        /// stack depth grow with the number of merges. As with the former recursive form, one merge
        /// is always performed before the stop condition is evaluated.
        /// </remarks>
        private void BuildHierarchicalClustering(int indexNewCluster, ClusterDistance.Strategy strategy, int k)
        {
            int nextClusterId = indexNewCluster;

            do
            {
                ClusterPair closestClusterPair = this._GetClosestClusterPairInDissimilarityMatrix();

                // create a new cluster by merging the closest cluster pair
                Cluster newCluster = new Cluster();

                newCluster.AddSubCluster(closestClusterPair.Cluster1);
                newCluster.AddSubCluster(closestClusterPair.Cluster2);
                newCluster.Id = nextClusterId;
                newCluster.UpdateTotalQuantityOfPatterns(); // update the total quantity of patterns of the new cluster (this quantity is used by UPGMA clustering strategy)

                // remove the merged pair from the clustering data structure before the matrix is updated
                _clusters.RemoveClusterPair(closestClusterPair);
                _UpdateDissimilarityMatrix(newCluster, strategy);

                // add the new cluster to the clustering
                _clusters.AddCluster(newCluster);

                nextClusterId++;
            }
            while (_clusters.Count() > k); // keep merging while more than k clusters remain
        }
        /// <summary>
        /// Rebuilds the singleton clusters and writes their pairwise distance matrix to a CSV file.
        /// Useful for inspecting the computed distances or for feeding them to other clustering
        /// implementations (R or Python).
        /// </summary>
        /// <param name="path">Destination file; any existing content is replaced.</param>
        /// <remarks>
        /// Distances are formatted with the invariant culture so the decimal separator is always '.'
        /// and can never be confused with the ',' column separator.
        /// </remarks>
        public void CreateCSVMatrixFile(string path)
        {
            this._BuildSingletonCluster();

            StringBuilder matrix = new StringBuilder();

            // header row: one column per cluster
            matrix.Append("AggloCluster");

            foreach (Cluster cluster in _clusters)
            {
                matrix.Append(", Cluster").Append(cluster.Id);
            }

            int clusterCount = _clusters.Count();

            for (int i = 0; i < clusterCount; i++)
            {
                Cluster rowCluster = _clusters.GetCluster(i);

                matrix.Append("\r\n");
                matrix.Append("Cluster").Append(rowCluster.Id);

                // once a zero distance has been written in this row, the remaining non-zero cells are left empty
                // NOTE(review): this looks intended to blank everything after the diagonal; a zero distance
                // between two different clusters would trigger the blanking earlier - confirm.
                bool writeBlank = false;

                for (int j = 0; j < clusterCount; j++)
                {
                    double distanceBetweenTwoClusters = ClusterDistance.ComputeDistance(rowCluster, _clusters.GetCluster(j));

                    matrix.Append(',');

                    if (distanceBetweenTwoClusters == 0)
                    {
                        writeBlank = true;
                        matrix.Append('0');
                    }
                    else if (!writeBlank)
                    {
                        matrix.Append(distanceBetweenTwoClusters.ToString(System.Globalization.CultureInfo.InvariantCulture));
                    }
                }
            }

            // creates the file or overwrites it, so no separate delete is needed
            File.WriteAllText(path, matrix.ToString());
        }
예제 #3
0
        /// <summary>
        /// Removes the entry for a cluster pair from the distance matrix, trying the pair as given
        /// first and the pair with its two clusters swapped second.
        /// </summary>
        /// <param name="clusterPair">Pair whose distance entry should be removed.</param>
        /// <remarks>
        /// Nothing is reported to the caller when neither orientation is present.
        /// </remarks>
        public void RemoveClusterPair(ClusterPair clusterPair)
        {
            double removedDistance;

            // TryRemove already reports whether the key was present, so a separate ContainsKey
            // lookup (and the gap between the check and the removal) is unnecessary.
            if (!_distanceMatrix.TryRemove(clusterPair, out removedDistance))
            {
                _distanceMatrix.TryRemove(new ClusterPair(clusterPair.Cluster2, clusterPair.Cluster1), out removedDistance);
            }
        }
예제 #4
0
        /// <summary>
        /// Gets the distance stored for a cluster pair.
        /// THIS METHOD DEPENDS ON THE EqualityComparer IMPLEMENTATION IN ClusterPair CLASS.
        /// </summary>
        /// <param name="clusterPair">Pair to look up (a ClusterPair holds two clusters, Cluster1 and Cluster2).</param>
        /// <returns>The distance stored for the pair as given, otherwise the one stored for the swapped pair.</returns>
        /// <remarks>
        /// When neither orientation is stored, the indexer used for the swapped lookup raises its
        /// usual missing-key exception, exactly as before.
        /// </remarks>
        public double ReturnClusterPairDistance(ClusterPair clusterPair)
        {
            double clusterPairDistance;

            // single lookup for an entry of (cluster1, cluster2)
            if (_distanceMatrix.TryGetValue(clusterPair, out clusterPairDistance))
            {
                return clusterPairDistance;
            }

            // if not found, look for an entry of (cluster2, cluster1): the distance matrix is symmetric
            return _distanceMatrix[new ClusterPair(clusterPair.Cluster2, clusterPair.Cluster1)];
        }
예제 #5
0
        /// <summary>
        /// Gets the lowest distance stored in the distance matrix.
        /// </summary>
        /// <returns>
        /// The smallest stored distance, or <c>double.MaxValue</c> when no stored distance is below
        /// that value (which includes an empty matrix).
        /// </returns>
        public double GetLowestDistance()
        {
            double minDistance = double.MaxValue;

            foreach (var item in _distanceMatrix)
            {
                if (item.Value < minDistance)
                {
                    minDistance = item.Value;
                }
            }

            return minDistance;
        }
예제 #6
0
        /// <summary>
        /// Finds the cluster pair with the minimum distance in the distance matrix.
        /// It is also important to reduce computational time.
        /// </summary>
        /// <returns>
        /// The key of the first entry encountered with the strictly smallest distance, or a newly
        /// constructed <c>ClusterPair</c> when no entry has a distance below <c>double.MaxValue</c>.
        /// </returns>
        public ClusterPair GetClosestClusterPair()
        {
            ClusterPair winner        = new ClusterPair();
            double      smallestSoFar = double.MaxValue;

            foreach (var entry in _distanceMatrix)
            {
                // only a strictly smaller distance replaces the current winner
                if (!(entry.Value < smallestSoFar))
                {
                    continue;
                }

                smallestSoFar = entry.Value;
                winner        = entry.Key;
            }

            return winner;
        }
        /// <summary>
        /// Lazily enumerates every unordered pair of clusters: each cluster at index i is paired
        /// with each cluster at a higher index j.
        /// </summary>
        /// <returns>A sequence yielding one new <c>ClusterPair</c> per (i, j) combination with i &lt; j.</returns>
        private IEnumerable <ClusterPair> _ClusterPairCollection()
        {
            int first = 0;

            while (first < _clusters.Count())
            {
                int second = first + 1;

                while (second < _clusters.Count())
                {
                    yield return new ClusterPair
                    {
                        Cluster1 = _clusters.GetCluster(first),
                        Cluster2 = _clusters.GetCluster(second)
                    };

                    second++;
                }

                first++;
            }
        }
        /// <summary>
        /// Computes the distance between every pair of clusters and stores the results in a newly
        /// created dissimilarity matrix.
        /// </summary>
        private void CreateDissimilarityMatrix()
        {
            _dissimilarityMatrix = new DissimilarityMatrix();

            for (int row = 0; row < _clusters.Count(); row++)
            {
                // only pairs with a higher second index are visited, so each unordered pair is stored once
                for (int column = row + 1; column < _clusters.Count(); column++)
                {
                    ClusterPair pair = new ClusterPair
                    {
                        Cluster1 = _clusters.GetCluster(row),
                        Cluster2 = _clusters.GetCluster(column)
                    };

                    double pairDistance = ClusterDistance.ComputeDistance(pair.Cluster1, pair.Cluster2);

                    _dissimilarityMatrix.AddClusterPairAndDistance(pair, pairDistance);
                }
            }
        }
예제 #9
0
 /// <summary>
 /// Removes both clusters of a cluster pair from the clustering data structure.
 /// </summary>
 /// <param name="clusterPair">Pair whose Cluster1 and then Cluster2 are removed.</param>
 public void RemoveClusterPair(ClusterPair clusterPair)
 {
     RemoveCluster(clusterPair.Cluster1);
     RemoveCluster(clusterPair.Cluster2);
 }
예제 #10
0
 /// <summary>
 /// Stores the distance for a cluster pair in the distance matrix.
 /// </summary>
 /// <param name="clusterPair">Pair used as the key.</param>
 /// <param name="distance">Distance value stored for the pair.</param>
 public void AddClusterPairAndDistance(ClusterPair clusterPair, double distance)
 {
     // The result of TryAdd is not inspected, so a failed add goes unreported to the caller.
     // NOTE(review): presumably TryAdd fails when the key already exists - confirm against the field's type.
     _distanceMatrix.TryAdd(clusterPair, distance);
 }
 /// <summary>
 /// Records a cluster pair together with its distance in the dendrogram.
 /// </summary>
 /// <param name="clusterPair">Pair passed as the first argument to the dendrogram's Add.</param>
 /// <param name="distance">Distance passed as the second argument to the dendrogram's Add.</param>
 public void AddClusterPairAndDistance(ClusterPair clusterPair, double distance)
 {
     // NOTE(review): behaviour for a pair that is already present depends on the type of _dendrogram - confirm.
     _dendrogram.Add(clusterPair, distance);
 }