예제 #1
0
        /// <summary>
        /// calculateNearestCluster is a function that determines, for every cluster, the nearest other non-empty cluster and calculates the distance between the centroids of those two clusters.
        /// </summary>
        /// <param name="Centroids">the centroids of the clusters</param>
        /// <param name="SamplesInClusters">number of samples in each cluster (a cluster with 0 samples is treated as empty)</param>
        /// <returns>Tuple of two Items, both indexed by cluster number: <br />
        /// - Item 1: contains the number of nearest cluster; -1 if the cluster is empty or if no nearest cluster could be found (e.g. there is no other non-empty cluster) <br />
        /// - Item 2: contains the distance to the nearest cluster; -1 if the cluster is empty, double.MaxValue if no nearest cluster could be found
        /// </returns>
        private static Tuple <int[], double[]> calculateNearestCluster(double[][] Centroids, int[] SamplesInClusters)
        {
            int[]    NearestClustersArray          = new int[Centroids.Length];
            double[] DistanceToNearestClusterArray = new double[Centroids.Length];
            int      Code;
            string   Message = "Function <calculateNearestCluster>: ";

            try
            {
                double curDistance;
                for (int i = 0; i < Centroids.Length; i++)
                {
                    //in case of empty cluster
                    if (SamplesInClusters[i] == 0)
                    {
                        NearestClustersArray[i]          = -1;
                        DistanceToNearestClusterArray[i] = -1;
                        continue;
                    }

                    //start from the "not found" state; without this the index would keep the array
                    //default 0 and wrongly report cluster 0 as the nearest cluster when no candidate exists
                    NearestClustersArray[i]          = -1;
                    DistanceToNearestClusterArray[i] = double.MaxValue;

                    for (int j = 0; j < Centroids.Length; j++)
                    {
                        //a cluster is never its own neighbour and empty clusters are not candidates
                        if (i == j || SamplesInClusters[j] == 0)
                        {
                            continue;
                        }

                        curDistance = KMeans.calculateDistance(Centroids[i], Centroids[j]);

                        //strict comparison: on equal distances the cluster with the lower number wins
                        if (curDistance < DistanceToNearestClusterArray[i])
                        {
                            DistanceToNearestClusterArray[i] = curDistance;
                            NearestClustersArray[i]          = j;
                        }
                    }
                }

                return(Tuple.Create(NearestClustersArray, DistanceToNearestClusterArray));
            }
            catch (Exception Ex)
            {
                //any failure inside the search is reported as a KMeansException with code 400
                Code     = 400;
                Message += "Unhandled exception:\t" + Ex.ToString();
                throw new KMeansException(Code, Message);
            }
        }
예제 #2
0
        /// <summary>
        /// calculateMoreStatistics is a function that calculates statistics of a cluster. These statistics are dependent on other clusters.
        /// All output arrays have one entry per cluster (same length as <paramref name="Centroids"/>).
        /// </summary>
        /// <param name="RawData">data to be clustered</param>
        /// <param name="DataToClusterMapping">contains the assigned cluster number for each sample of the RawData</param>
        /// <param name="Centroids">the centroids of the clusters</param>
        /// <param name="NearestCluster">nearest cluster number for each cluster; an entry of -1 marks a cluster that is skipped (treated as empty)</param>
        /// <param name="NearestForeignSampleInNearestCluster">nearest sample belonging to the nearest cluster, measured to this cluster's centroid; null if the cluster is skipped or no such sample was found</param>
        /// <param name="DistanceToNearestForeignSampleInNearestCluster">distance between the nearest sample of the nearest cluster and this cluster's centroid; -1 if the cluster is skipped, double.MaxValue if no such sample was found</param>
        /// <param name="NearestForeignSample">nearest sample not belonging to this cluster, measured to this cluster's centroid; null if the cluster is skipped or no foreign sample was found</param>
        /// <param name="DistanceToNearestForeignSample">distance between the nearest foreign sample and this cluster's centroid; -1 if the cluster is skipped, double.MaxValue if no foreign sample was found</param>
        /// <param name="ClusterOfNearestForeignSample">the cluster to which the nearest foreign sample belongs; -1 if the cluster is skipped or no foreign sample was found</param>
        private static void calculateMoreStatistics(double[][] RawData, int[] DataToClusterMapping, double[][] Centroids, int[] NearestCluster, out double[][] NearestForeignSampleInNearestCluster, out double[] DistanceToNearestForeignSampleInNearestCluster, out double[][] NearestForeignSample, out double[] DistanceToNearestForeignSample, out int[] ClusterOfNearestForeignSample)
        {
            int    Code;
            string Message = "Function <calculateMoreStatistics>: ";

            try
            {
                NearestForeignSampleInNearestCluster           = new double[Centroids.Length][];
                DistanceToNearestForeignSampleInNearestCluster = new double[Centroids.Length];
                NearestForeignSample           = new double[Centroids.Length][];
                DistanceToNearestForeignSample = new double[Centroids.Length];
                ClusterOfNearestForeignSample  = new int[Centroids.Length];

                for (int i = 0; i < Centroids.Length; i++)
                {
                    //in case of empty cluster
                    if (NearestCluster[i] == -1)
                    {
                        NearestForeignSampleInNearestCluster[i] = null;
                        NearestForeignSample[i] = null;
                        DistanceToNearestForeignSampleInNearestCluster[i] = -1;
                        DistanceToNearestForeignSample[i] = -1;
                        ClusterOfNearestForeignSample[i]  = -1;
                    }
                    else
                    {
                        DistanceToNearestForeignSampleInNearestCluster[i] = double.MaxValue;
                        DistanceToNearestForeignSample[i] = double.MaxValue;

                        //"not found" until a foreign sample is seen; otherwise the array default 0
                        //would be indistinguishable from "the nearest foreign sample belongs to cluster 0"
                        ClusterOfNearestForeignSample[i] = -1;
                    }
                }

                double curDistance;

                for (int i = 0; i < RawData.Length; i++)
                {
                    for (int j = 0; j < Centroids.Length; j++)
                    {
                        //skip if sample belong to the cluster itself or the cluster is empty
                        if (DataToClusterMapping[i] == j || NearestCluster[j] == -1)
                        {
                            continue;
                        }

                        curDistance = KMeans.calculateDistance(RawData[i], Centroids[j]);

                        //nearest sample of any other cluster (the row of RawData is stored by reference, not copied)
                        if (curDistance < DistanceToNearestForeignSample[j])
                        {
                            DistanceToNearestForeignSample[j] = curDistance;
                            NearestForeignSample[j]           = RawData[i];
                            ClusterOfNearestForeignSample[j]  = DataToClusterMapping[i];
                        }

                        //nearest sample restricted to the cluster that is registered as nearest cluster of j
                        if (DataToClusterMapping[i] == NearestCluster[j])
                        {
                            if (curDistance < DistanceToNearestForeignSampleInNearestCluster[j])
                            {
                                DistanceToNearestForeignSampleInNearestCluster[j] = curDistance;
                                NearestForeignSampleInNearestCluster[j]           = RawData[i];
                            }
                        }
                    }
                }
            }
            catch (Exception Ex)
            {
                //any failure is reported as a KMeansException with code 400
                Code     = 400;
                Message += "Unhandled exception:\t" + Ex.ToString();
                throw new KMeansException(Code, Message);
            }
        }
예제 #3
0
        /// <summary>
        /// calculateStatistics is a function that calculates statistics and properties of a single cluster
        /// (distance of every sample to the centroid, farthest sample, mean and standard deviation per attribute).
        /// These statistics are independent of other clusters. The results are written into the given cluster object.
        /// </summary>
        /// <param name="cls">a cluster object; its ClusterData and Centroid are read, its statistic members are overwritten</param>
        private static void calculateStatistics(Cluster cls)
        {
            string errorText = "Function <calculateStatistics>: ";

            try
            {
                int sampleCount    = cls.ClusterData.Length;
                int attributeCount = cls.Centroid.Length;

                cls.ClusterDataDistanceToCentroid = new double[sampleCount];
                cls.Mean                 = new double[attributeCount];
                cls.StandardDeviation    = new double[attributeCount];
                cls.InClusterMaxDistance = -1;

                //empty cluster: everything is zeroed (including the centroid) and there is no nearest cluster
                if (sampleCount == 0)
                {
                    cls.InClusterFarthestSampleIndex = 0;
                    cls.InClusterMaxDistance         = 0;
                    cls.InClusterFarthestSample      = new double[attributeCount];

                    for (int attribute = 0; attribute < attributeCount; attribute++)
                    {
                        cls.Mean[attribute]     = 0;
                        cls.Centroid[attribute] = 0;
                        cls.InClusterFarthestSample[attribute] = 0;
                    }

                    cls.NearestCluster = -1;
                    return;
                }

                //first pass: distances to the centroid, farthest sample and running mean
                for (int sample = 0; sample < sampleCount; sample++)
                {
                    var    currentSample = cls.ClusterData[sample];
                    double distance      = KMeans.calculateDistance(currentSample, cls.Centroid);

                    cls.ClusterDataDistanceToCentroid[sample] = distance;

                    //strict comparison: on equal distances the first sample stays the farthest one
                    if (distance > cls.InClusterMaxDistance)
                    {
                        cls.InClusterFarthestSampleIndex = sample;
                        cls.InClusterFarthestSample      = currentSample;
                        cls.InClusterMaxDistance         = distance;
                    }

                    for (int attribute = 0; attribute < attributeCount; attribute++)
                    {
                        cls.Mean[attribute] += currentSample[attribute] / sampleCount;
                    }
                }

                //second pass: variance per attribute, divided by the number of samples
                double[] variance = new double[attributeCount];

                for (int sample = 0; sample < sampleCount; sample++)
                {
                    for (int attribute = 0; attribute < attributeCount; attribute++)
                    {
                        double deviation = cls.ClusterData[sample][attribute] - cls.Mean[attribute];
                        variance[attribute] += Math.Pow(deviation, 2) / sampleCount;
                    }
                }

                for (int attribute = 0; attribute < attributeCount; attribute++)
                {
                    cls.StandardDeviation[attribute] = Math.Sqrt(variance[attribute]);
                }
            }
            catch (Exception ex)
            {
                //any failure is reported as a KMeansException with code 400
                int errorCode = 400;
                errorText += "Unhandled exception:\t" + ex.ToString();
                throw new KMeansException(errorCode, errorText);
            }
        }