Ejemplo n.º 1
0
        /// <summary>
        /// Computes the statistics of a single cluster that do not depend on any other cluster:
        /// the distance of every sample to the centroid, the farthest sample from the centroid,
        /// the per-attribute mean and the per-attribute population standard deviation.
        /// The results are written to the properties of <paramref name="cls"/>.
        /// </summary>
        /// <param name="cls">The cluster whose statistics are calculated and updated in place.</param>
        private static void calculateStatistics(Cluster cls)
        {
            string failurePrefix = "Function <calculateStatistics>: ";

            try
            {
                int sampleCount    = cls.ClusterData.Length;
                int attributeCount = cls.Centroid.Length;

                cls.ClusterDataDistanceToCentroid = new double[sampleCount];
                cls.Mean                 = new double[attributeCount];
                cls.StandardDeviation    = new double[attributeCount];
                cls.InClusterMaxDistance = -1;

                // An empty cluster gets all-zero statistics and is flagged as having no nearest cluster.
                if (sampleCount == 0)
                {
                    cls.InClusterFarthestSampleIndex = 0;
                    cls.InClusterMaxDistance         = 0;
                    cls.InClusterFarthestSample      = new double[attributeCount];

                    for (int a = 0; a < attributeCount; a++)
                    {
                        cls.Mean[a]     = 0;
                        cls.Centroid[a] = 0;
                        cls.InClusterFarthestSample[a] = 0;
                    }

                    cls.NearestCluster = -1;
                    return;
                }

                for (int s = 0; s < sampleCount; s++)
                {
                    // Distance of this sample to the centroid.
                    cls.ClusterDataDistanceToCentroid[s] = KMeans.calculateDistance(cls.ClusterData[s], cls.Centroid);

                    // Track the farthest sample seen so far. The running maximum starts at -1,
                    // so the first sample is always accepted.
                    if (cls.ClusterDataDistanceToCentroid[s] > cls.InClusterMaxDistance)
                    {
                        cls.InClusterFarthestSampleIndex = s;
                        cls.InClusterFarthestSample      = cls.ClusterData[s];
                        cls.InClusterMaxDistance         = cls.ClusterDataDistanceToCentroid[s];
                    }

                    // Accumulate the mean incrementally: every sample contributes value / sampleCount.
                    for (int a = 0; a < attributeCount; a++)
                    {
                        cls.Mean[a] += cls.ClusterData[s][a] / sampleCount;
                    }
                }

                // Population variance per attribute, followed directly by its square root.
                double[] variance = new double[attributeCount];

                for (int a = 0; a < attributeCount; a++)
                {
                    for (int s = 0; s < sampleCount; s++)
                    {
                        variance[a] += Math.Pow((cls.ClusterData[s][a] - cls.Mean[a]), 2) / sampleCount;
                    }

                    cls.StandardDeviation[a] = Math.Sqrt(variance[a]);
                }
            }
            catch (Exception Ex)
            {
                // Any failure is reported as a KMeansException with the generic code 400.
                throw new KMeansException(400, failurePrefix + "Unhandled exception:\t" + Ex.ToString());
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes, for every cluster, the statistics that depend on the other clusters:
        /// the nearest sample that does not belong to the cluster (and the cluster that owns it),
        /// and the nearest sample that belongs to the cluster's nearest cluster.
        /// Distances are measured from the sample to the cluster's centroid.
        /// </summary>
        /// <remarks>
        /// A cluster whose entry in <paramref name="NearestCluster"/> is -1 is treated as empty: its samples
        /// stay null and its distances and owning-cluster index are set to -1. For a non-empty cluster for which
        /// no matching sample is found, the distance keeps its initial value <see cref="double.MaxValue"/>.
        /// </remarks>
        /// <param name="RawData">data to be clustered</param>
        /// <param name="DataToClusterMapping">contains the assigned cluster number for each sample of the RawData</param>
        /// <param name="Centroids">the centroids of the clusters</param>
        /// <param name="NearestCluster">nearest cluster number of each cluster (-1 for an empty cluster)</param>
        /// <param name="NearestForeignSampleInNearestCluster">per cluster, the sample of its nearest cluster that is closest to this cluster's centroid</param>
        /// <param name="DistanceToNearestForeignSampleInNearestCluster">per cluster, the distance between that sample and this cluster's centroid</param>
        /// <param name="NearestForeignSample">per cluster, the sample not belonging to this cluster that is closest to this cluster's centroid</param>
        /// <param name="DistanceToNearestForeignSample">per cluster, the distance between the nearest foreign sample and this cluster's centroid</param>
        /// <param name="ClusterOfNearestForeignSample">per cluster, the cluster to which the nearest foreign sample belongs</param>
        private static void calculateMoreStatistics(double[][] RawData, int[] DataToClusterMapping, double[][] Centroids, int[] NearestCluster, out double[][] NearestForeignSampleInNearestCluster, out double[] DistanceToNearestForeignSampleInNearestCluster, out double[][] NearestForeignSample, out double[] DistanceToNearestForeignSample, out int[] ClusterOfNearestForeignSample)
        {
            string failurePrefix = "Function <calculateMoreStatistics>: ";

            try
            {
                // Freshly allocated arrays: sample slots start as null, numeric slots as 0.
                NearestForeignSampleInNearestCluster           = new double[Centroids.Length][];
                DistanceToNearestForeignSampleInNearestCluster = new double[Centroids.Length];
                NearestForeignSample           = new double[Centroids.Length][];
                DistanceToNearestForeignSample = new double[Centroids.Length];
                ClusterOfNearestForeignSample  = new int[Centroids.Length];

                for (int c = 0; c < Centroids.Length; c++)
                {
                    bool emptyCluster = NearestCluster[c] == -1;

                    // Empty clusters are marked with -1; the others start from the largest
                    // possible distance so that any real distance replaces it.
                    double initialDistance = emptyCluster ? -1 : double.MaxValue;

                    DistanceToNearestForeignSampleInNearestCluster[c] = initialDistance;
                    DistanceToNearestForeignSample[c]                 = initialDistance;

                    if (emptyCluster)
                    {
                        ClusterOfNearestForeignSample[c] = -1;
                    }
                }

                for (int s = 0; s < RawData.Length; s++)
                {
                    for (int c = 0; c < Centroids.Length; c++)
                    {
                        int owner = DataToClusterMapping[s];

                        // Only foreign samples measured against non-empty clusters are of interest.
                        if (owner != c && NearestCluster[c] != -1)
                        {
                            double distance = KMeans.calculateDistance(RawData[s], Centroids[c]);

                            if (distance < DistanceToNearestForeignSample[c])
                            {
                                DistanceToNearestForeignSample[c] = distance;
                                NearestForeignSample[c]           = RawData[s];
                                ClusterOfNearestForeignSample[c]  = DataToClusterMapping[s];
                            }

                            // Separately track the closest sample that belongs to the nearest cluster.
                            if (DataToClusterMapping[s] == NearestCluster[c] && distance < DistanceToNearestForeignSampleInNearestCluster[c])
                            {
                                DistanceToNearestForeignSampleInNearestCluster[c] = distance;
                                NearestForeignSampleInNearestCluster[c]           = RawData[s];
                            }
                        }
                    }
                }
            }
            catch (Exception Ex)
            {
                // Any failure is reported as a KMeansException with the generic code 400.
                throw new KMeansException(400, failurePrefix + "Unhandled exception:\t" + Ex.ToString());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// RecommendedNumberOfClusters is a function that gives a recommended number of clusters for the given samples based on some provided methods.
        /// Candidate cluster counts are tried in ascending order from <paramref name="minNumberOfClusters"/> up to
        /// the smaller of <paramref name="maxNumberOfClusters"/> and the number of samples.
        /// </summary>
        /// <param name="rawData">The samples to be clustered</param>
        /// <param name="kmeansMaxIterations">Maximum allowed number of Kmeans iterations for clustering (at least 1)</param>
        /// <param name="numberOfAttributes">Number of attributes for each sample (at least 1)</param>
        /// <param name="maxNumberOfClusters">Maximum desired number of clusters (at least 2)</param>
        /// <param name="minNumberOfClusters">Minimum desired number of clusters; values below 2 are raised to 2</param>
        /// <param name="method">Integer 0,1,2 or 3 representing the method to be used
        /// <ul style = "list-style-type:none" >
        /// <li> - Method 0: Radial method in which the farthest sample of each cluster must be closer to the cluster centroid than the nearest foreign sample of the other clusters </li>
        /// <li> - Method 1: Standard Deviation method in which the standard deviation in each cluster must be less than the desired standard deviation </li>
        /// <li> - Method 2: Both. Uses radial and standard deviation methods at the same time </li>
        /// <li> - Method 3: Balanced clusters method in which the clusters contain the closest number of samples</li>
        /// </ul>
        /// </param>
        /// <param name="standardDeviation">The desired standard deviation upper limit in each cluster (required for methods 1 and 2)</param>
        /// <param name="kmeansAlgorithm">The desired Kmeans clustering algorithm (1 or 2); any value other than 2 is treated as 1
        /// <ul style="list-style-type:none">
        /// <li> - 1: Centroids are the nearest samples to the means</li>
        /// <li> - 2: Centroids are the means</li>
        /// </ul></param>
        /// <param name="centroids">Initial Centroids</param>
        /// <returns>
        /// The recommended number of clusters for the given samples based on the specified method,
        /// or 0 when no cluster count in the allowed range satisfies the method (or the range is empty).
        /// </returns>
        /// <exception cref="KMeansException">
        /// Thrown with a specific code when an argument is invalid (104, 122, 123, 108, 100, 107),
        /// and with code 400 when an unexpected exception occurs.
        /// </exception>
        public int RecommendedNumberOfClusters(double[][] rawData, int kmeansMaxIterations, int numberOfAttributes, int maxNumberOfClusters, int minNumberOfClusters, int method, double[] standardDeviation, int kmeansAlgorithm = 1, double[][] centroids = null)
        {
            int    Code;
            string Message = "Function <RecommendedNumberOfClusters>: ";

            try
            {
                //some checks
                if (maxNumberOfClusters < 2)
                {
                    Code     = 104;
                    Message += "Maximum number of clusters must be at least 2";
                    throw new KMeansException(Code, Message);
                }

                if (minNumberOfClusters < 2)
                {
                    minNumberOfClusters = 2;
                }

                if (method > 3 || method < 0)
                {
                    Code     = 122;
                    Message += "Method must be either 0,1,2 or 3";
                    throw new KMeansException(Code, Message);
                }

                if ((method == 1 || method == 2) && standardDeviation == null)
                {
                    Code     = 123;
                    Message += "Parameter StdDev is needed";
                    throw new KMeansException(Code, Message);
                }

                if (kmeansMaxIterations < 1)
                {
                    Code     = 108;
                    Message += "Unacceptable number of maximum iterations";
                    throw new KMeansException(Code, Message);
                }

                if (rawData == null)
                {
                    Code     = 100;
                    Message += "RawData is null";
                    throw new KMeansException(Code, Message);
                }

                if (numberOfAttributes < 1)
                {
                    Code     = 107;
                    Message += "Unacceptable number of attributes. Must be at least 1";
                    throw new KMeansException(Code, Message);
                }

                if (kmeansAlgorithm != 2)
                {
                    kmeansAlgorithm = 1;
                }

                //checks that all the samples have same number of attributes
                KMeans.verifyRawDataConsistency(rawData, numberOfAttributes);

                // rawData is only dereferenced after the null check above, so a null input
                // yields error code 100 instead of a NullReferenceException.
                // There cannot be more clusters than samples.
                int MaxClusters = Math.Min(rawData.Length, maxNumberOfClusters);

                // Empty candidate range (e.g. fewer samples than minNumberOfClusters): nothing can be
                // evaluated, so report "no recommendation" instead of allocating a negative-sized array
                // or returning a count that was never tested.
                if (MaxClusters < minNumberOfClusters)
                {
                    return 0;
                }

                double[][] Centroids;
                int        IterationReached = -1;
                int[]      kMeansResponse;
                Cluster[]  cluster;
                bool       isRadial, isStandardDeviation;

                // One balance error per candidate cluster count; only filled for method 3.
                double[]   balancedError = new double[MaxClusters - minNumberOfClusters + 1];

                for (int i = minNumberOfClusters; i <= MaxClusters; i++)
                {
                    //cluster the data with number of clusters equals to i
                    kMeansResponse = KMeans.runKMeansAlgorithm(rawData, i, numberOfAttributes, kmeansMaxIterations, kmeansAlgorithm, centroids, out Centroids, out IterationReached);

                    cluster = ClusteringResults.CreateClusteringResult(rawData, kMeansResponse, Centroids, i);

                    // A check that is not requested by the method counts as passed.
                    isRadial = true;

                    isStandardDeviation = true;

                    if (method == 0 || method == 2)
                    {
                        //radial method check
                        isRadial = radialClustersCheck(cluster);
                    }

                    if (method == 1 || method == 2)
                    {
                        //standard deviation check
                        isStandardDeviation = stdDeviationClustersCheck(cluster, standardDeviation);
                    }

                    if (method == 3)
                    {
                        // Balanced check: the error is the variance of the cluster sizes around their average.
                        balancedError[i - minNumberOfClusters] = 0;
                        double[] countSamples = new double[i];
                        double   average      = 0;
                        for (int c = 0; c < i; c++)
                        {
                            countSamples[c] = cluster[c].ClusterData.Length;
                            average         = average + countSamples[c] / i;
                        }
                        for (int c = 0; c < i; c++)
                        {
                            //error calculation
                            balancedError[i - minNumberOfClusters] = balancedError[i - minNumberOfClusters] + Math.Pow(countSamples[c] - average, 2) / i;
                        }
                    }
                    else if (isRadial && isStandardDeviation)
                    {
                        // Methods 0-2: the first (smallest) cluster count that passes is the recommendation.
                        return i;
                    }
                }

                if (method == 3)
                {
                    // get minimum value (most balanced solution); ties keep the smallest cluster count
                    int minIndex = 0;
                    for (int l = 1; l < balancedError.Length; l++)
                    {
                        if (balancedError[l] < balancedError[minIndex])
                        {
                            minIndex = l;
                        }
                    }

                    return minIndex + minNumberOfClusters;
                }

                // No candidate satisfied the requested constraints.
                return 0;
            }
            catch (KMeansException)
            {
                // Keep the specific error code and message (validation errors above, or errors raised
                // by the K-Means helpers) instead of rewrapping them as a generic code 400.
                throw;
            }
            catch (Exception Ex)
            {
                Code     = 400;
                Message += "Unhandled exception:\t" + Ex.ToString();
                throw new KMeansException(Code, Message);
            }
        }