Exemple #1
0
        /// <summary>
        /// Initializes the first iteration by seeding each cluster with a point drawn
        /// uniformly at random from the given array of points.
        /// </summary>
        /// <param name="points">
        /// Values to draw the seeds from. Draws are independent, so the same element
        /// may seed more than one cluster.
        /// </param>
        /// <param name="numClusters">Number of starting clusters to create.</param>
        /// <returns>
        /// An array of <paramref name="numClusters"/> clusters, each constructed from one
        /// randomly drawn element of <paramref name="points"/>. The array is empty when
        /// <paramref name="numClusters"/> is zero.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numClusters"/> is negative.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="points"/> is null while clusters are requested.</exception>
        /// <exception cref="ArgumentException"><paramref name="points"/> is empty while clusters are requested.</exception>
        virtual protected KMeansCluster[] pickStartingClusters(int[] points, int numClusters)
        {
            if (numClusters < 0)
            {
                throw new ArgumentOutOfRangeException("numClusters", "The number of clusters cannot be negative.");
            }

            KMeansCluster[] currentClusters = new KMeansCluster[numClusters];
            if (numClusters == 0)
            {
                // nothing to seed; points is deliberately not inspected in this case
                return currentClusters;
            }

            if (points == null)
            {
                throw new ArgumentNullException("points");
            }
            if (points.Length == 0)
            {
                // Random.Next(0) returns 0, which would otherwise index into an empty array
                throw new ArgumentException("At least one point is required to seed clusters.", "points");
            }

            Random rnd = new Random();
            for (int i = 0; i < numClusters; i++)
            {
                currentClusters[i] = new KMeansCluster(points[rnd.Next(points.Length)]);
            }
            return currentClusters;
        }
Exemple #2
0
        /// <summary>
        /// Initializes the first iteration by choosing the first mean point at random and
        /// the following ones with the KMeans++ seeding algorithm.
        /// </summary>
        /// <param name="points">Values to draw the initial centers from.</param>
        /// <param name="numClusters">Number of starting clusters to create.</param>
        /// <returns>
        /// An array of <paramref name="numClusters"/> clusters, each constructed from one
        /// element of <paramref name="points"/>. The array is empty when
        /// <paramref name="numClusters"/> is zero.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="numClusters"/> is negative.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="points"/> is null while clusters are requested.</exception>
        /// <exception cref="ArgumentException"><paramref name="points"/> is empty while clusters are requested.</exception>
        /// <remarks>
        /// The exact algorithm is as follows:
        /// 1 - Choose one center uniformly at random from among the data points.
        /// 2 - For each data point x, compute D(x), the distance between x and the nearest center that has already been chosen.
        /// 3 - Choose one new data point at random as a new center, using a weighted probability distribution where a point x is chosen with probability proportional to D(x)^2.
        /// 4 - Repeat Steps 2 and 3 until k centers have been chosen.
        /// 5 - Now that the initial centers have been chosen, proceed using standard k-means clustering.
        /// If every point has zero weight (all points coincide with already chosen centers),
        /// the next center is drawn uniformly at random instead.
        /// </remarks>
        override protected KMeansCluster[] pickStartingClusters(int[] points, int numClusters)
        {
            if (numClusters < 0)
            {
                throw new ArgumentOutOfRangeException("numClusters", "The number of clusters cannot be negative.");
            }

            KMeansCluster[] currentClusters = new KMeansCluster[numClusters];
            if (numClusters == 0)
            {
                // nothing to seed; points is deliberately not inspected in this case
                return currentClusters;
            }

            if (points == null)
            {
                throw new ArgumentNullException("points");
            }
            if (points.Length == 0)
            {
                // Random.Next(0) returns 0, which would otherwise index into an empty array
                throw new ArgumentException("At least one point is required to seed clusters.", "points");
            }

            Random rnd = new Random();

            // first item is chosen randomly
            currentClusters[0] = new KMeansCluster(points[rnd.Next(points.Length)]);

            // nearestDistances[p] holds the smallest distanceFromMean value seen so far for points[p]
            // over all chosen centers; it is refreshed with the newest center only, instead of
            // rescanning every center on each round.
            // NOTE(review): this assumes distanceFromMean depends only on the cluster's (unchanged) mean - confirm.
            float[] nearestDistances = new float[points.Length];
            for (int pointIdx = 0; pointIdx < points.Length; pointIdx++)
            {
                nearestDistances[pointIdx] = currentClusters[0].distanceFromMean(points[pointIdx]);
            }

            // running totals of the squared distances, kept in double so that small weights
            // are not lost once the running sum becomes large
            double[] cumulativeWeights = new double[points.Length];

            for (int i = 1; i < numClusters; i++)
            {
                if (i > 1)
                {
                    // fold the center picked in the previous round into the nearest distances
                    KMeansCluster newestCluster = currentClusters[i - 1];
                    for (int pointIdx = 0; pointIdx < points.Length; pointIdx++)
                    {
                        float currentDistance = newestCluster.distanceFromMean(points[pointIdx]);
                        if (currentDistance < nearestDistances[pointIdx])
                        {
                            nearestDistances[pointIdx] = currentDistance;
                        }
                    }
                }

                // accumulate the squared min distance of each point
                double totalWeight = 0.0;
                int lastWeightedIdx = -1;
                for (int pointIdx = 0; pointIdx < points.Length; pointIdx++)
                {
                    double weight = (double)nearestDistances[pointIdx] * nearestDistances[pointIdx];
                    if (weight > 0.0)
                    {
                        lastWeightedIdx = pointIdx;
                    }
                    totalWeight += weight;
                    cumulativeWeights[pointIdx] = totalWeight;
                }

                // pick a random position in [0, totalWeight) of the squared min distance distribution
                double target = rnd.NextDouble() * totalWeight;

                // The strict comparison selects the point whose weight interval contains the target.
                // Points with zero weight (e.g. points equal to an existing center) own an empty
                // interval and can never be selected here, even when the target is exactly 0.
                // Starting from lastWeightedIdx covers the case where rounding pushes the target
                // up to totalWeight itself.
                int chosenIdx = lastWeightedIdx;
                for (int pointIdx = 0; pointIdx < points.Length; pointIdx++)
                {
                    if (cumulativeWeights[pointIdx] > target)
                    {
                        chosenIdx = pointIdx;
                        break;
                    }
                }
                if (chosenIdx < 0)
                {
                    // no point carries any weight: fall back to a uniform draw so the slot is never left null
                    chosenIdx = rnd.Next(points.Length);
                }

                // create new cluster using this point as mean
                currentClusters[i] = new KMeansCluster(points[chosenIdx]);
            }
            return currentClusters;
        }
Exemple #3
0
        /// <summary>
        /// Classifies an array of points into <paramref name="numClusters"/> clusters.
        /// </summary>
        /// <param name="points">Values to classify.</param>
        /// <param name="numClusters">Number of clusters to divide the points into.</param>
        /// <returns>
        /// The cluster set of the first iteration for which <c>isStable</c> reports true
        /// when compared with the cluster set of the iteration before it.
        /// </returns>
        /// <remarks>
        /// Starts from the clusters returned by <c>pickStartingClusters</c>. Every iteration
        /// builds a fresh set of empty clusters, fills it through <c>assingPointsToClusters</c>
        /// and prints it with <c>dumpClustersToConsole</c> before the stability check.
        /// There is no upper bound on the number of iterations.
        /// </remarks>
        public KMeansCluster[] Run(int[] points, int numClusters)
        {
            KMeansCluster[] previous = pickStartingClusters(points, numClusters);
            KMeansCluster[] refined;
            bool converged;

            do
            {
                // start each pass from a brand-new set of empty clusters
                refined = new KMeansCluster[numClusters];
                for (int slot = 0; slot < refined.Length; slot++)
                {
                    refined[slot] = new KMeansCluster();
                }

                // distribute the points using the previous pass as reference, then report the result
                assingPointsToClusters(points, previous, refined);
                dumpClustersToConsole(refined);

                // compare against the previous pass before it is replaced
                converged = isStable(previous, refined);
                previous = refined;
            }
            while (!converged);

            return refined;
        }