Exemplo n.º 1
0
        /// <summary>
        /// Groups <paramref name="points"/> into <paramref name="k"/> clusters using iterative
        /// k-means (assign every point to its nearest centroid, move each centroid to the mean of
        /// its points, repeat until no centroid moves) and stores the resulting cluster index in
        /// each point's <c>Cluster</c> property.
        /// </summary>
        /// <remarks>
        /// Centroids are seeded at random inside the bounding box of the input, so the outcome can
        /// differ from run to run. Every step is reported through <c>Debug.Log</c>.
        /// </remarks>
        /// <param name="points">
        /// Points to cluster. The list itself is not modified, but the <c>Cluster</c> property of
        /// every element is overwritten.
        /// </param>
        /// <param name="k">Number of clusters; must be between 1 and <c>points.Count</c>.</param>
        /// <returns>The same list instance that was passed in as <paramref name="points"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="points"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="k"/> is smaller than 1 or larger than the number of points.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// A centroid could not be computed (its mean is NaN), which happens when a cluster
        /// receives no points from the random seeding or when the input contains NaN coordinates.
        /// </exception>
        public static List <DataPoint> KMeansAlgorithm(List <DataPoint> points, int k)
        {
            if (points == null)
            {
                throw new ArgumentNullException("points");
            }
            if (k < 1)
            {
                throw new ArgumentOutOfRangeException("k", "K must be greater than 0");
            }
            if (k > points.Count)
            {
                // With more clusters than points at least one cluster is necessarily empty, so
                // the run could only ever fail later on; report it before doing any work.
                throw new ArgumentOutOfRangeException("k", "K value is too large for this set");
            }

            Debug.Log("Initial set:");
            foreach (DataPoint p in points)
            {
                Debug.Log(" " + p);
            }
            Debug.Log("");

            // Bounding box of the input; the initial centroids are drawn from inside it.
            double minX = double.MaxValue, minY = double.MaxValue;
            double maxX = double.MinValue, maxY = double.MinValue;
            foreach (DataPoint p in points)
            {
                minX = Math.Min(minX, p.X);
                maxX = Math.Max(maxX, p.X);
                minY = Math.Min(minY, p.Y);
                maxY = Math.Max(maxY, p.Y);
            }

            // Seed k centroids uniformly at random inside the bounding box.
            DataPoint[]   centroids = new DataPoint[k];
            System.Random random    = new System.Random();

            Debug.Log("Centroids ");
            for (int c = 0; c < k; c++)
            {
                double seedX = random.NextDouble() * (maxX - minX) + minX;
                double seedY = random.NextDouble() * (maxY - minY) + minY;
                centroids[c] = new DataPoint(seedX, seedY);
                Debug.Log(centroids[c] + " ");
            }
            Debug.Log("");

            bool centroidsChanged = true;

            // Iterate until an update pass leaves every centroid where it was.
            while (centroidsChanged)
            {
                // Assignment step: bind each point to its nearest centroid.
                foreach (DataPoint p in points)
                {
                    int    nearest       = -1;
                    double nearestDistSq = double.PositiveInfinity;
                    for (int c = 0; c < k; c++)
                    {
                        // Squared distance is enough to find the minimum; no square root needed.
                        double dx     = p.X - centroids[c].X;
                        double dy     = p.Y - centroids[c].Y;
                        double distSq = dx * dx + dy * dy;

                        // "nearest < 0" accepts the first centroid unconditionally, so a point
                        // always ends up with a valid index even if its distance is infinite.
                        if (nearest < 0 || distSq < nearestDistSq)
                        {
                            nearest       = c;
                            nearestDistSq = distSq;
                        }
                    }
                    p.Cluster = nearest;
                }

                // Update step, part 1: accumulate per-cluster sums and a printable member list.
                int[]    clusterSize = new int[k];
                double[] xSum        = new double[k];
                double[] ySum        = new double[k];

                System.Text.StringBuilder[] clusterPoints = new System.Text.StringBuilder[k];
                for (int c = 0; c < k; c++)
                {
                    clusterPoints[c] = new System.Text.StringBuilder();
                }

                foreach (DataPoint p in points)
                {
                    int currCluster = p.Cluster;
                    clusterSize[currCluster]++;
                    xSum[currCluster] += p.X;
                    ySum[currCluster] += p.Y;
                    clusterPoints[currCluster].Append(" ").Append(p.ToString());
                }

                for (int c = 0; c < k; c++)
                {
                    Debug.Log("Cluster " + c + " : " + clusterPoints[c].ToString());
                }

                // Update step, part 2: move every centroid to the mean of its cluster.
                centroidsChanged = false;

                Debug.Log("Centroids ");
                for (int c = 0; c < k; c++)
                {
                    DataPoint mean = new DataPoint(xSum[c] / clusterSize[c], ySum[c] / clusterSize[c]);

                    // An empty cluster yields 0/0 = NaN.
                    if (double.IsNaN(mean.X) || double.IsNaN(mean.Y))
                    {
                        throw new InvalidOperationException("K value is too large for this set");
                    }

                    // Compare coordinates rather than the objects themselves: "mean" is always a
                    // freshly created instance, so an identity comparison could never report
                    // "unchanged". Exact equality is intended - once assignments stop changing,
                    // the same sums in the same order reproduce the same mean bit for bit.
                    if (mean.X != centroids[c].X || mean.Y != centroids[c].Y)
                    {
                        centroids[c]     = mean;
                        centroidsChanged = true;
                    }
                    Debug.Log(centroids[c] + " ");
                }
                Debug.Log("");
            }

            Debug.Log("RESULT:");

            System.Text.StringBuilder[] resultClusters = new System.Text.StringBuilder[k];
            for (int c = 0; c < k; c++)
            {
                resultClusters[c] = new System.Text.StringBuilder();
            }

            foreach (DataPoint p in points)
            {
                resultClusters[p.Cluster].Append(" ").Append(p.ToString());
            }

            for (int c = 0; c < k; c++)
            {
                Debug.Log("Cluster " + c + " :" + resultClusters[c].ToString());
            }

            return(points);
        }
 /// <summary>
 /// Compares two data points by their <c>ClusterIndex</c> values.
 /// </summary>
 /// <param name="a">Point whose <c>ClusterIndex</c> is the left-hand side of the comparison.</param>
 /// <param name="b">Point whose <c>ClusterIndex</c> is the right-hand side of the comparison.</param>
 /// <returns>
 /// The result of <c>a.ClusterIndex.CompareTo(b.ClusterIndex)</c>.
 /// </returns>
 /// <remarks>
 /// NOTE(review): the signature matches a sort comparison callback; presumably it is passed to
 /// a list sort - confirm against the callers.
 /// </remarks>
 private static int CompareByClusterIndex(DataPoint a, DataPoint b)
 {
     var leftIndex  = a.ClusterIndex;
     var rightIndex = b.ClusterIndex;

     return leftIndex.CompareTo(rightIndex);
 }