/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics.
/// Distinct rows of <paramref name="data"/> are picked at random as the initial
/// single-point clusters, then the data is re-clustered until no cluster mean moves.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered; the first dimension indexes rows (data points), the second indexes fields</param>
/// <returns>A collection of clusters of data (empty when <paramref name="clusterCount"/> is zero)</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="data"/> is null</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="clusterCount"/> is negative or exceeds the number of rows in <paramref name="data"/></exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    if (clusterCount < 0)
    {
        throw new ArgumentOutOfRangeException("clusterCount", "Cluster count cannot be negative.");
    }

    // Each seed must be a distinct row, so there cannot be more clusters than rows.
    // (Previously this case looped forever while searching for an unused row.)
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException("clusterCount", "Cluster count cannot exceed the number of data rows.");
    }

    Random random = new Random();
    ClusterCollection clusters = new ClusterCollection();

    // Partial Fisher-Yates shuffle: after step i, rowIndices[0..i] holds i + 1
    // distinct, uniformly chosen rows. Unlike rejection sampling this always
    // terminates and can select every row, including the last one.
    int[] rowIndices = new int[rowCount];
    for (int i = 0; i < rowCount; i++)
    {
        rowIndices[i] = i;
    }

    for (int pick = 0; pick < clusterCount; pick++)
    {
        // Random.Next's upper bound is exclusive, so this draws from [pick, rowCount - 1].
        int swapWith = random.Next(pick, rowCount);
        int seedRow = rowIndices[swapWith];
        rowIndices[swapWith] = rowIndices[pick];
        rowIndices[pick] = seedRow;

        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[seedRow, field];
        }

        Cluster cluster = new Cluster();
        cluster.Add(dataPoint);
        clusters.Add(cluster);
    }

    // Re-assign the data to the nearest cluster until every cluster mean is
    // unchanged between two consecutive passes. The exact comparison with zero is
    // intentional: the loop ends only when a fixed point has been reached.
    // NOTE(review): if a cluster can end up empty and its ClusterMean is then not a
    // finite vector, the distance would never be 0 and this loop would not
    // terminate - confirm against Cluster.ClusterMean.
    int stableClustersCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;
        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double meanShift = KMeans.EuclideanDistance(
                newClusters[clusterIndex].ClusterMean,
                clusters[clusterIndex].ClusterMean);

            if (meanShift == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics by
/// assigning every row of <paramref name="data"/> to the cluster in
/// <paramref name="clusters"/> whose mean is nearest (Euclidean distance).
/// </summary>
/// <param name="clusters">A collection of data clusters whose means act as the current cluster centers</param>
/// <param name="data">An array containing data to be clustered; the first dimension indexes rows (data points), the second indexes fields</param>
/// <returns>A new collection with the same number of clusters as <paramref name="clusters"/>, containing the re-assigned data points</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="clusters"/> or <paramref name="data"/> is null</exception>
/// <exception cref="System.ArgumentException"><paramref name="clusters"/> is empty</exception>
public static ClusterCollection ClusterDataSet(ClusterCollection clusters, double[,] data)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // ArgumentException derives from SystemException, so callers that caught the
    // SystemException thrown previously keep working; the message is unchanged.
    if (clusters.Count <= 0)
    {
        throw new ArgumentException("Cluster Count Cannot Be Zero!");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);
    int clusterTotal = clusters.Count;

    // The input clusters are not modified below, so read each mean once instead of
    // once per row. Also create one empty output cluster per input cluster.
    double[][] clusterMeans = new double[clusterTotal][];
    ClusterCollection newClusters = new ClusterCollection();
    for (int index = 0; index < clusterTotal; index++)
    {
        clusterMeans[index] = clusters[index].ClusterMean;
        newClusters.Add(new Cluster());
    }

    for (int row = 0; row < rowCount; row++)
    {
        // Copy the row out of the 2-D array so it can be stored as a single point.
        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[row, field];
        }

        // Find the nearest mean; on a tie the lowest cluster index wins.
        int nearest = 0;
        double nearestDistance = KMeans.EuclideanDistance(dataPoint, clusterMeans[0]);
        for (int index = 1; index < clusterTotal; index++)
        {
            double distance = KMeans.EuclideanDistance(dataPoint, clusterMeans[index]);
            if (nearestDistance > distance)
            {
                nearestDistance = distance;
                nearest = index;
            }
        }

        newClusters[nearest].Add(dataPoint);
    }

    return newClusters;
}