/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics.
/// </summary>
/// <remarks>
/// Seeds <paramref name="clusterCount"/> clusters, each with one distinct randomly chosen row,
/// then repeatedly reassigns every row with <c>KMeans.ClusterDataSet(clusters, data)</c> until
/// no cluster mean changes between two consecutive passes.
/// </remarks>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered</param>
/// <returns>A collection of clusters of data; empty when <paramref name="clusterCount"/> is zero</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is negative or larger than the number of rows in
/// <paramref name="data"/>, so that many distinct seed rows cannot be chosen.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, IList<IDataList> data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int rowCount = data.Count;

    // Each cluster is seeded with a distinct row, so asking for more clusters than rows
    // could never be satisfied (the seeding loop below would spin forever).
    if (clusterCount < 0 || clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            clusterCount,
            "The cluster count must be between zero and the number of rows in the data set.");
    }

    Random random = new Random();
    ClusterCollection clusters = new ClusterCollection();
    HashSet<int> seedRows = new HashSet<int>();

    // Pick distinct random rows as the initial single-member clusters.
    while (seedRows.Count < clusterCount)
    {
        // Random.Next(max) excludes max, so this covers every row index 0..rowCount-1.
        int candidateRow = random.Next(rowCount);

        // HashSet.Add returns false when the row was already chosen as a seed.
        if (seedRows.Add(candidateRow))
        {
            Cluster seed = new Cluster();
            seed.Add(data[candidateRow]);
            clusters.Add(seed);
        }
    }

    // Re-cluster until every cluster mean is unchanged by a full reassignment pass.
    int stableClustersCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;
        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            // Exact comparison is intentional: a cluster counts as stable only when its
            // mean did not move at all.
            // NOTE(review): if ClusterMean can yield NaN (e.g. for a cluster that ends up
            // empty) this test never succeeds and the loop will not terminate - confirm.
            double meanShift = KMeans.EuclideanDistance(
                newClusters[clusterIndex].ClusterMean,
                clusters[clusterIndex].ClusterMean);

            if (meanShift == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics.
/// </summary>
/// <remarks>
/// Performs a single assignment pass: every row of <paramref name="data"/> is added to the new
/// cluster whose index matches the cluster in <paramref name="clusters"/> with the nearest
/// mean (by <c>KMeans.EuclideanDistance</c>). Ties go to the lowest cluster index. The input
/// clusters are only read; a fresh collection with the same number of clusters is returned.
/// When <paramref name="data"/> is empty, every returned cluster is empty.
/// </remarks>
/// <param name="clusters">A collection of data clusters</param>
/// <param name="data">An array containing data to be clustered</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="clusters"/> or <paramref name="data"/> is null.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="clusters"/> contains no clusters.</exception>
public static ClusterCollection ClusterDataSet(ClusterCollection clusters, IList<IDataList> data)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // ArgumentException derives from SystemException, so callers that caught the
    // SystemException thrown previously keep working; the message text is unchanged.
    if (clusters.Count <= 0)
    {
        throw new ArgumentException("Cluster Count Cannot Be Zero!");
    }

    int clusterCount = clusters.Count;

    // Create one empty target cluster per source cluster and read each source mean once,
    // rather than once per row.
    ClusterCollection newClusters = new ClusterCollection();
    double[][] clusterMeans = new double[clusterCount][];
    for (int index = 0; index < clusterCount; index++)
    {
        newClusters.Add(new Cluster());
        clusterMeans[index] = clusters[index].ClusterMean;
    }

    for (int row = 0; row < data.Count; row++)
    {
        IDataList rowData = data[row];

        // Start with cluster 0 as the best match, then look for a strictly closer one.
        int nearestCluster = 0;
        double nearestDistance = KMeans.EuclideanDistance(rowData.Data, clusterMeans[0]);

        for (int index = 1; index < clusterCount; index++)
        {
            double distance = KMeans.EuclideanDistance(rowData.Data, clusterMeans[index]);
            if (nearestDistance > distance)
            {
                nearestDistance = distance;
                nearestCluster = index;
            }
        }

        newClusters[nearestCluster].Add(rowData);
    }

    return newClusters;
}
/// <summary>
/// Appends the given cluster to this collection.
/// </summary>
/// <param name="cluster">The cluster to append to the collection</param>
public virtual void Add(Cluster cluster)
{
    // Forward to the underlying list exposed by this collection.
    List.Add(cluster);
}