/// <summary>
/// Distributes the rows of a dataset among clusters, placing each row in the
/// cluster whose mean lies closest to it.
/// </summary>
/// <param name="clusters">The current clusters; their means are the reference points</param>
/// <param name="data">An array containing the rows to be clustered</param>
/// <returns>A new collection with one cluster per input cluster, holding the rows nearest to that cluster's mean</returns>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int totalRows = data.Length;

    // begin with one empty cluster for every existing cluster
    ClusterCollection regrouped = new ClusterCollection();
    int created = 0;
    while (created < clusters.Count)
    {
        regrouped.Add(new Cluster());
        created++;
    }

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    for (int rowIndex = 0; rowIndex < totalRows; rowIndex++)
    {
        // the first cluster is the initial candidate for this row
        int nearest = 0;
        double[] candidateMean = clusters[0].ClusterMean;
        double shortest = this.EuclideanDistance(data[rowIndex], candidateMean);

        // a later cluster only wins when it is strictly closer, so ties keep the earlier cluster
        for (int candidate = 1; candidate < clusters.Count; candidate++)
        {
            candidateMean = clusters[candidate].ClusterMean;
            double distance = this.EuclideanDistance(data[rowIndex], candidateMean);
            if (shortest > distance)
            {
                shortest = distance;
                nearest = candidate;
            }
        }

        regrouped[nearest].Add(data[rowIndex]);
    }

    return regrouped;
}
/// <summary>
/// Groups a dataset into the requested number of clusters by reassigning the rows
/// repeatedly until no cluster mean changes between two consecutive passes.
/// Start time, end time, iteration count and elapsed seconds are written to the console.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing the rows to be clustered; its first <paramref name="clusterCount"/> rows seed the clusters</param>
/// <returns>The collection of clusters produced by the final pass</returns>
public ClusterCollection ClusterDataSet(int clusterCount, double[][] data)
{
    int totalRows = data.Length;

    // seed every cluster with one of the leading rows of the dataset
    ClusterCollection current = new ClusterCollection();
    for (int seedIndex = 0; seedIndex < clusterCount; seedIndex++)
    {
        Cluster seed = new Cluster();
        seed.Add(data[seedIndex]);
        current.Add(seed);
    }

    DateTime startedAt = DateTime.Now;
    Console.WriteLine("Start clustering {0} objects into {1} clusters: {2}", totalRows, clusterCount, startedAt.ToLongTimeString());

    int passes = 0;

    // with no clusters at all there is nothing to iterate on
    bool settled = (current.Count == 0);
    while (!settled)
    {
        passes++;
        ClusterCollection regrouped = this.ClusterDataSet(current, data);

        // a cluster counts as stable only when its mean did not move at all
        int unmoved = 0;
        for (int c = 0; c < current.Count; c++)
        {
            double[] previousMean = current[c].ClusterMean;
            double[] latestMean = regrouped[c].ClusterMean;
            double drift = this.EuclideanDistance(latestMean, previousMean);
            if (drift == 0)
            {
                unmoved++;
            }
        }

        current = regrouped;
        settled = (unmoved == current.Count);
    }

    DateTime finishedAt = DateTime.Now;
    TimeSpan elapsed = finishedAt - startedAt;
    Console.WriteLine("End clustering {0} objects into {1} clusters with {2} iterations: {3}", totalRows, clusterCount, passes, finishedAt.ToLongTimeString());
    Console.WriteLine("Clustering {0} objects into {1} clusters took {2} seconds", totalRows, clusterCount, elapsed.TotalSeconds);
    Console.WriteLine();

    return current;
}
/// <summary>
/// Builds <paramref name="k"/> single-point seed clusters from distinct, randomly chosen rows of the dataset.
/// </summary>
/// <param name="k">The number of seed clusters to create; must lie between 0 and the number of rows</param>
/// <param name="data">The dataset from which the seed rows are drawn</param>
/// <returns>A collection of <paramref name="k"/> clusters, each holding a copy of one distinct row of <paramref name="data"/></returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is negative or larger than the number of rows</exception>
public ClusterCollection RandomSeeding(int k, double[][] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int size = data.Length;

    // asking for more distinct rows than exist can never be satisfied
    if (k < 0 || k > size)
    {
        throw new ArgumentOutOfRangeException("k", k, "The number of seeds must be between 0 and the number of rows in the dataset.");
    }

    // candidate row indices; after the loop the first k entries are the chosen rows
    int[] rowIndices = new int[size];
    for (int i = 0; i < size; i++)
    {
        rowIndices[i] = i;
    }

    Random random = new Random();
    ClusterCollection init_clusters = new ClusterCollection();

    for (int i = 0; i < k; i++)
    {
        // partial Fisher-Yates shuffle: pick uniformly among the not-yet-chosen indices.
        // Random.Next's upper bound is exclusive, so passing size keeps the last row eligible.
        int pick = random.Next(i, size);
        int chosenRow = rowIndices[pick];
        rowIndices[pick] = rowIndices[i];
        rowIndices[i] = chosenRow;

        // copy the whole row, whatever its dimensionality, so the seed does not alias the dataset
        double[] seed = (double[])data[chosenRow].Clone();

        Cluster cluster = new Cluster();
        cluster.Add(seed);
        init_clusters.Add(cluster);
    }

    return init_clusters;
}
/// <summary>
/// Appends the given cluster to the end of this collection.
/// </summary>
/// <param name="cluster">The cluster to append</param>
public virtual void Add(Cluster cluster)
{
    List.Add(cluster);
}
/// <summary>
/// Distributes the rows of a dataset among clusters, splitting the assignment work into
/// contiguous slices of rows that are each handed to an asynchronous delegate invocation.
/// </summary>
/// <param name="clusters">A collection of data clusters used as the reference for the assignment</param>
/// <param name="data">An array containing the rows to be clustered</param>
/// <returns>A new collection with one cluster per input cluster, filled with the rows assigned to it</returns>
/// <exception cref="SystemException"><paramref name="clusters"/> contains no clusters</exception>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int rowCount = data.Length;

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    // create a new collection with one empty cluster per existing cluster
    ClusterCollection newClusters = new ClusterCollection();
    for (int count = 0; count < clusters.Count; count++)
    {
        newClusters.Add(new Cluster());
    }

    // break the rows into contiguous slices; every slice gets rowCount / threads rows
    // and the last slice additionally takes the remainder.
    // NOTE(review): threads and clusterDelegate are declared outside this block; a threads
    // value of zero would fail on the division below - confirm it is validated where it is set.
    int remainder = rowCount % threads;
    int numPerThread = rowCount / threads;
    IAsyncResult[] asyncResults = new IAsyncResult[threads];
    int start = 0;
    for (int i = 0; i < threads; i++)
    {
        int sliceLength = (i == threads - 1) ? numPerThread + remainder : numPerThread;

        // AsyncWaitHandle is deliberately not touched: EndInvoke below already blocks until
        // the call completes, and reading the property would allocate a handle nobody closes
        asyncResults[i] = clusterDelegate.BeginInvoke(clusters, data, start, sliceLength, null, null);
        start += numPerThread;
    }

    // collect the results in slice order so the running index walks the rows front to back;
    // presumably each result holds one destination cluster index per row of its slice - verify against the delegate target
    int index = 0;
    foreach (IAsyncResult asyncResult in asyncResults)
    {
        int[] destinationCluster = clusterDelegate.EndInvoke(asyncResult);
        for (int i = 0; i < destinationCluster.Length; i++)
        {
            newClusters[destinationCluster[i]].Add(data[index++]);
        }
    }

    return newClusters;
}