/// <summary>
/// Builds the initial clusters by drawing <paramref name="k"/> distinct rows of
/// <paramref name="data"/> at random. Each selected row is copied and becomes the
/// single member of its own cluster.
/// </summary>
/// <param name="k">The number of seed clusters to create.</param>
/// <param name="data">The data rows to draw the seeds from.</param>
/// <returns>A collection holding <paramref name="k"/> single-element clusters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="k"/> is negative or larger than the number of rows, in which case
/// <paramref name="k"/> distinct rows cannot be drawn.
/// </exception>
public ClusterCollection RandomSeeding(int k, double[][] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int size = data.Length;
    if (k < 0 || k > size)
    {
        // Without this check the rejection loop below could never finish.
        throw new ArgumentOutOfRangeException(
            "k", k, "The seed count must be between zero and the number of data rows.");
    }

    Random random = new Random();
    Hashtable usedRows = new Hashtable();
    ClusterCollection seedClusters = new ClusterCollection();

    int chosen = 0;
    while (chosen < k)
    {
        // The upper bound of Next is exclusive, so Next(size) covers rows 0..size-1.
        int r = random.Next(size);
        if (usedRows.ContainsKey(r))
        {
            // Row already used as a seed; draw again.
            continue;
        }

        usedRows.Add(r, 0);

        // Copy the whole row so the seed does not alias the caller's array and
        // keeps the same dimensionality as the data it will be compared against.
        double[] seed = (double[])data[r].Clone();

        Cluster cluster = new Cluster();
        cluster.Add(seed);
        seedClusters.Add(cluster);
        chosen++;
    }

    return seedClusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics.
/// The first <paramref name="clusterCount"/> rows of <paramref name="data"/> seed the
/// clusters; the assignment pass is then repeated until no cluster mean moves between
/// two consecutive passes.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is larger than the number of rows in <paramref name="data"/>.
/// </exception>
public ClusterCollection ClusterDataSet(int clusterCount, double[][] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (clusterCount > data.Length)
    {
        // Seeding reads data[0..clusterCount-1]; fail with a clear message instead of
        // an index error half-way through the seeding loop.
        throw new ArgumentOutOfRangeException(
            "clusterCount", clusterCount, "clusterCount cannot exceed the number of data rows.");
    }

    // Set up the seed clusters: one cluster per leading data row.
    ClusterCollection clusters = new ClusterCollection();
    for (int i = 0; i < clusterCount; i++)
    {
        Cluster seed = new Cluster();
        seed.Add(data[i]);
        clusters.Add(seed);
    }

    // Repeat the assignment pass until every cluster mean is unchanged.
    int stableClustersCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;

        ClusterCollection newClusters = this.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] originalClusterMean = clusters[clusterIndex].ClusterMean;
            double[] newClusterMean = newClusters[clusterIndex].ClusterMean;
            double distance = this.EuclideanDistance(newClusterMean, originalClusterMean);

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Performs one assignment pass: every row of <paramref name="data"/> is placed into a
/// fresh cluster at the same index as the existing cluster whose mean is closest to it.
/// </summary>
/// <param name="clusters">The current clusters whose means act as reference points</param>
/// <param name="data">The rows to distribute over the clusters</param>
/// <returns>A new collection with one cluster per input cluster, holding the reassigned rows</returns>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int totalRows = data.Length;

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    // One empty target cluster for each source cluster.
    ClusterCollection reassigned = new ClusterCollection();
    for (int n = 0; n < clusters.Count; n++)
    {
        reassigned.Add(new Cluster());
    }

    for (int row = 0; row < totalRows; row++)
    {
        // Start with cluster 0 as the best match, then look for anything closer.
        int nearest = 0;
        double nearestDistance = this.EuclideanDistance(data[row], clusters[0].ClusterMean);

        for (int candidate = 1; candidate < clusters.Count; candidate++)
        {
            double[] candidateMean = clusters[candidate].ClusterMean;
            double candidateDistance = this.EuclideanDistance(data[row], candidateMean);

            // Strict comparison: on a tie the lower cluster index wins.
            if (candidateDistance < nearestDistance)
            {
                nearestDistance = candidateDistance;
                nearest = candidate;
            }
        }

        reassigned[nearest].Add(data[row]);
    }

    return reassigned;
}
/// <summary>
/// Performs one assignment pass over <paramref name="data"/>, splitting the rows into
/// <c>threads</c> contiguous slices that are classified concurrently through
/// <c>clusterDelegate</c>, and then collecting the rows into new clusters.
/// </summary>
/// <param name="clusters">A collection of data clusters</param>
/// <param name="data">An array containing data to be clustered</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="SystemException"><paramref name="clusters"/> is empty.</exception>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int rowCount = data.Length;

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    // Create a new collection with one empty cluster per existing cluster.
    ClusterCollection newClusters = new ClusterCollection();
    for (int count = 0; count < clusters.Count; count++)
    {
        newClusters.Add(new Cluster());
    }

    // Break the rows into contiguous slices; the last slice also takes the remainder.
    int sliceSize = rowCount / threads;
    int remainder = rowCount % threads;

    // Only the IAsyncResult objects are kept. EndInvoke below already blocks until a
    // call has finished, so AsyncWaitHandle is deliberately never read: reading it
    // creates a wait handle that nothing here would close.
    IAsyncResult[] asyncResults = new IAsyncResult[threads];
    for (int i = 0; i < threads; i++)
    {
        int start = i * sliceSize;
        int length = (i == threads - 1) ? sliceSize + remainder : sliceSize;

        // NOTE(review): clusterDelegate is presumably bound to ClusterPartialDataSet - confirm.
        asyncResults[i] = clusterDelegate.BeginInvoke(clusters, data, start, length, null, null);
    }

    // Results are consumed in slice order, so a single running index walks the rows
    // in their original order.
    int index = 0;
    foreach (IAsyncResult asyncResult in asyncResults)
    {
        int[] destinationCluster = clusterDelegate.EndInvoke(asyncResult);
        for (int i = 0; i < destinationCluster.Length; i++)
        {
            newClusters[destinationCluster[i]].Add(data[index++]);
        }
    }

    return newClusters;
}
/// <summary>
/// Determines, for each row in the slice <paramref name="start"/> ..
/// <paramref name="start"/> + <paramref name="count"/> - 1 of <paramref name="data"/>,
/// the index of the cluster whose mean is closest to that row.
/// </summary>
/// <param name="clusters">The clusters whose means are compared against each row</param>
/// <param name="data">The full set of rows</param>
/// <param name="start">Index of the first row of the slice</param>
/// <param name="count">Number of rows in the slice</param>
/// <returns>An array of length <paramref name="count"/>; entry i is the chosen cluster index for row start + i</returns>
public int[] ClusterPartialDataSet(ClusterCollection clusters, double[][] data, int start, int count)
{
    try
    {
        int[] assignments = new int[count];
        int end = start + count;

        for (int row = start; row < end; row++)
        {
            // Stays 0 when there are no clusters to compare against.
            int nearest = 0;
            double nearestDistance = 0.0;

            for (int candidate = 0; candidate < clusters.Count; candidate++)
            {
                double[] candidateMean = clusters[candidate].ClusterMean;
                double candidateDistance = this.EuclideanDistance(data[row], candidateMean);

                // The first cluster is always accepted; later ones only when strictly closer.
                if (candidate == 0 || candidateDistance < nearestDistance)
                {
                    nearestDistance = candidateDistance;
                    nearest = candidate;
                }
            }

            assignments[row - start] = nearest;
        }

        return assignments;
    }
    catch (Exception ex)
    {
        // Report the failure on the console, then let it propagate unchanged.
        Console.WriteLine(ex.Message);
        throw;
    }
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics, starting
/// from randomly chosen seed rows. Whenever an assignment pass leaves a cluster empty,
/// the clusters are reseeded at random and the iteration starts over.
/// </summary>
/// <param name="k">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered</param>
/// <returns>A collection of clusters of data</returns>
public ClusterCollection ClusterDataSetRandomSeeding(int k, double[][] data)
{
    ClusterCollection clusters = RandomSeeding(k, data);
    int stableClustersCount = 0;

    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;

        ClusterCollection newClusters = this.ClusterDataSet(clusters, data);

        // Look for empty clusters before touching any mean: the mean of an empty
        // cluster is not meaningful, and a reseed must never count as convergence.
        bool reset = false;
        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            if (newClusters[clusterIndex].Count == 0)
            {
                reset = true;
                break;
            }
        }

        if (reset)
        {
            // Start over from fresh seeds; stableClustersCount is still zero here,
            // so the loop cannot exit with unrefined seed clusters.
            clusters = RandomSeeding(k, data);
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] originalClusterMean = clusters[clusterIndex].ClusterMean;
            double[] newClusterMean = newClusters[clusterIndex].ClusterMean;
            double distance = this.EuclideanDistance(newClusterMean, originalClusterMean);

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics and writes
/// start/end timing information to the console. The first
/// <paramref name="clusterCount"/> rows of <paramref name="data"/> seed the clusters;
/// the assignment pass is repeated until no cluster mean moves between two passes.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is larger than the number of rows in <paramref name="data"/>.
/// </exception>
public ClusterCollection ClusterDataSet(int clusterCount, double[][] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (clusterCount > data.Length)
    {
        // Seeding reads data[0..clusterCount-1]; fail with a clear message instead of
        // an index error half-way through the seeding loop.
        throw new ArgumentOutOfRangeException(
            "clusterCount", clusterCount, "clusterCount cannot exceed the number of data rows.");
    }

    int rowCount = data.Length;

    // Set up the seed clusters: one cluster per leading data row.
    ClusterCollection clusters = new ClusterCollection();
    for (int i = 0; i < clusterCount; i++)
    {
        Cluster seed = new Cluster();
        seed.Add(data[i]);
        clusters.Add(seed);
    }

    DateTime start = DateTime.Now;
    Console.WriteLine("Start clustering {0} objects into {1} clusters: {2}", rowCount.ToString(), clusterCount.ToString(), start.ToLongTimeString());

    // Repeat the assignment pass until every cluster mean is unchanged.
    int iterationCount = 0;
    int stableClustersCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        iterationCount++;
        stableClustersCount = 0;

        ClusterCollection newClusters = this.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] originalClusterMean = clusters[clusterIndex].ClusterMean;
            double[] newClusterMean = newClusters[clusterIndex].ClusterMean;
            double distance = this.EuclideanDistance(newClusterMean, originalClusterMean);

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    DateTime end = DateTime.Now;
    TimeSpan span = end - start;
    Console.WriteLine("End clustering {0} objects into {1} clusters with {2} iterations: {3}", rowCount.ToString(), clusterCount.ToString(), iterationCount, end.ToLongTimeString());
    Console.WriteLine("Clustering {0} objects into {1} clusters took {2} seconds", rowCount.ToString(), clusterCount.ToString(), span.TotalSeconds);
    Console.WriteLine();

    return clusters;
}
/// <summary>
/// Performs one assignment pass: every row of <paramref name="data"/> is placed into a
/// fresh cluster at the same index as the existing cluster whose mean is closest to it.
/// </summary>
/// <param name="clusters">The current clusters whose means act as reference points</param>
/// <param name="data">The rows to distribute over the clusters</param>
/// <returns>A new collection with one cluster per input cluster, holding the reassigned rows</returns>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int totalRows = data.Length;

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    // One empty target cluster for each source cluster.
    ClusterCollection reassigned = new ClusterCollection();
    for (int n = 0; n < clusters.Count; n++)
    {
        reassigned.Add(new Cluster());
    }

    for (int row = 0; row < totalRows; row++)
    {
        // Start with cluster 0 as the best match, then look for anything closer.
        int nearest = 0;
        double nearestDistance = this.EuclideanDistance(data[row], clusters[0].ClusterMean);

        for (int candidate = 1; candidate < clusters.Count; candidate++)
        {
            double[] candidateMean = clusters[candidate].ClusterMean;
            double candidateDistance = this.EuclideanDistance(data[row], candidateMean);

            // Strict comparison: on a tie the lower cluster index wins.
            if (candidateDistance < nearestDistance)
            {
                nearestDistance = candidateDistance;
                nearest = candidate;
            }
        }

        reassigned[nearest].Add(data[row]);
    }

    return reassigned;
}
/// <summary>
/// Builds the initial clusters by drawing <paramref name="k"/> distinct rows of
/// <paramref name="data"/> at random. Each selected row is copied and becomes the
/// single member of its own cluster.
/// </summary>
/// <param name="k">The number of seed clusters to create.</param>
/// <param name="data">The data rows to draw the seeds from.</param>
/// <returns>A collection holding <paramref name="k"/> single-element clusters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="k"/> is negative or larger than the number of rows, in which case
/// <paramref name="k"/> distinct rows cannot be drawn.
/// </exception>
public ClusterCollection RandomSeeding(int k, double[][] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int size = data.Length;
    if (k < 0 || k > size)
    {
        // Without this check the rejection loop below could never finish.
        throw new ArgumentOutOfRangeException(
            "k", k, "The seed count must be between zero and the number of data rows.");
    }

    Random random = new Random();
    Hashtable usedRows = new Hashtable();
    ClusterCollection seedClusters = new ClusterCollection();

    int chosen = 0;
    while (chosen < k)
    {
        // The upper bound of Next is exclusive, so Next(size) covers rows 0..size-1.
        int r = random.Next(size);
        if (usedRows.ContainsKey(r))
        {
            // Row already used as a seed; draw again.
            continue;
        }

        usedRows.Add(r, 0);

        // Copy the whole row so the seed does not alias the caller's array and
        // keeps the same dimensionality as the data it will be compared against.
        double[] seed = (double[])data[r].Clone();

        Cluster cluster = new Cluster();
        cluster.Add(seed);
        seedClusters.Add(cluster);
        chosen++;
    }

    return seedClusters;
}
/// <summary>
/// Determines, for each row in the slice <paramref name="start"/> ..
/// <paramref name="start"/> + <paramref name="count"/> - 1 of <paramref name="data"/>,
/// the index of the cluster whose mean is closest to that row.
/// </summary>
/// <param name="clusters">The clusters whose means are compared against each row</param>
/// <param name="data">The full set of rows</param>
/// <param name="start">Index of the first row of the slice</param>
/// <param name="count">Number of rows in the slice</param>
/// <returns>An array of length <paramref name="count"/>; entry i is the chosen cluster index for row start + i</returns>
public int[] ClusterPartialDataSet(ClusterCollection clusters, double[][] data, int start, int count)
{
    try
    {
        int[] assignments = new int[count];
        int end = start + count;

        for (int row = start; row < end; row++)
        {
            // Stays 0 when there are no clusters to compare against.
            int nearest = 0;
            double nearestDistance = 0.0;

            for (int candidate = 0; candidate < clusters.Count; candidate++)
            {
                double[] candidateMean = clusters[candidate].ClusterMean;
                double candidateDistance = this.EuclideanDistance(data[row], candidateMean);

                // The first cluster is always accepted; later ones only when strictly closer.
                if (candidate == 0 || candidateDistance < nearestDistance)
                {
                    nearestDistance = candidateDistance;
                    nearest = candidate;
                }
            }

            assignments[row - start] = nearest;
        }

        return assignments;
    }
    catch (Exception ex)
    {
        // Report the failure on the console, then let it propagate unchanged.
        Console.WriteLine(ex.Message);
        throw;
    }
}
/// <summary>
/// Performs one assignment pass over <paramref name="data"/>, splitting the rows into
/// <c>threads</c> contiguous slices that are classified concurrently through
/// <c>clusterDelegate</c>, and then collecting the rows into new clusters.
/// </summary>
/// <param name="clusters">A collection of data clusters</param>
/// <param name="data">An array containing data to be clustered</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="SystemException"><paramref name="clusters"/> is empty.</exception>
public ClusterCollection ClusterDataSet(ClusterCollection clusters, double[][] data)
{
    int rowCount = data.Length;

    if (clusters.Count <= 0)
    {
        throw new SystemException("Cluster Count Cannot Be Zero!");
    }

    // Create a new collection with one empty cluster per existing cluster.
    ClusterCollection newClusters = new ClusterCollection();
    for (int count = 0; count < clusters.Count; count++)
    {
        newClusters.Add(new Cluster());
    }

    // Break the rows into contiguous slices; the last slice also takes the remainder.
    int sliceSize = rowCount / threads;
    int remainder = rowCount % threads;

    // Only the IAsyncResult objects are kept. EndInvoke below already blocks until a
    // call has finished, so AsyncWaitHandle is deliberately never read: reading it
    // creates a wait handle that nothing here would close.
    IAsyncResult[] asyncResults = new IAsyncResult[threads];
    for (int i = 0; i < threads; i++)
    {
        int start = i * sliceSize;
        int length = (i == threads - 1) ? sliceSize + remainder : sliceSize;

        // NOTE(review): clusterDelegate is presumably bound to ClusterPartialDataSet - confirm.
        asyncResults[i] = clusterDelegate.BeginInvoke(clusters, data, start, length, null, null);
    }

    // Results are consumed in slice order, so a single running index walks the rows
    // in their original order.
    int index = 0;
    foreach (IAsyncResult asyncResult in asyncResults)
    {
        int[] destinationCluster = clusterDelegate.EndInvoke(asyncResult);
        for (int i = 0; i < destinationCluster.Length; i++)
        {
            newClusters[destinationCluster[i]].Add(data[index++]);
        }
    }

    return newClusters;
}
/// <summary>
/// Recolors <paramref name="srcimg"/> in place: every pixel is replaced by the color
/// obtained from the mean of the cluster closest to that pixel's YIQ value.
/// </summary>
/// <param name="srcimg">The bitmap to repaint; it is locked as 24bpp RGB and modified directly.</param>
/// <param name="kMeans">Supplies the Euclidean distance used to find the nearest cluster mean.</param>
/// <param name="clusters">The clusters whose means define the output palette.</param>
/// <returns>The same <paramref name="srcimg"/> instance, after repainting.</returns>
public Bitmap PaintYIQ(Bitmap srcimg, KMeansParallel kMeans, ClusterCollection clusters)
{
    int width = srcimg.Width;
    int height = srcimg.Height;

    BitmapData srcData = srcimg.LockBits(
        new Rectangle(0, 0, width, height),
        ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);

    try
    {
        // Bytes at the end of each scan line that are not pixel data.
        int srcOffset = srcData.Stride - width * 3;

        unsafe
        {
            byte* src = (byte*)srcData.Scan0.ToPointer();

            for (int y = 0; y < height; y++)
            {
                for (int x = 0; x < width; x++, src += 3)
                {
                    // Pack the pixel as 0xRRGGBB so it can serve as a cache key.
                    int rgb = src[RGB.R] * 256 * 256 + src[RGB.G] * 256 + src[RGB.B];
                    int rgb_mean;

                    // NOTE(review): color_table is keyed by the source color only, so
                    // entries cached for a different cluster set would be reused here -
                    // confirm the table is cleared whenever the clusters change.
                    if (color_table.ContainsKey(rgb))
                    {
                        rgb_mean = (int)color_table[rgb];
                    }
                    else
                    {
                        double[] yiq = RGB2YIQ(rgb);

                        // Find the cluster whose mean is nearest to this color.
                        double min_dist = Double.MaxValue;
                        int idx = 0;
                        for (int i = 0; i < clusters.Count; i++)
                        {
                            double dist = kMeans.EuclideanDistance(yiq, clusters[i].ClusterMean);
                            if (dist < min_dist)
                            {
                                min_dist = dist;
                                idx = i;
                            }
                        }

                        rgb_mean = YIQ2RGB(clusters[idx].ClusterMean[0], clusters[idx].ClusterMean[1], clusters[idx].ClusterMean[2]);
                        color_table.Add(rgb, rgb_mean);
                    }

                    // Unpack 0xRRGGBB back into the three channel bytes.
                    src[RGB.R] = (byte)((rgb_mean & 0xFF0000) >> 16);
                    src[RGB.G] = (byte)((rgb_mean & 0xFF00) >> 8);
                    src[RGB.B] = (byte)(rgb_mean & 0xFF);
                }

                src += srcOffset;
            }
        }
    }
    finally
    {
        // Always release the lock, even if the pixel loop throws, so the bitmap is
        // not left locked and unusable.
        srcimg.UnlockBits(srcData);
    }

    return srcimg;
}
/// <summary>
/// Experimental variant (marked by its author as unsuccessful) that clusters
/// <paramref name="data"/> starting from caller-supplied seed clusters instead of
/// random ones, and writes start/end timing information to the console. Whenever an
/// assignment pass leaves a cluster empty, the clusters are replaced by
/// <paramref name="k"/> random seeds and the iteration starts over.
/// </summary>
/// <param name="k">The number of random seed clusters to create when a reset is needed</param>
/// <param name="clusters">The initial seed clusters</param>
/// <param name="data">An array containing data that will be clustered</param>
/// <returns>A collection of clusters of data</returns>
public ClusterCollection ClusterDataSet2(int k, ClusterCollection clusters, double[][] data)
{
    int rowCount = data.Length;
    int stableClustersCount = 0;

    // Captured once for the log output; it is not refreshed after a reseed.
    int clusterCount = clusters.Count;

    DateTime start = DateTime.Now;
    Console.WriteLine("Start clustering {0} objects into {1} clusters: {2}", rowCount.ToString(), clusterCount.ToString(), start.ToLongTimeString());

    int iterationCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        iterationCount++;
        stableClustersCount = 0;

        ClusterCollection newClusters = this.ClusterDataSet(clusters, data);

        // Look for empty clusters before touching any mean: the mean of an empty
        // cluster is not meaningful, and a reseed must never count as convergence.
        bool reset = false;
        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            if (newClusters[clusterIndex].Count == 0)
            {
                reset = true;
                break;
            }
        }

        if (reset)
        {
            // Start over from fresh seeds; stableClustersCount is still zero here,
            // so the loop cannot exit with unrefined seed clusters.
            clusters = RandomSeeding(k, data);
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] originalClusterMean = clusters[clusterIndex].ClusterMean;
            double[] newClusterMean = newClusters[clusterIndex].ClusterMean;
            double distance = this.EuclideanDistance(newClusterMean, originalClusterMean);

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    DateTime end = DateTime.Now;
    TimeSpan span = end - start;
    Console.WriteLine("End clustering {0} objects into {1} clusters with {2} iterations: {3}", rowCount.ToString(), clusterCount.ToString(), iterationCount, end.ToLongTimeString());
    Console.WriteLine("Parallel Clustering {0} objects into {1} clusters took {2} seconds", rowCount.ToString(), clusterCount.ToString(), span.TotalSeconds);
    Console.WriteLine();

    return clusters;
}