// Uses the per-pixel classes from PredictGPU to pool the location of
// gestures. Supports multiple types of pooling algorithms; each algorithm
// lives in its own helper below and is described there.
//
// type:  which pooling algorithm to run.
// state: per-frame processor state. The helpers read state.depth,
//        state.predict_labels_, state.feature.num_classes_, state.crop and
//        state.pool_, and the clustering helpers also write the overlay
//        buffer and raise state.overlay_start_.
//
// Returns one Pooled entry (center, depth, gesture) per detected gesture.
// An unrecognised PoolType yields an empty list.
private static List<Pooled> Pool(PoolType type, ProcessorState state)
{
    List<Pooled> gestures = new List<Pooled>();

    switch (type)
    {
        case PoolType.KMeans:
            PoolKMeans(state, gestures);
            break;

        case PoolType.DBSCAN:
            PoolDBSCAN(state, gestures);
            break;

        case PoolType.MedianMajority:
        case PoolType.MeanMajority:
            PoolMajority(type, state, gestures);
            break;
    }

    return gestures;
}

// K-means pooling. Every non-background pixel is randomly assigned to one
// of K clusters, then centroids and assignments are refined until an
// iteration makes no reassignment. Each non-empty cluster is painted into
// the overlay and reported as one gesture carrying the cluster's majority
// label.
private static void PoolKMeans(ProcessorState state, List<Pooled> gestures)
{
    const int K = 7;

    Random rand = new Random();
    Point3 p = new Point3(0, 0, 0);  // scratch point, reused for every pixel
    int num_changes = 10;            // any positive value: forces the first iteration
    int iterations = 0;

    // Random initial centroids: anywhere in the image, depth in [400, 1500).
    List<Point3> centroids = new List<Point3>(K);
    for (int i = 0; i < K; i++)
    {
        centroids.Add(new Point3(
            rand.Next(width), rand.Next(height), rand.Next(400, 1500)));
    }

    List<HashSet<int>> clusters = new List<HashSet<int>>(K);
    for (int i = 0; i < K; i++)
    {
        clusters.Add(new HashSet<int>());
    }

    // Random initial assignment of every non-background pixel.
    Dictionary<int, int> assignments = new Dictionary<int, int>();
    for (int i = 0; i < state.depth.Length; i++)
    {
        if (state.predict_labels_[i] != (int)HandGestureFormat.Background)
        {
            int cluster = rand.Next(K);
            assignments.Add(i, cluster);
            clusters[cluster].Add(i);
        }
    }
    List<int> points = new List<int>(assignments.Keys);

    // If there have been no changes, the centroids won't change either, so
    // K-means has found a minimum. This may be a local minimum.
    while (num_changes > 0)
    {
        num_changes = 0;
        iterations++;
        if (iterations % 10 == 0)
        {
            Console.WriteLine("Iteration {0}", iterations);
        }

        // Update centroids.
        for (int i = 0; i < K; i++)
        {
            // Enumerable.Average throws on an empty sequence. A cluster can
            // be empty after the random seeding (always, when there are no
            // foreground pixels), so keep its previous centroid instead.
            if (clusters[i].Count == 0)
            {
                continue;
            }

            int x = (int)clusters[i].Average(point => Util.toXY(point, width, height, kDepthStride).X);
            int y = (int)clusters[i].Average(point => Util.toXY(point, width, height, kDepthStride).Y);
            int depth = (int)clusters[i].Average(point => state.depth[point]);
            centroids[i].update(x, y, depth);
        }

        // Update classifications.
        foreach (int point in points)
        {
            System.Drawing.Point xy = Util.toXY(point, width, height, kDepthStride);
            p.update(xy.X, xy.Y, state.depth[point]);

            int nearest = 0;
            double nearest_distance = Util.EuclideanDistance(centroids[nearest], p);
            for (int i = 1; i < K; i++)
            {
                double distance = Util.EuclideanDistance(centroids[i], p);
                if (distance < nearest_distance)
                {
                    nearest = i;
                    nearest_distance = distance;
                }
            }

            // Never move the last remaining member out of a cluster, so a
            // populated cluster cannot become empty.
            if (assignments[point] != nearest && clusters[assignments[point]].Count != 1)
            {
                num_changes++;
                clusters[assignments[point]].Remove(point);
                clusters[nearest].Add(point);
                assignments[point] = nearest;
            }
        }
    }

    // Fit a Gaussian distribution on all the cluster sizes and report the
    // clusters that are more than two standard deviations above the mean.
    // The flag is diagnostic output only: every non-empty cluster is drawn
    // and pooled below.
    int[] sizes = clusters.Select(c => c.Count).OrderByDescending(val => val).ToArray();
    double average = sizes.Average();
    double variance = sizes.Select(val => Math.Pow(val, 2)).Sum() / sizes.Length - Math.Pow(average, 2);
    // E[x^2] - mean^2 can come out slightly negative through rounding;
    // clamp so that Sqrt does not return NaN.
    double stddev = Math.Sqrt(Math.Max(0.0, variance));
    double upper_bound = average + 2 * stddev;
    for (int i = 0; i < clusters.Count; i++)
    {
        Console.WriteLine("{0} - {1} ({2})", i, clusters[i].Count, clusters[i].Count > upper_bound);
    }

    List<Tuple<byte, byte, byte>> label_colors = Util.GiveMeNColors(K);
    ResetOverlay(state);
    for (int k = 0; k < K; k++)
    {
        // An empty cluster has no pixels to draw and no majority label, and
        // its centroid is still the random seed; do not report it.
        if (clusters[k].Count == 0)
        {
            continue;
        }

        int[] label_counts = new int[state.feature.num_classes_];
        foreach (int point in clusters[k])
        {
            // Four overlay entries per pixel; Item1/2/3 go to offsets 2/1/0.
            int bitmap_index = point * 4;
            state.overlay_bitmap_bits_[bitmap_index + 2] = (int)label_colors[k].Item1;
            state.overlay_bitmap_bits_[bitmap_index + 1] = (int)label_colors[k].Item2;
            state.overlay_bitmap_bits_[bitmap_index + 0] = (int)label_colors[k].Item3;

            // Tally labels to find the majority label within this cluster.
            label_counts[state.predict_labels_[point]]++;
        }

        Tuple<int, int> max = Util.MaxNonBackground(label_counts);
        System.Drawing.Point center = new System.Drawing.Point(centroids[k].x(), centroids[k].y());
        gestures.Add(new Pooled(center, centroids[k].depth(), (HandGestureFormat)max.Item1));
        Console.WriteLine("Center: ({0}px, {1}px, {2}mm)", center.X, center.Y, centroids[k].depth());
    }
    state.overlay_start_.Value = true;
}

// DBSCAN pooling. The minPts setting automatically filters out noise, so
// the clusters returned here can be safely assumed to be hands; no outlier
// detection is needed. Each non-empty cluster is painted into the overlay
// and reported as one gesture located at the cluster's mean position and
// mean depth, carrying the cluster's majority label.
private static void PoolDBSCAN(ProcessorState state, List<Pooled> gestures)
{
    double eps = 10;
    int minPts = 300;

    // Stopwatch is monotonic and high resolution, unlike DateTime.Now.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
    List<List<int>> dbclusters = DBSCAN.GetClusters(
        eps, minPts, state.predict_labels_,
        (int)HandGestureFormat.Background, state.pool_);
    timer.Stop();
    Console.WriteLine("Use {0} ms for DBSCAN.GetClusters", timer.Elapsed.TotalMilliseconds.ToString());

    List<Tuple<byte, byte, byte>> label_colors = Util.GiveMeNColors(dbclusters.Count);
    Console.WriteLine("Detected {0} clusters.", dbclusters.Count);
    ResetOverlay(state);

    // Compute the center and depth of each cluster. This could arguably be
    // done inside DBSCAN itself.
    for (int cluster = 0; cluster < dbclusters.Count; cluster++)
    {
        List<int> members = dbclusters[cluster];
        if (members.Count == 0)
        {
            continue;
        }

        // long accumulators: summing depth over a large cluster can exceed
        // the range of int.
        long sum_x = 0, sum_y = 0, sum_depth = 0;
        int[] label_counts = new int[state.feature.num_classes_];

        foreach (int index in members)
        {
            // NOTE(review): the overlay is written at the raw index here,
            // whereas the K-means path writes at index * 4 - confirm which
            // layout DBSCAN.GetClusters indices are meant for.
            state.overlay_bitmap_bits_[index + 2] = (int)label_colors[cluster].Item1;
            state.overlay_bitmap_bits_[index + 1] = (int)label_colors[cluster].Item2;
            state.overlay_bitmap_bits_[index + 0] = (int)label_colors[cluster].Item3;

            // NOTE(review): 640/480/1 are hard-coded here while the other
            // pooling paths pass width/height/kDepthStride - confirm they
            // are meant to be the same values.
            System.Drawing.Point point = Util.toXY(index, 640, 480, 1);
            sum_x += point.X;
            sum_y += point.Y;
            sum_depth += state.depth[index];

            // Tally labels to find the majority label within this cluster.
            label_counts[state.predict_labels_[index]]++;
        }

        Tuple<int, int> max = Util.MaxNonBackground(label_counts);

        System.Drawing.Point center = new System.Drawing.Point(
            (int)(sum_x / members.Count),
            (int)(sum_y / members.Count));
        // Use the average to get the depth.
        int depth = (int)(sum_depth / members.Count);

        gestures.Add(new Pooled(center, depth, (HandGestureFormat)max.Item1));
        Console.WriteLine("Center: ({0}px, {1}px, {2}mm), Gesture: {3}",
            center.X, center.Y, depth, (HandGestureFormat)max.Item1);
    }
    state.overlay_start_.Value = true;
}

// Median and mean pooling for the majority class.
//
// Counts the predicted labels inside the crop rectangle, picks the most
// frequent non-background label and reports a single gesture located at
// the mean (MeanMajority) or median (MedianMajority) x, y and depth of the
// pixels carrying that label. When no pixel has a non-background label the
// gesture is placed at the image center with depth 0.
//
// The majority class may have a lot of noise. The noise may itself cause a
// false majority class. An improvement can be a density based clustering
// method.
private static void PoolMajority(PoolType type, ProcessorState state, List<Pooled> gestures)
{
    int[] label_counts = new int[state.feature.num_classes_];
    List<int>[] label_sorted_x = new List<int>[state.feature.num_classes_];
    List<int>[] label_sorted_y = new List<int>[state.feature.num_classes_];
    List<int>[] label_sorted_depth = new List<int>[state.feature.num_classes_];
    // Index 0 is left null: the code below treats label 0 as background
    // (see label_counts[0]) and never collects coordinates for it.
    for (int i = 1; i < state.feature.num_classes_; i++)
    {
        label_sorted_x[i] = new List<int>();
        label_sorted_y[i] = new List<int>();
        label_sorted_depth[i] = new List<int>();
    }

    // The crop is scanned inclusively on both ends. The range is clamped to
    // the image so that a crop touching the right or bottom edge cannot
    // produce coordinates outside width x height (presumably this would
    // index past the label and depth buffers - TODO confirm against
    // Util.toID).
    int first_y = Math.Max(state.crop.Value.Y, 0);
    int last_y = Math.Min(state.crop.Value.Y + state.crop.Value.Height, height - 1);
    int first_x = Math.Max(state.crop.Value.X, 0);
    int last_x = Math.Min(state.crop.Value.X + state.crop.Value.Width, width - 1);

    for (int y = first_y; y <= last_y; y++)
    {
        for (int x = first_x; x <= last_x; x++)
        {
            int depth_index = Util.toID(x, y, width, height, kDepthStride);
            int predict_label = state.predict_labels_[depth_index];
            label_counts[predict_label]++;
            if (predict_label != (int)HandGestureFormat.Background)
            {
                label_sorted_x[predict_label].Add(x);
                label_sorted_y[predict_label].Add(y);
                label_sorted_depth[predict_label].Add(state.depth[depth_index]);
            }
        }
    }

    Tuple<int, int> max = Util.MaxNonBackground(label_counts);
    int max_index = max.Item1, max_value = max.Item2;
    int total_non_background = label_counts.Sum() - label_counts[0];
    Console.WriteLine("Most common gesture is {0} (appears {1}/{2} times).",
        ((HandGestureFormat)max_index).ToString(), max_value, total_non_background);

    System.Drawing.Point center = new System.Drawing.Point();
    int center_depth = 0;
    if (max_value == 0)
    {
        // Nothing but background: fall back to the image center.
        center.X = width / 2;
        center.Y = height / 2;
        center_depth = 0;
    }
    else if (type == PoolType.MeanMajority)
    {
        center.X = (int)(label_sorted_x[max_index].Average());
        center.Y = (int)(label_sorted_y[max_index].Average());
        center_depth = (int)(label_sorted_depth[max_index].Average());
    }
    else if (type == PoolType.MedianMajority)
    {
        // Each coordinate is sorted independently, so the result is the
        // per-axis median rather than an actual pixel of the class.
        label_sorted_x[max_index].Sort();
        label_sorted_y[max_index].Sort();
        label_sorted_depth[max_index].Sort();
        center.X = label_sorted_x[max_index][max_value / 2];
        center.Y = label_sorted_y[max_index][max_value / 2];
        center_depth = label_sorted_depth[max_index][max_value / 2];
    }

    gestures.Add(new Pooled(center, center_depth, (HandGestureFormat)max_index));
    Console.WriteLine("Center: ({0}px, {1}px, {2}mm)", center.X, center.Y, center_depth);
}