private int[] compute(double[][] points)
{
    // first, select initial points
    double[][] seeds = createSeeds(points, 2 * Bandwidth);
    var maxcandidates = new ConcurrentStack<double[]>();

    // construct map of the data
    tree = KDTree.FromData<int>(points, distance);

    // now, for each initial point
    if (UseParallelProcessing)
    {
        Parallel.For(0, seeds.Length, (index) =>
            iterate(seeds, maxcandidates, index));
    }
    else
    {
        for (int index = 0; index < seeds.Length; index++)
            iterate(seeds, maxcandidates, index);
    }

    // suppress non-maximum points
    double[][] maximum = cut ? maxcandidates.ToArray() : supress(seeds);

    // create a decision map using seeds
    int[] seedLabels = classifySeeds(seeds, maximum);
    tree = KDTree.FromData(seeds, seedLabels, distance);

    // create the cluster structure
    clusters = new MeanShiftClusterCollection(tree, maximum);

    // label each point
    return clusters.Nearest(points);
}
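// Hedged sketch of the "suppress non-maximum points" step referenced above. This is an
// assumption, not the library's supress implementation: one common way to suppress
// redundant seeds after convergence is to merge every converged seed that lies within
// one bandwidth of an already accepted mode. The mergeNearbyModes name is hypothetical;
// System and System.Collections.Generic are assumed to be imported.
private static double[][] mergeNearbyModes(double[][] converged, double bandwidth)
{
    var modes = new List<double[]>();

    foreach (double[] candidate in converged)
    {
        bool duplicate = false;

        foreach (double[] mode in modes)
        {
            // squared Euclidean distance between the candidate and an accepted mode
            double sum = 0;
            for (int d = 0; d < candidate.Length; d++)
            {
                double diff = candidate[d] - mode[d];
                sum += diff * diff;
            }

            if (Math.Sqrt(sum) < bandwidth)
            {
                duplicate = true; // candidate collapses into an existing mode
                break;
            }
        }

        if (!duplicate)
            modes.Add(candidate);
    }

    return modes.ToArray();
}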
/// <summary>
///   Initializes a new instance of the <see cref="MeanShiftCluster"/> class.
/// </summary>
/// 
/// <param name="owner">The owner.</param>
/// <param name="index">The cluster index.</param>
/// 
public MeanShiftCluster(MeanShiftClusterCollection owner, int index)
{
    this.owner = owner;
    this.index = index;
}
/// <summary>
///   Learns a model that can map the given inputs to the desired outputs.
/// </summary>
/// 
/// <param name="x">The model inputs.</param>
/// <param name="weights">The weight of importance for each input sample.</param>
/// 
/// <returns>A model that has learned how to produce suitable outputs
///   given the input data <paramref name="x" />.</returns>
/// 
public MeanShiftClusterCollection Learn(double[][] x, int[] weights = null)
{
    if (weights == null)
        weights = Vector.Ones<int>(x.Length);

    if (x.Length != weights.Length)
    {
        throw new DimensionMismatchException("weights",
            "The weights and points vector must have the same dimension.");
    }

    // First of all, construct map of the original points. We will
    // be saving the weight of every point in the node of the tree.
    KDTree<int> tree = KDTree.FromData(x, weights, Distance);

    // Let's sample some points in the problem surface
    double[][] seeds = createSeeds(x, 2 * Bandwidth);

    // Now, we will duplicate those points and make them "move"
    // into this surface in the direction of the surface modes.
    double[][] current = seeds.MemberwiseClone();

    // We will store any modes that we find here
    var maxima = new ConcurrentStack<double[]>();

    // Optimization for uniform kernel
    Action<ICollection<NodeDistance<KDTreeNode<int>>>, double[]> func;
    if (kernel is UniformKernel)
        func = uniform;
    else
        func = general;

    // For each seed
    if (ParallelOptions.MaxDegreeOfParallelism != 1)
    {
        Parallel.For(0, current.Length, ParallelOptions,
            i => move(tree, current, i, maxima, func));

        for (int i = 0; i < current.Length; i++)
            supress(current, i, maxima);
    }
    else
    {
        for (int i = 0; i < current.Length; i++)
            move(tree, current, i, maxima, func);
    }

    var modes = maxima.ToArray();

    // At this point, the current points have moved into
    // the location of the modes of the surface. Now we
    // have to backtrack and check, for each mode, where
    // those points departed from.

    int[] labels = classify(modes: modes, points: current);

    // Now we create a decision map using the original seed positions
    tree = KDTree.FromData(seeds, labels, Distance, inPlace: true);

    clusters = new MeanShiftClusterCollection(this, modes.Length, tree, modes);

    if (ComputeLabels || ComputeProportions)
    {
        int sum = 0;
        int[] counts = new int[modes.Length];
        labels = new int[x.Length];

        for (int i = 0; i < labels.Length; i++)
        {
            int j = tree.Nearest(x[i]).Value;
            labels[i] = j;
            counts[j] += weights[i];
            sum += weights[i];
        }

        for (int i = 0; i < counts.Length; i++)
            clusters.Proportions[i] = counts[i] / (double)sum;
    }

    return clusters;
}
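// Usage sketch for the Learn method above (an assumption, not the library's official
// sample): it presumes the surrounding MeanShift class exposes settable Kernel and
// Bandwidth properties, that a UniformKernel density kernel is available, and that the
// returned MeanShiftClusterCollection offers a Decide method for labeling points.
private static int[] learnUsageSketch(double[][] samples)
{
    // Configure mean shift with a flat (uniform) kernel and a bandwidth guess
    var meanShift = new MeanShift()
    {
        Kernel = new UniformKernel(),
        Bandwidth = 2.0
    };

    // Find the modes of the estimated density and build the cluster collection
    MeanShiftClusterCollection clustering = meanShift.Learn(samples);

    // Assign each original sample to the cluster of its nearest mode
    return clustering.Decide(samples);
}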
/// <summary>
///   Divides the input data into clusters.
/// </summary>
/// 
/// <param name="points">The data points over which to run the algorithm.</param>
/// <param name="threshold">The relative convergence threshold
///   for the algorithm. Default is 1e-3.</param>
/// <param name="maxIterations">The maximum number of iterations. Default is 100.</param>
/// 
public int[] Compute(double[][] points, double threshold, int maxIterations = 100)
{
    // first, select initial points
    double[][] seeds = createSeeds(points, 2 * Bandwidth);
    var maxcandidates = new ConcurrentStack<double[]>();

    // construct map of the data
    tree = KDTree.FromData<int>(points, distance);

    // now, for each initial point
    global::Accord.Threading.Tasks.Parallel.For(0, seeds.Length,
#if DEBUG
        new ParallelOptions() { MaxDegreeOfParallelism = 1 },
#endif
        (index) =>
        {
            double[] point = seeds[index];
            double[] mean = new double[point.Length];
            double[] delta = new double[point.Length];

            // we will keep moving it in the
            // direction of the density modes

            int iterations = 0;

            // until convergence or max iterations reached
            while (iterations < maxIterations)
            {
                iterations++;

                // compute the shifted mean
                computeMeanShift(point, mean);

                // extract the mean shift vector
                for (int j = 0; j < mean.Length; j++)
                    delta[j] = point[j] - mean[j];

                // update the point towards a mode
                for (int j = 0; j < mean.Length; j++)
                    point[j] = mean[j];

                // Check if we are already near any maximum point
                if (cut && nearest(point, maxcandidates) != null)
                    break;

                // check for convergence: magnitude of the mean shift
                // vector converges to zero (Comaniciu 2002, page 606)
                if (Norm.Euclidean(delta) < threshold * Bandwidth)
                    break;
            }

            if (cut)
            {
                double[] match = nearest(point, maxcandidates);

                if (match != null)
                    seeds[index] = match;
                else
                    maxcandidates.Push(point);
            }
        });

    // suppress non-maximum points
    double[][] maximum = cut ? maxcandidates.ToArray() : supress(seeds);

    // create a decision map using seeds
    int[] seedLabels = classifySeeds(seeds, maximum);
    tree = KDTree.FromData(seeds, seedLabels, distance);

    // create the cluster structure
    clusters = new MeanShiftClusterCollection(tree, maximum);

    // label each point
    return clusters.Nearest(points);
}
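// Usage sketch for the Compute overload above (hypothetical caller code): it assumes a
// MeanShift instance configured elsewhere with a suitable kernel and bandwidth, and
// shows how the threshold and maxIterations parameters map onto a call that returns
// one cluster label per input point.
private static int[] computeUsageSketch(MeanShift meanShift, double[][] points)
{
    // Run mean shift with the documented defaults: a relative convergence
    // threshold of 1e-3 and at most 100 iterations per seed
    return meanShift.Compute(points, threshold: 1e-3, maxIterations: 100);
}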