/// <summary>
/// Convenience search that fills in defaults for many optimizer settings. It examines many
/// HilbertIndex objects, each built from a different permutation of the dimensions, and keeps
/// the one with the best Metric, meaning the one that estimates the lowest number of clusters.
/// </summary>
/// <param name="points">Points to index.</param>
/// <param name="outlierSize">OutlierSize that discriminates between clusters worth counting and those that are not.</param>
/// <param name="noiseSkipBy">NoiseSkipBy value to help smooth out calculations in the presence of noisy data.</param>
/// <param name="reducedNoiseSkipBy">If few clusters, reduce NoiseSkipBy to this.</param>
/// <param name="maxTrials">Max trials to attempt. This equals MaxIterations * ParallelTrials (apart from rounding).</param>
/// <param name="maxIterationsWithoutImprovement">Max iterations without improvement.
/// Stops searching early if no improvement is detected.</param>
/// <param name="useSample">If true, use a random sample of points in each HilbertIndex tested, to save time.
/// May yield a poorer result, but faster.</param>
/// <param name="shouldCompact">Value passed through unchanged to the optimizer's ShouldCompact property.</param>
/// <returns>Whatever the configured optimizer's Search returns for the given points.</returns>
public static IndexFound Search(IList<HilbertPoint> points, int outlierSize, int noiseSkipBy, int reducedNoiseSkipBy, int maxTrials, int maxIterationsWithoutImprovement = 3, bool useSample = false, bool shouldCompact = false)
{
    // Number of trials run side by side in each iteration.
    const int parallelTrials = 4;

    // Integer division rounded to the nearest whole iteration: add half the divisor first.
    var iterationLimit = (maxTrials + (parallelTrials / 2)) / parallelTrials;

    var searcher = new OptimalIndex(outlierSize, noiseSkipBy, reducedNoiseSkipBy, ScrambleHalfStrategy);
    searcher.MaxIterations = iterationLimit;
    searcher.MaxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
    searcher.ParallelTrials = parallelTrials;
    searcher.UseSample = useSample;
    searcher.ShouldCompact = shouldCompact;

    return searcher.Search(points);
}
/// <summary>
/// Perform unassisted classification of points.
/// The points are put into a Hilbert ordering, neighbors along that ordering are merged,
/// then nearby large clusters are merged, and finally outliers are merged.
/// </summary>
/// <returns>The Clusters classification once every merge phase has run.</returns>
public Classification<UnsignedPoint, string> Classify()
{
    // 3) Create multiple HilbertIndexes.
    // 4) Find the best HilbertIndex: the one that predicts the lowest number of clusters K (OptimalIndex).
    // 5) Set the characteristic merge distance S (MergeSquareDistance).
    //TODO: Support formation and use of more than one HilbertIndex, to respect IndexBudget.IndexCount.

    // Hard-wired strategy switch: true selects OptimalPermutation, false selects OptimalIndex.
    var useOptimalPermutation = true;
    UnsignedPoint[] hilbertOrderedPoints;

    Timer.Start("Find optimum Hilbert ordering");
    if (useOptimalPermutation)
    {
        var bestPermutation = OptimalPermutation.Search(
            Clusters.Points().ToList(),
            BitsPerDimension,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        hilbertOrderedPoints = bestPermutation.SortedPoints.ToArray();
        MergeSquareDistance = bestPermutation.MergeSquareDistance;
    }
    else
    {
        var bestIndex = OptimalIndex.Search(
            HilbertPoints,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        hilbertOrderedPoints = HilbertOrderedPoints(bestIndex.SortedPointIndices.ToList());
        MergeSquareDistance = bestIndex.MergeSquareDistance;
    }
    Timer.Stop("Find optimum Hilbert ordering");

    // 6) Pass over the points in Hilbert order. Every consecutive pair closer than the distance S
    //    is merged into the same cluster.
    Timer.Start("Merge by Hilbert index");
    MergeByHilbertIndex(hilbertOrderedPoints);
    Timer.Stop("Merge by Hilbert index");

    // 7) Find the distance from the Centroid of each non-outlier cluster to every other large cluster (ClosestCluster).
    // 8) For the closest neighboring large clusters, probe deeper and find the pair of points,
    //    one drawn from each of two clusters, that is closest and their separation s (square Cartesian distance).
    // 9) If a pair of clusters is closer than S (s ≤ S), merge them, transitively.
    Timer.Start("Merge neighboring large clusters");
    var neighborFinder = new ClosestCluster<string>(Clusters);
    var candidatePairs = neighborFinder.FindClosestClusters(MaxNeighborsToCompare, MergeSquareDistance, OutlierSize, UseExactClusterDistance);
    var largeClusterMergeCount = 0;
    foreach (var candidate in candidatePairs)
    {
        // Only pairs within the merge distance qualify; the test is made as each pair is reached.
        if (candidate.SquareDistance <= MergeSquareDistance)
        {
            // Relabel before merging, as earlier merges in this loop may have changed cluster labels
            // (presumed purpose of Relabel; its implementation is not visible here).
            candidate.Relabel(Clusters);
            if (Clusters.Merge(candidate.Color1, candidate.Color2))
            {
                largeClusterMergeCount++;
            }
        }
    }
    Timer.Stop("Merge neighboring large clusters");

    // 10) Merge outliers with neighboring clusters.
    //     For all the remaining outliers (small clusters), merge them with the nearest large cluster
    //     unless their distance is too great (MergeSquareDistance * OutlierDistanceMultiplier).
    //     Do not permit this phase to cause two large clusters to be joined to each other.
    Timer.Start("Merge outliers");
    var outlierDistanceLimit = (long)(MergeSquareDistance * OutlierDistanceMultiplier);
    var outlierMergeCount = MergeOutliers(outlierDistanceLimit);
    Timer.Stop("Merge outliers");

    var summary = $" {largeClusterMergeCount} Cluster merges, {outlierMergeCount} Outlier merges";
    Logger.Info(summary);
    return Clusters;
}