/// <summary>
/// Convenience wrapper: configures an <c>OptimalIndex</c> optimizer that uses the
/// <c>ScrambleHalfStrategy</c> and runs its instance-level <c>SearchMany</c>.
/// </summary>
/// <param name="points">Points handed unchanged to the optimizer's search.</param>
/// <param name="indexCount">Forwarded to the optimizer's <c>SearchMany</c> (presumably the number of indices wanted - confirm against that method).</param>
/// <param name="outlierSize">Forwarded to the <c>OptimalIndex</c> constructor.</param>
/// <param name="noiseSkipBy">Forwarded to the <c>OptimalIndex</c> constructor.</param>
/// <param name="reducedNoiseSkipBy">Forwarded to the <c>OptimalIndex</c> constructor.</param>
/// <param name="maxTrials">Total trial budget; it is divided by the fixed parallelism (4) to obtain <c>MaxIterations</c>.</param>
/// <param name="maxIterationsWithoutImprovement">Copied to the optimizer property of the same name.</param>
/// <param name="useSample">Copied to the optimizer's <c>UseSample</c> property.</param>
/// <param name="shouldCompact">Copied to the optimizer's <c>ShouldCompact</c> property.</param>
/// <returns>Whatever the optimizer's <c>SearchMany(points, indexCount)</c> returns.</returns>
public static IList<IndexFound> SearchMany(IList<HilbertPoint> points, int indexCount, int outlierSize, int noiseSkipBy, int reducedNoiseSkipBy, int maxTrials, int maxIterationsWithoutImprovement = 3, bool useSample = false, bool shouldCompact = false)
{
    // Number of trials the optimizer is told to run side by side.
    const int trialsInParallel = 4;

    // Spread the trial budget across the parallel trials. Adding half the divisor
    // before the integer division rounds to nearest instead of truncating
    // (for a non-negative maxTrials).
    var iterationBudget = (maxTrials + (trialsInParallel / 2)) / trialsInParallel;

    var searcher = new OptimalIndex(outlierSize, noiseSkipBy, reducedNoiseSkipBy, ScrambleHalfStrategy);
    searcher.MaxIterations = iterationBudget;
    searcher.MaxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
    searcher.ParallelTrials = trialsInParallel;
    searcher.UseSample = useSample;
    searcher.ShouldCompact = shouldCompact;

    return searcher.SearchMany(points, indexCount);
}
/// <summary>
/// Perform unassisted classification of points.
/// </summary>
/// <remarks>
/// Runs four timed phases in order: pick a Hilbert ordering and the merge distance,
/// merge consecutive points along that ordering, merge neighboring large clusters,
/// and finally merge outliers. The merge counts are logged before returning.
/// </remarks>
/// <returns>The <c>Clusters</c> classification after all merge phases have run.</returns>
public Classification<UnsignedPoint, string> Classify()
{
    // Steps 3-5: build candidate Hilbert orderings, keep the best one found by the
    // search, and adopt its characteristic merge distance S (MergeSquareDistance).
    //TODO: Support formation and use of more than one HilbertIndex, to respect IndexBudget.IndexCount.
    var useOptimalPermutation = true;
    UnsignedPoint[] pointsInCurveOrder;

    Timer.Start("Find optimum Hilbert ordering");
    if (useOptimalPermutation)
    {
        // Search driven by OptimalPermutation over the points currently held in Clusters.
        var bestPermutation = OptimalPermutation.Search(
            Clusters.Points().ToList(),
            BitsPerDimension,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        pointsInCurveOrder = bestPermutation.SortedPoints.ToArray();
        MergeSquareDistance = bestPermutation.MergeSquareDistance;
    }
    else
    {
        // Alternative path: search driven by OptimalIndex over the prepared HilbertPoints.
        var bestIndex = OptimalIndex.Search(
            HilbertPoints,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        pointsInCurveOrder = HilbertOrderedPoints(bestIndex.SortedPointIndices.ToList());
        MergeSquareDistance = bestIndex.MergeSquareDistance;
    }
    Timer.Stop("Find optimum Hilbert ordering");

    // Step 6: walk the points in Hilbert order; every consecutive pair closer than
    // the distance S is merged into the same cluster.
    Timer.Start("Merge by Hilbert index");
    MergeByHilbertIndex(pointsInCurveOrder);
    Timer.Stop("Merge by Hilbert index");

    // Steps 7-9: find the closest neighboring large clusters and the separation s
    // (square Cartesian distance) of each pair; any pair with s <= S is merged,
    // transitively.
    Timer.Start("Merge neighboring large clusters");
    var neighborFinder = new ClosestCluster<string>(Clusters);
    var nearbyPairs = neighborFinder.FindClosestClusters(MaxNeighborsToCompare, MergeSquareDistance, OutlierSize, UseExactClusterDistance);
    var clusterMerges = 0;
    foreach (var candidate in nearbyPairs)
    {
        // Only pairs within the merge distance qualify.
        if (!(candidate.SquareDistance <= MergeSquareDistance))
        {
            continue;
        }

        // Relabel before merging: earlier merges in this loop may have changed
        // which cluster each end of the pair belongs to.
        candidate.Relabel(Clusters);
        var merged = Clusters.Merge(candidate.Color1, candidate.Color2);
        if (merged)
        {
            clusterMerges += 1;
        }
    }
    Timer.Stop("Merge neighboring large clusters");

    // Step 10: merge outliers with neighboring clusters.
    // Each remaining outlier (small cluster) joins its nearest large cluster unless it
    // is farther away than MergeSquareDistance * OutlierDistanceMultiplier.
    // This phase must not cause two large clusters to be joined to each other.
    Timer.Start("Merge outliers");
    var outlierMerges = MergeOutliers((long)(MergeSquareDistance * OutlierDistanceMultiplier));
    Timer.Stop("Merge outliers");

    var summary = $" {clusterMerges} Cluster merges, {outlierMerges} Outlier merges";
    Logger.Info(summary);
    return Clusters;
}