Esempio n. 1
0
        /// <summary>
        /// Configure an <c>OptimalIndex</c> optimizer from the supplied settings and delegate to its
        /// instance-level <c>SearchMany</c>.
        /// </summary>
        /// <param name="points">Points forwarded to the optimizer's search.</param>
        /// <param name="indexCount">Forwarded unchanged to the optimizer's search.</param>
        /// <param name="outlierSize">Passed to the optimizer's constructor.</param>
        /// <param name="noiseSkipBy">Passed to the optimizer's constructor.</param>
        /// <param name="reducedNoiseSkipBy">Passed to the optimizer's constructor.</param>
        /// <param name="maxTrials">Trial budget; divided by the fixed parallel trial count to derive MaxIterations.</param>
        /// <param name="maxIterationsWithoutImprovement">Copied to the optimizer's MaxIterationsWithoutImprovement.</param>
        /// <param name="useSample">Copied to the optimizer's UseSample.</param>
        /// <param name="shouldCompact">Copied to the optimizer's ShouldCompact.</param>
        /// <returns>Whatever the optimizer's <c>SearchMany</c> returns for the given points and index count.</returns>
        public static IList <IndexFound> SearchMany(IList <HilbertPoint> points, int indexCount, int outlierSize, int noiseSkipBy, int reducedNoiseSkipBy, int maxTrials, int maxIterationsWithoutImprovement = 3, bool useSample = false, bool shouldCompact = false)
        {
            // Fixed number of trials per iteration.
            var trialsPerIteration = 4;

            // Integer division of maxTrials by the trial count, with half the divisor added first
            // (rounds to nearest for non-negative maxTrials).
            var iterationLimit = (maxTrials + (trialsPerIteration / 2)) / trialsPerIteration;

            var search = new OptimalIndex(outlierSize, noiseSkipBy, reducedNoiseSkipBy, ScrambleHalfStrategy);
            search.MaxIterations = iterationLimit;
            search.MaxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
            search.ParallelTrials = trialsPerIteration;
            search.UseSample = useSample;
            search.ShouldCompact = shouldCompact;

            return search.SearchMany(points, indexCount);
        }
        /// <summary>
        /// Classify the points into clusters without assistance.
        /// </summary>
        /// <remarks>
        /// Runs four timed phases in order: find a Hilbert ordering (which also sets MergeSquareDistance),
        /// merge along that ordering, merge neighboring large clusters, and merge outliers.
        /// A summary of the merge counts is logged before returning.
        /// </remarks>
        /// <returns>The <c>Clusters</c> classification after all merge phases have run.</returns>
        public Classification <UnsignedPoint, string> Classify()
        {
            // Steps 3-5: create Hilbert indexes, pick the one predicting the lowest number of clusters K,
            // and set the characteristic merge distance S (MergeSquareDistance).
            //TODO: Support formation and use of more than one HilbertIndex, to respect IndexBudget.IndexCount.

            // Hard-coded switch between the two search strategies; kept as a plain local so the
            // alternative branch stays compiled.
            var preferPermutationSearch = true;

            UnsignedPoint[] orderedPoints;

            Timer.Start("Find optimum Hilbert ordering");
            if (preferPermutationSearch)
            {
                var best = OptimalPermutation.Search(
                    Clusters.Points().ToList(),
                    BitsPerDimension,
                    IndexConfig.OutlierSize,
                    IndexConfig.NoiseSkipBy,
                    IndexConfig.ReducedNoiseSkipBy,
                    IndexConfig.MaxTrials,
                    IndexConfig.MaxIterationsWithoutImprovement,
                    IndexConfig.UseSample,
                    true);
                orderedPoints       = best.SortedPoints.ToArray();
                MergeSquareDistance = best.MergeSquareDistance;
            }
            else
            {
                var best = OptimalIndex.Search(
                    HilbertPoints,
                    IndexConfig.OutlierSize,
                    IndexConfig.NoiseSkipBy,
                    IndexConfig.ReducedNoiseSkipBy,
                    IndexConfig.MaxTrials,
                    IndexConfig.MaxIterationsWithoutImprovement,
                    IndexConfig.UseSample,
                    true);
                orderedPoints       = HilbertOrderedPoints(best.SortedPointIndices.ToList());
                MergeSquareDistance = best.MergeSquareDistance;
            }
            Timer.Stop("Find optimum Hilbert ordering");

            // Step 6: walk the points in Hilbert order; every consecutive pair closer than the
            // distance S is merged into the same cluster.
            Timer.Start("Merge by Hilbert index");
            MergeByHilbertIndex(orderedPoints);
            Timer.Stop("Merge by Hilbert index");

            // Steps 7-9: find the distance from the centroid of each non-outlier cluster to every other
            // large cluster, probe the closest neighbors for their closest pair of points (square
            // Cartesian distance s), and merge clusters transitively whenever s <= S.
            Timer.Start("Merge neighboring large clusters");
            var neighborFinder = new ClosestCluster <string>(Clusters);
            var candidatePairs = neighborFinder.FindClosestClusters(MaxNeighborsToCompare, MergeSquareDistance, OutlierSize, UseExactClusterDistance);
            var mergedClusterCount = 0;

            foreach (var candidate in candidatePairs)
            {
                if (candidate.SquareDistance <= MergeSquareDistance)
                {
                    candidate.Relabel(Clusters);
                    if (Clusters.Merge(candidate.Color1, candidate.Color2))
                    {
                        mergedClusterCount++;
                    }
                }
            }
            Timer.Stop("Merge neighboring large clusters");

            // Step 10: merge the remaining outliers (small clusters) with the nearest large cluster,
            // unless the distance exceeds MergeSquareDistance * OutlierDistanceMultiplier.
            // This phase is not meant to join two large clusters to each other.
            Timer.Start("Merge outliers");
            var mergedOutlierCount = MergeOutliers((long)(MergeSquareDistance * OutlierDistanceMultiplier));

            Timer.Stop("Merge outliers");

            var summary = $"   {mergedClusterCount} Cluster merges, {mergedOutlierCount} Outlier merges";
            Logger.Info(summary);

            return Clusters;
        }