コード例 #1
0
        /// <summary>
        /// Folds small outlier clusters into larger clusters that lie near them.
        /// </summary>
        /// <param name="maxOutlierMergeDistance">An outlier is merged only when the square distance to its
        /// nearest cluster does not exceed this value.</param>
        /// <returns>The number of merges that were actually performed.</returns>
        public int MergeOutliers(long maxOutlierMergeDistance)
        {
            var finder     = new ClosestCluster <string>(Clusters);
            var candidates = finder.FindClosestOutliers(MaxNeighborsToCompare, maxOutlierMergeDistance, OutlierSize);
            var mergeCount = 0;

            foreach (var candidate in candidates)
            {
                candidate.Relabel(Clusters);

                // An outlier must not become a bridge that joins two large clusters which are
                // each near the outlier but not near one another. Once the outlier has been
                // absorbed by its nearer neighbor it is excluded from further merges, so a
                // merge is attempted only while the pair still holds exactly one outlier.
                var holdsSingleOutlier = candidate.CountOutliers(Clusters, OutlierSize) == 1;

                if (holdsSingleOutlier && Clusters.Merge(candidate.Color1, candidate.Color2))
                {
                    mergeCount++;
                }
            }
            return mergeCount;
        }
コード例 #2
0
        /// <summary>
        /// Merges the clusters that contain two points, provided the points are no farther apart than the
        /// permitted square distance.
        /// The points supplied may be HilbertPoints from a HilbertIndex or UnsignedPoints already present in
        /// the Classification. Both are resolved through IdsToPoints by their UniqueId to obtain the
        /// UnsignedPoint that is used for the comparison and the merge.
        /// </summary>
        /// <param name="p1">First point to compare.</param>
        /// <param name="p2">Second point to compare.</param>
        /// <param name="maxSquareDistance">When positive, the maximum square distance permitted between the points.
        /// When zero or negative, MergeSquareDistance is used instead.</param>
        /// <returns><c>true</c> if a new merge was performed; <c>false</c> if the points are too far apart
        /// or the merge was not performed by the Classification.</returns>
        private bool MergeIfNear(UnsignedPoint p1, UnsignedPoint p2, long maxSquareDistance = 0)
        {
            var first  = IdsToPoints[p1.UniqueId];
            var second = IdsToPoints[p2.UniqueId];

            // A non-positive argument selects the default MergeSquareDistance.
            long limit = maxSquareDistance > 0 ? maxSquareDistance : MergeSquareDistance;

            // Too far apart: leave both clusters untouched.
            if (first.SquareDistanceCompare(second, limit) > 0)
            {
                return false;
            }

            return Clusters.Merge(Clusters.GetClassLabel(first), Clusters.GetClassLabel(second));
        }
コード例 #3
0
        /// <summary>
        /// Merges the clusters that own the two given points, subject to a size restriction.
        ///
        /// The merge is refused when both clusters have a size greater than or equal to UnmergeableSize,
        /// unless the caller forces it.
        /// </summary>
        /// <param name="p1">Point belonging to the first cluster to merge.</param>
        /// <param name="p2">Point belonging to the second cluster to merge.</param>
        /// <param name="forceMerge">If true, the merge is attempted even when both clusters reach UnmergeableSize.
        /// If false, UnmergeableSize is honored.</param>
        /// <returns>True if the merge was performed; false if the points already share a cluster,
        /// the size restriction blocked the merge, or the underlying merge reported failure.</returns>
        private bool Merge(UnsignedPoint p1, UnsignedPoint p2, bool forceMerge = false)
        {
            var label1 = Clusters.GetClassLabel(p1);
            var label2 = Clusters.GetClassLabel(p2);

            // Points that already carry the same label have nothing to merge.
            if (label1.Equals(label2))
            {
                return false;
            }

            var count1 = Clusters.PointsInClass(label1).Count;
            var count2 = Clusters.PointsInClass(label2).Count;

            // Only the case where BOTH clusters are at or above the threshold is an obstacle.
            var bothTooLarge = count1 >= UnmergeableSize && count2 >= UnmergeableSize;

            return (forceMerge || !bothTooLarge) && Clusters.Merge(label1, label2);
        }
コード例 #4
0
 /// <summary>
 /// Combines another box into this one: Defaults are combined via <c>Join</c>, while Clusters and
 /// Machines are combined via <c>Merge</c> and stored back as lists.
 /// </summary>
 /// <param name="box">The box whose Defaults, Clusters and Machines are folded into this instance.</param>
 public void Merge(Box box)
 {
     // Each member is computed and assigned in turn, in the order Defaults, Clusters, Machines.
     var joinedDefaults = Defaults.Join(box.Defaults);
     Defaults = joinedDefaults;

     var combinedClusters = Clusters.Merge(box.Clusters);
     Clusters = combinedClusters.ToList();

     var combinedMachines = Machines.Merge(box.Machines);
     Machines = combinedMachines.ToList();
 }
コード例 #5
0
        /// <summary>
        /// Performs unassisted classification of the points in four phases: choose a Hilbert ordering and the
        /// characteristic merge distance, merge along that ordering, merge neighboring large clusters,
        /// and finally merge outliers.
        /// </summary>
        /// <returns>The <c>Clusters</c> classification after all merge phases have run.</returns>
        public Classification <UnsignedPoint, string> Classify()
        {
            // Steps 3-5:
            //   3) Create multiple HilbertIndexes.
            //   4) Find the best HilbertIndex, i.e. the one that predicts the lowest number of clusters K (OptimalIndex).
            //   5) Set the characteristic merge distance S (MergeSquareDistance).
            // TODO: Support formation and use of more than one HilbertIndex, to respect IndexBudget.IndexCount.
            var searchByPermutation = true;

            UnsignedPoint[] orderedPoints;

            Timer.Start("Find optimum Hilbert ordering");
            if (searchByPermutation)
            {
                var best = OptimalPermutation.Search(
                    Clusters.Points().ToList(),
                    BitsPerDimension,
                    IndexConfig.OutlierSize,
                    IndexConfig.NoiseSkipBy,
                    IndexConfig.ReducedNoiseSkipBy,
                    IndexConfig.MaxTrials,
                    IndexConfig.MaxIterationsWithoutImprovement,
                    IndexConfig.UseSample,
                    true
                    );
                orderedPoints       = best.SortedPoints.ToArray();
                MergeSquareDistance = best.MergeSquareDistance;
            }
            else
            {
                var best = OptimalIndex.Search(
                    HilbertPoints,
                    IndexConfig.OutlierSize,
                    IndexConfig.NoiseSkipBy,
                    IndexConfig.ReducedNoiseSkipBy,
                    IndexConfig.MaxTrials,
                    IndexConfig.MaxIterationsWithoutImprovement,
                    IndexConfig.UseSample,
                    true
                    );
                orderedPoints       = HilbertOrderedPoints(best.SortedPointIndices.ToList());
                MergeSquareDistance = best.MergeSquareDistance;
            }
            Timer.Stop("Find optimum Hilbert ordering");

            // Step 6: walk the points in Hilbert order; every consecutive pair closer than the
            // distance S is merged into the same cluster.
            Timer.Start("Merge by Hilbert index");
            MergeByHilbertIndex(orderedPoints);
            Timer.Stop("Merge by Hilbert index");

            // Steps 7-9:
            //   7) Find the distance from the Centroid of each non-outlier cluster to every other large cluster (ClosestCluster).
            //   8) For the closest neighboring large clusters, probe deeper and find the pair of points,
            //      one drawn from each of two clusters, that is closest, and their separation s (square Cartesian distance).
            //   9) If a pair of clusters is closer than S (s ≤ S), merge them, transitively.
            Timer.Start("Merge neighboring large clusters");
            var neighborFinder = new ClosestCluster <string>(Clusters);
            var neighborPairs  = neighborFinder.FindClosestClusters(MaxNeighborsToCompare, MergeSquareDistance, OutlierSize, UseExactClusterDistance);
            var nearEnough     = neighborPairs.Where(candidate => candidate.SquareDistance <= MergeSquareDistance);
            var largeMerges    = 0;

            foreach (var pair in nearEnough)
            {
                pair.Relabel(Clusters);
                if (!Clusters.Merge(pair.Color1, pair.Color2))
                {
                    continue;
                }
                largeMerges++;
            }
            Timer.Stop("Merge neighboring large clusters");

            // Step 10: merge outliers with neighboring clusters.
            //   Every remaining outlier (small cluster) is merged with the nearest large cluster
            //   unless it is too far away (MergeSquareDistance * OutlierDistanceMultiplier).
            //   This phase must not cause two large clusters to be joined to each other.
            Timer.Start("Merge outliers");
            var smallMerges = MergeOutliers((long)(MergeSquareDistance * OutlierDistanceMultiplier));

            Timer.Stop("Merge outliers");

            Logger.Info($"   {largeMerges} Cluster merges, {smallMerges} Outlier merges");
            return Clusters;
        }