/// <summary>
/// Folds small outlier clusters into nearby larger clusters.
/// </summary>
/// <param name="maxOutlierMergeDistance">Upper bound (as a square distance) on how far an outlier
/// may be from its nearest cluster and still be merged; passed to FindClosestOutliers.</param>
/// <returns>The number of merges that Clusters.Merge reported as performed.</returns>
public int MergeOutliers(long maxOutlierMergeDistance)
{
    var finder = new ClosestCluster<string>(Clusters);
    var candidatePairs = finder.FindClosestOutliers(
        MaxNeighborsToCompare,
        maxOutlierMergeDistance,
        OutlierSize
    );

    var mergeCount = 0;
    foreach (var candidate in candidatePairs)
    {
        // NOTE(review): Relabel presumably refreshes the pair's labels so they reflect
        // merges performed earlier in this loop - confirm against its implementation.
        candidate.Relabel(Clusters);

        // An outlier must never become the bridge that joins two large clusters which are
        // each close to the outlier but not close to one another. Only pairs that contain
        // exactly one outlier are merged, so once an outlier has been absorbed by its
        // nearer neighbor it is ruled out from further merges.
        var hasExactlyOneOutlier = candidate.CountOutliers(Clusters, OutlierSize) == 1;
        if (hasExactlyOneOutlier && Clusters.Merge(candidate.Color1, candidate.Color2))
        {
            mergeCount++;
        }
    }
    return mergeCount;
}
/// <summary>
/// Merges the clusters that contain two points, provided the points are no farther apart than
/// the permitted square distance.
///
/// The arguments may be HilbertPoints from a HilbertIndex or UnsignedPoints already present in the
/// Classification. Either way, each point is first resolved through IdsToPoints by its UniqueId,
/// so the comparison and the label lookup use the points stored in the Classification.
/// </summary>
/// <param name="p1">First point to compare.</param>
/// <param name="p2">Second point to compare.</param>
/// <param name="maxSquareDistance">When positive, the maximum square distance permitted between
/// the points. When zero or negative, MergeSquareDistance is used instead.</param>
/// <returns><c>true</c> if Clusters.Merge performed a new merge; <c>false</c> if the points are
/// too far apart or Clusters.Merge reported that no merge took place.</returns>
private bool MergeIfNear(UnsignedPoint p1, UnsignedPoint p2, long maxSquareDistance = 0)
{
    var first = IdsToPoints[p1.UniqueId];
    var second = IdsToPoints[p2.UniqueId];

    // A non-positive threshold means "use the classifier's characteristic merge distance".
    long threshold = MergeSquareDistance;
    if (maxSquareDistance > 0)
    {
        threshold = maxSquareDistance;
    }

    var isNearEnough = first.SquareDistanceCompare(second, threshold) <= 0;
    if (!isNearEnough)
    {
        return false;
    }

    var firstLabel = Clusters.GetClassLabel(first);
    var secondLabel = Clusters.GetClassLabel(second);
    return Clusters.Merge(firstLabel, secondLabel);
}
/// <summary>
/// Merges the clusters that hold the two given points, subject to a size restriction:
/// at most one of the two clusters may contain UnmergeableSize points or more.
/// </summary>
/// <param name="p1">A point belonging to the first cluster.</param>
/// <param name="p2">A point belonging to the second cluster.</param>
/// <param name="forceMerge">When true, the UnmergeableSize restriction is ignored.
/// When false, it is honored.</param>
/// <returns>True if Clusters.Merge performed the merge; false if both points already share a
/// cluster, if the size restriction blocked the merge, or if Clusters.Merge returned false.</returns>
private bool Merge(UnsignedPoint p1, UnsignedPoint p2, bool forceMerge = false)
{
    var labelA = Clusters.GetClassLabel(p1);
    var labelB = Clusters.GetClassLabel(p2);

    // Points that already share a label have nothing to merge.
    if (labelA.Equals(labelB))
    {
        return false;
    }

    var countA = Clusters.PointsInClass(labelA).Count;
    var countB = Clusters.PointsInClass(labelB).Count;

    // Two clusters that are both "large" may only be joined when the caller insists.
    var bothUnmergeable = countA >= UnmergeableSize && countB >= UnmergeableSize;
    if (bothUnmergeable && !forceMerge)
    {
        return false;
    }

    return Clusters.Merge(labelA, labelB);
}
/// <summary>
/// Folds the contents of another Box into this one: Defaults are joined with the other box's
/// Defaults, while Clusters and Machines are each merged with their counterparts and stored
/// back as lists.
/// </summary>
/// <param name="box">The box whose Defaults, Clusters and Machines are combined into this instance.</param>
public void Merge(Box box)
{
    // Each property is computed and assigned in turn, in the order Defaults, Clusters, Machines.
    var joinedDefaults = Defaults.Join(box.Defaults);
    Defaults = joinedDefaults;

    var mergedClusters = Clusters.Merge(box.Clusters);
    Clusters = mergedClusters.ToList();

    var mergedMachines = Machines.Merge(box.Machines);
    Machines = mergedMachines.ToList();
}
/// <summary>
/// Perform unassisted classification of points.
///
/// Runs four timed phases in sequence: search for the best Hilbert ordering (which also sets
/// MergeSquareDistance), merge along that ordering, merge neighboring large clusters, and finally
/// merge outliers. A summary of the merge counts is logged before returning.
/// </summary>
/// <returns>The Clusters classification after all merge phases have run.</returns>
public Classification<UnsignedPoint, string> Classify()
{
    // 3) Create multiple HilbertIndexes.
    // 4) Pick the best ordering: the one that predicts the lowest number of clusters K.
    // 5) Set the characteristic merge distance S (MergeSquareDistance) from that optimum.
    //TODO: Support formation and use of more than one HilbertIndex, to respect IndexBudget.IndexCount.

    // Hard-wired switch between the two search strategies; currently always OptimalPermutation.
    var useOptimalPermutation = true;
    UnsignedPoint[] orderedPoints;

    Timer.Start("Find optimum Hilbert ordering");
    if (useOptimalPermutation)
    {
        var bestPermutation = OptimalPermutation.Search(
            Clusters.Points().ToList(),
            BitsPerDimension,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        orderedPoints = bestPermutation.SortedPoints.ToArray();
        MergeSquareDistance = bestPermutation.MergeSquareDistance;
    }
    else
    {
        var bestIndex = OptimalIndex.Search(
            HilbertPoints,
            IndexConfig.OutlierSize,
            IndexConfig.NoiseSkipBy,
            IndexConfig.ReducedNoiseSkipBy,
            IndexConfig.MaxTrials,
            IndexConfig.MaxIterationsWithoutImprovement,
            IndexConfig.UseSample,
            true
        );
        orderedPoints = HilbertOrderedPoints(bestIndex.SortedPointIndices.ToList());
        MergeSquareDistance = bestIndex.MergeSquareDistance;
    }
    Timer.Stop("Find optimum Hilbert ordering");

    // 6) Pass over the points in Hilbert order. Every consecutive pair closer than the
    //    distance S is merged into the same cluster.
    Timer.Start("Merge by Hilbert index");
    MergeByHilbertIndex(orderedPoints);
    Timer.Stop("Merge by Hilbert index");

    // 7) Find the distance from the Centroid of each non-outlier cluster to every other
    //    large cluster (ClosestCluster).
    // 8) For the closest neighboring large clusters, probe deeper and find the pair of points,
    //    one drawn from each of two clusters, that is closest, and their separation s
    //    (square Cartesian distance).
    // 9) If a pair of clusters is closer than S (s <= S), merge them, transitively.
    Timer.Start("Merge neighboring large clusters");
    var neighborFinder = new ClosestCluster<string>(Clusters);
    var neighborPairs = neighborFinder.FindClosestClusters(
        MaxNeighborsToCompare,
        MergeSquareDistance,
        OutlierSize,
        UseExactClusterDistance
    );
    // The filter is evaluated lazily, pair by pair, as the loop below enumerates it.
    var mergeablePairs = neighborPairs.Where(candidate => candidate.SquareDistance <= MergeSquareDistance);
    var largeClusterMerges = 0;
    foreach (var candidate in mergeablePairs)
    {
        // NOTE(review): Relabel presumably updates the pair's labels to account for merges
        // already performed in this loop - confirm against its implementation.
        candidate.Relabel(Clusters);
        if (Clusters.Merge(candidate.Color1, candidate.Color2))
        {
            largeClusterMerges++;
        }
    }
    Timer.Stop("Merge neighboring large clusters");

    // 10) Merge outliers with neighboring clusters.
    //     Every remaining outlier (small cluster) is merged with its nearest large cluster
    //     unless it is too far away (MergeSquareDistance * OutlierDistanceMultiplier).
    //     This phase must not cause two large clusters to be joined to each other.
    Timer.Start("Merge outliers");
    var outlierDistanceLimit = (long)(MergeSquareDistance * OutlierDistanceMultiplier);
    var outlierMergeCount = MergeOutliers(outlierDistanceLimit);
    Timer.Stop("Merge outliers");

    string summary = $" {largeClusterMerges} Cluster merges, {outlierMergeCount} Outlier merges";
    Logger.Info(summary);
    return Clusters;
}