/// <summary>
/// Compare every cluster to every other cluster and decide if we should merge them based on
/// whether the radius of the combined cluster is less than the sum of the radii of the original clusters.
///
/// A pair of clusters becomes a merge candidate when the Shrinkage computed by its
/// RadiusMergeCandidate is no greater than MergeableShrinkage. Every candidate is then
/// passed to Merge; only merges that Merge reports as successful are counted.
/// </summary>
/// <returns>True if any merges were performed, false otherwise.
/// Also false (without doing any work) if MergeableShrinkage is not positive or only one partition exists.</returns>
bool MergeByRadius()
{
    // Merging by radius is disabled, or there is nothing left to merge with.
    if (MergeableShrinkage <= 0 || Clusters.NumPartitions == 1)
    {
        return false;
    }

    Timer.Start("Merge by radius");
    try
    {
        var mergeCount = 0;

        // Snapshot the labels once: no merge happens until all candidates have been collected,
        // so there is no need to re-query the classification for every outer iteration.
        var labels = Clusters.ClassLabels().ToList();

        // Compute the radius of each cluster a single time, keyed by cluster label.
        var radii = new Dictionary<string, ClusterRadius>();
        foreach (var label in labels)
        {
            radii[label] = new ClusterRadius(Clusters.PointsInClass(label).ToList());
        }

        var potentialMerges = new List<RadiusMergeCandidate>();
        var minShrinkage = double.MaxValue;
        foreach (var label1 in labels)
        {
            // Visit each unordered pair once, with label1 ordered before label2.
            // CompareTo only promises a negative value for "less than", not exactly -1.
            foreach (var label2 in labels.Where(label => label1.CompareTo(label) < 0))
            {
                var potentialMerge = new RadiusMergeCandidate(
                    Clusters,
                    label1,
                    radii[label1],
                    label2,
                    radii[label2]
                );
                // Track the best (smallest) shrinkage seen, for the diagnostic log below.
                minShrinkage = Math.Min(minShrinkage, potentialMerge.Shrinkage);
                if (potentialMerge.Shrinkage <= MergeableShrinkage)
                {
                    potentialMerges.Add(potentialMerge);
                }
            }
        }

        //TODO: Should we process merges from low shrinkage to high, and regenerate results after each merge?
        //      This is in case merging A + B is allowed and B + C is allowed, but
        //      after A + B are merged to form D, C + D are not a good merge.
        //      For now, process all merges that pass the Shrinkage test.
        foreach (var potentialMerge in potentialMerges)
        {
            if (Merge(potentialMerge.Point1, potentialMerge.Point2, true))
            {
                mergeCount++;
            }
        }

        Logger.Info($"{mergeCount} cluster pairs successfully merged by radius, with {potentialMerges.Count} expected.");
        Logger.Info($"Radius shrinkage values: Best {minShrinkage} vs Permitted {MergeableShrinkage}");
        return mergeCount > 0;
    }
    finally
    {
        // Always balance Timer.Start, even if radius computation or merging throws.
        Timer.Stop("Merge by radius");
    }
}
/// <summary>
/// Approximates the closest distance between every cluster and every other cluster.
///
/// If there are currently K clusters, this will return at most K(K-1)/2 ClosestPairs, unsorted.
/// If an upper limit on the square distance is supplied, fewer may be returned.
/// Results are produced lazily, as the returned sequence is enumerated.
/// </summary>
/// <param name="maxSquareDistance">If omitted, no restriction on distance is applied.
/// If supplied, no measurement of the closest distance between two colors will
/// be returned if those two colors are farther apart than this distance.
/// </param>
/// <returns>ClosestPairs for every pair of colors, unsorted.
/// If a distance is returned for colors "A" and "B", one will not be returned for colors "B" and "A",
/// since the distance is symmetric.</returns>
public IEnumerable<ClosestPair> FindAllClustersApproximately(long maxSquareDistance = long.MaxValue)
{
    // Snapshot the labels so each unordered pair (i < j) is visited exactly once.
    var colors = Clusters.ClassLabels().ToArray();
    for (var i = 0; i < colors.Length; i++)
    {
        for (var j = i + 1; j < colors.Length; j++)
        {
            var closest = NearestPointFinder.FindNearestWithLabel(colors[i], colors[j]);
            var shortest = new ClosestPair(
                closest.SearchLabel,
                closest.SearchPoint,
                closest.NearLabel,
                closest.NearPoint,
                closest.Measure
            );

            // Honor the documented limit: pairs farther apart than maxSquareDistance are omitted.
            // NOTE(review): assumes ClosestPair.SquareDistance holds the Measure passed above - confirm.
            if (shortest.SquareDistance > maxSquareDistance)
            {
                continue;
            }

            // NOTE(review): Swap presumably orients the pair relative to colors[i] - confirm against ClosestPair.
            yield return shortest.Swap(colors[i]);
        }
    }
}
/// <summary>
/// Less efficient way to do the same thing as FindClusterApproximately.
///
/// Measures the given cluster against each other cluster in turn using FindPairApproximately
/// and keeps the pair with the smallest square distance.
/// </summary>
/// <param name="color1">Cluster id for the first cluster to compare.</param>
/// <returns>Results that identify which other cluster is closest to the given cluster.
/// If no other cluster beats the freshly constructed ClosestPair, that initial value is returned.</returns>
public ClosestPair FindClusterIteratively(TLabel color1)
{
    var best = new ClosestPair();
    foreach (var otherColor in Clusters.ClassLabels())
    {
        // A cluster is never compared against itself.
        if (otherColor.Equals(color1))
        {
            continue;
        }

        var candidate = FindPairApproximately(color1, otherColor);

        // Only a strictly smaller distance replaces the current best, so ties keep the earlier pair.
        var isCloser = best.SquareDistance > candidate.SquareDistance;
        if (isCloser)
        {
            best = candidate;
        }
    }

    return best;
}