/// <summary>
/// Compare every cluster to every other cluster and merge each pair whose combined radius is
/// sufficiently smaller than the sum of the radii of the two original clusters, that is, whose
/// <c>Shrinkage</c> does not exceed <c>MergeableShrinkage</c>.
/// </summary>
/// <remarks>
/// Nothing is done (and false is returned) when <c>MergeableShrinkage</c> is not positive or
/// when there is only a single partition.
/// </remarks>
/// <returns>True if any merges were performed, false otherwise.</returns>
bool MergeByRadius()
{
    if (MergeableShrinkage <= 0 || Clusters.NumPartitions == 1)
    {
        return false;
    }

    Timer.Start("Merge by radius");
    try
    {
        // Snapshot the labels once: no merge happens until every candidate has been evaluated,
        // so there is no need to query the classification again on every loop iteration.
        var labels = Clusters.ClassLabels().ToList();

        var radii = new Dictionary<string, ClusterRadius>();
        foreach (var label in labels)
        {
            radii[label] = new ClusterRadius(Clusters.PointsInClass(label).ToList());
        }

        var potentialMerges = new List<RadiusMergeCandidate>();
        var minShrinkage = double.MaxValue;
        foreach (var label1 in labels)
        {
            // Visit each unordered pair once by only pairing label1 with labels that sort after it.
            // CompareTo is only guaranteed to return *some* negative number, not exactly -1.
            foreach (var label2 in labels.Where(label => label1.CompareTo(label) < 0))
            {
                var potentialMerge = new RadiusMergeCandidate(
                    Clusters,
                    label1,
                    radii[label1],
                    label2,
                    radii[label2]
                );
                minShrinkage = Math.Min(minShrinkage, potentialMerge.Shrinkage);
                if (potentialMerge.Shrinkage <= MergeableShrinkage)
                {
                    potentialMerges.Add(potentialMerge);
                }
            }
        }

        //TODO: Should we process merges from low shrinkage to high, and regenerate results after each merge?
        //      This is in case merging A + B is allowed and B + C is allowed, but
        //      after A + B are merged to form D, C + D are not a good merge.
        //      For now, process all merges that pass the Shrinkage test.
        int mergeCount = 0;
        foreach (var potentialMerge in potentialMerges)
        {
            if (Merge(potentialMerge.Point1, potentialMerge.Point2, true))
            {
                mergeCount++;
            }
        }

        Logger.Info($"{mergeCount} cluster pairs successfully merged by radius, with {potentialMerges.Count} expected.");
        Logger.Info($"Radius shrinkage values: Best {minShrinkage} vs Permitted {MergeableShrinkage}");
        return mergeCount > 0;
    }
    finally
    {
        // Always stop the timer, even if building a candidate or performing a merge throws.
        Timer.Stop("Merge by radius");
    }
}
/// <summary>
/// Treating this ClusterRadius as the statistics of two clusters tentatively combined into one,
/// compute the ratio of its radius to the sum of the radii of the two component clusters.
///
/// The DensityClassifier can use this ratio to decide whether two clusters it carved out of a
/// single original cluster ought to be rejoined.
///
/// Neither the maximum radius nor the mean radius is used alone; the two are blended (maximum
/// weighted twice as heavily as mean). Outliers can inflate the maximum radius, while the mean
/// could grossly understate the spatial extent of a cluster.
/// </summary>
/// <returns>The shrinkage ratio.
/// Greater than one when the combined cluster's radius exceeds the sum of the radii of its parts.
/// Less than one when the combination has a lower radius; only then is merging appropriate.
/// Zero when the combined cluster's blended radius is zero.
/// How much less than one is a good value is a matter for further research.
/// </returns>
/// <param name="part1">Statistics about the first component cluster.</param>
/// <param name="part2">Statistics about the second component cluster.</param>
public double Shrinkage(ClusterRadius part1, ClusterRadius part2)
{
    // Sum the corresponding statistics of the two parts.
    var summedMaximum = part1.MaximumRadius + part2.MaximumRadius;
    var summedMean = part1.MeanRadius + part2.MeanRadius;

    //TODO: Research how to weight the values. Use 2:1 weighting for now. 1:1 did not work as well as expected.
    var denominator = (2 * summedMaximum + summedMean) / 3;
    var numerator = (2 * this.MaximumRadius + this.MeanRadius) / 3;

    // A combined radius of zero is reported as zero shrinkage without dividing.
    return numerator == 0
        ? 0
        : numerator / denominator;
}
/// <summary>
/// Describe a tentative merge of two classes: record their labels, one representative point from
/// each, the radius statistics of the two classes taken together, and the resulting shrinkage.
/// </summary>
/// <param name="clusters">Classification holding the points of both classes.</param>
/// <param name="label1">Label of the first class.</param>
/// <param name="radius1">Radius statistics already computed for the first class.</param>
/// <param name="label2">Label of the second class.</param>
/// <param name="radius2">Radius statistics already computed for the second class.</param>
public RadiusMergeCandidate(
    Classification<UnsignedPoint, string> clusters,
    string label1,
    ClusterRadius radius1,
    string label2,
    ClusterRadius radius2
)
{
    // First class: remember its label and take its first point as the representative.
    this.Label1 = label1;
    var representative1 = clusters.PointsInClass(this.Label1).First();
    this.Point1 = representative1;

    // Second class, likewise.
    this.Label2 = label2;
    var representative2 = clusters.PointsInClass(this.Label2).First();
    this.Point2 = representative2;

    // Radius statistics over the points of both classes, then the shrinkage relative to the parts.
    var unionRadius = new ClusterRadius(
        clusters.PointsInClass(this.Label1),
        clusters.PointsInClass(this.Label2)
    );
    this.CombinedRadius = unionRadius;
    this.Shrinkage = this.CombinedRadius.Shrinkage(radius1, radius2);
}