/// <summary>
/// Finds exactly the cluster closest to the cluster whose label matches color1 and the points
/// in each cluster that are closest, along with the square distance between them.
///
/// Every point labeled color1 is compared against every point carrying any other label,
/// so the cost grows with the product of the two point counts.
/// </summary>
/// <param name="color1">Label of the cluster whose nearest neighbor is being sought.</param>
/// <returns>A point in the cluster corresponding to color1, the closest point to it
/// from another cluster, the square distance between the points, and the label of the other cluster.
/// NOTE: ClosestPair.Color1 will equal color1.
/// </returns>
public ClosestPair FindClusterExhaustively(TLabel color1)
{
    var shortest = new ClosestPair();

    // The set of points outside the color1 cluster does not depend on which color1 point
    // is being examined, so label and filter them once instead of once per outer iteration.
    var pointsInOtherClusters = Clusters.Points()
        .Select(p => new { Point = p, Color = Clusters.GetClassLabel(p) })
        .Where(pc => !color1.Equals(pc.Color))
        .ToList();

    foreach (var p1 in Clusters.PointsInClass(color1))
    {
        foreach (var pc in pointsInOtherClusters)
        {
            var d = p1.Measure(pc.Point);
            // Strict comparison: on ties the first pair encountered is kept.
            if (d < shortest.SquareDistance)
            {
                shortest.SquareDistance = d;
                shortest.Color1 = color1;
                shortest.Point1 = p1;
                shortest.Color2 = pc.Color;
                shortest.Point2 = pc.Point;
            }
        }
    }

    //TODO: If there is only one cluster, the if statement above will not be triggered and shortest will
    //      be ill-defined and cause a Null Pointer exception in Swap.
    //      NOTE(review): the right fallback depends on ClosestPair/Swap, which are not visible here.
    return shortest.Swap(color1);
}
/// <summary>
/// Compare every cluster to every other cluster and decide if we should merge them based on
/// whether the radius of the combined cluster is less than the sum of the radii of the original clusters.
///
/// Each unordered pair of labels is evaluated once as a RadiusMergeCandidate; pairs whose
/// Shrinkage is at or below MergeableShrinkage are merged (forcing past the UnmergeableSize limit).
/// Nothing is done if MergeableShrinkage is not positive or there is exactly one partition.
/// </summary>
/// <returns>True if any merges were performed, false otherwise.</returns>
bool MergeByRadius()
{
    if (MergeableShrinkage <= 0 || Clusters.NumPartitions == 1)
    {
        return false;
    }
    Timer.Start("Merge by radius");

    // Snapshot the labels once; they are reused for the radius table and both pairing loops.
    var labels = Clusters.ClassLabels().ToList();

    var radii = new Dictionary<string, ClusterRadius>();
    foreach (var label in labels)
    {
        radii[label] = new ClusterRadius(Clusters.PointsInClass(label).ToList());
    }

    var potentialMerges = new List<RadiusMergeCandidate>();
    var minShrinkage = double.MaxValue;
    foreach (var label1 in labels)
    {
        // Visit each unordered pair once by requiring label1 to sort before label2.
        // CompareTo only promises a negative value (not exactly -1) for "less than",
        // so test the sign rather than a specific number.
        foreach (var label2 in labels.Where(label => label1.CompareTo(label) < 0))
        {
            var potentialMerge = new RadiusMergeCandidate(
                Clusters,
                label1,
                radii[label1],
                label2,
                radii[label2]
            );
            minShrinkage = Math.Min(minShrinkage, potentialMerge.Shrinkage);
            if (potentialMerge.Shrinkage <= MergeableShrinkage)
            {
                potentialMerges.Add(potentialMerge);
            }
        }
    }

    //TODO: Should we process merges from low shrinkage to high, and regenerate results after each merge?
    //      This is in case merging A + B is allowed and B + C is allowed, but
    //      after A + B are merged to form D, C + D are not a good merge.
    //      For now, process all merges that pass the Shrinkage test.
    var mergeCount = 0;
    foreach (var potentialMerge in potentialMerges)
    {
        // forceMerge = true: the size limit is deliberately bypassed for radius-based merges.
        if (Merge(potentialMerge.Point1, potentialMerge.Point2, true))
        {
            mergeCount++;
        }
    }

    Logger.Info($"{mergeCount} cluster pairs successfully merged by radius, with {potentialMerges.Count} expected.");
    Logger.Info($"Radius shrinkage values: Best {minShrinkage} vs Permitted {MergeableShrinkage}");
    Timer.Stop("Merge by radius");
    return mergeCount > 0;
}
/// <summary>
/// Approximates the closest pair of points between two clusters without comparing every point
/// of one cluster to every point of the other.
///
/// The steps are: compute the centroid C1 of the first cluster, pick the point P2 of the second
/// cluster nearest to C1, then pick the point P1 of the first cluster nearest to P2.
///
/// NOTE: If the two clusters overlap or are shaped irregularly, this is likely to return a poor result.
///       If the clusters are spherical, the results are likely to be very good.
/// </summary>
/// <param name="color1">Indicates the first cluster to be searched.</param>
/// <param name="color2">Indicates the second cluster to be searched.</param>
/// <returns>An approximate result, including one point from each cluster and the square of the distance between them.</returns>
public ClosestPair FindPairByCentroids(TLabel color1, TLabel color2)
{
    var firstCluster = Clusters.PointsInClass(color1);
    var secondCluster = Clusters.PointsInClass(color2);
    var centroidOfFirst = UnsignedPoint.Centroid(firstCluster);

    // P2: the member of the second cluster nearest to the first cluster's centroid.
    var nearestToCentroid =
        (from candidate in secondCluster
         orderby centroidOfFirst.Measure(candidate)
         select candidate).First();

    // P1: the member of the first cluster nearest to P2, packaged with its distance.
    var rankedPairs = firstCluster
        .Select(candidate => new ClosestPair(
            color1,
            candidate,
            color2,
            nearestToCentroid,
            candidate.Measure(nearestToCentroid)))
        .OrderBy(pair => pair.SquareDistance);
    var best = rankedPairs.First();

    return best.Swap(color1);
}
/// <summary>
/// Combines the cluster containing p1 with the cluster containing p2, subject to a size limit.
///
/// The merge is refused when the points already share a cluster, or when both clusters have
/// reached UnmergeableSize and the caller has not asked to force the merge.
/// </summary>
/// <param name="p1">Point belonging to first cluster to merge.</param>
/// <param name="p2">Point belonging to second cluster to merge.</param>
/// <param name="forceMerge">If true, the UnmergeableSize restriction is ignored.
/// If false, two clusters that are both at or above UnmergeableSize are not merged.</param>
/// <returns>False if the merge was refused; otherwise whatever Clusters.Merge reports.</returns>
private bool Merge(UnsignedPoint p1, UnsignedPoint p2, bool forceMerge = false)
{
    var firstLabel = Clusters.GetClassLabel(p1);
    var secondLabel = Clusters.GetClassLabel(p2);

    // Points already in the same cluster: nothing to merge.
    if (firstLabel.Equals(secondLabel))
    {
        return false;
    }

    var firstCount = Clusters.PointsInClass(firstLabel).Count;
    var secondCount = Clusters.PointsInClass(secondLabel).Count;

    // Only a pair of two "large" clusters is blocked; one large plus one small is allowed.
    var bothTooLarge = firstCount >= UnmergeableSize && secondCount >= UnmergeableSize;
    if (bothTooLarge && !forceMerge)
    {
        return false;
    }

    return Clusters.Merge(firstLabel, secondLabel);
}
/// <summary>
/// Performs a brute-force search for the closest pair of points between two clusters,
/// measuring every point of the first cluster against every point of the second.
///
/// Only the two named clusters are examined; points in all other clusters are ignored.
/// </summary>
/// <param name="color1">Label of the first cluster.</param>
/// <param name="color2">Label of the second cluster.</param>
/// <returns>The point with Color1, the point with Color2 and the square distance between them.</returns>
public ClosestPair FindPairExhaustively(TLabel color1, TLabel color2)
{
    long bestDistance = long.MaxValue;
    UnsignedPoint bestFirst = null;
    UnsignedPoint bestSecond = null;

    foreach (var firstPoint in Clusters.PointsInClass(color1))
    {
        foreach (var secondPoint in Clusters.PointsInClass(color2))
        {
            long distance = firstPoint.Measure(secondPoint);

            // Keep the earliest pair on ties: only a strictly smaller distance replaces the best.
            if (distance >= bestDistance)
            {
                continue;
            }

            bestDistance = distance;
            bestFirst = firstPoint;
            bestSecond = secondPoint;
        }
    }

    var result = new ClosestPair(color1, bestFirst, color2, bestSecond, bestDistance);
    return result.Swap(color1);
}