/// <summary>
        /// Examine every unordered pair of clusters and merge those pairs whose radius shrinkage
        /// (see <c>RadiusMergeCandidate.Shrinkage</c>) does not exceed <c>MergeableShrinkage</c>.
        ///
        /// Nothing is attempted when <c>MergeableShrinkage</c> is zero or negative, or when there is
        /// exactly one partition. All candidates are collected first, using the radii measured before
        /// any merge takes place, and only then are the merges applied.
        /// </summary>
        /// <returns>True if any merges were performed, false otherwise.</returns>
        bool MergeByRadius()
        {
            if (MergeableShrinkage <= 0 || Clusters.NumPartitions == 1)
            {
                return false;
            }
            Timer.Start("Merge by radius");

            // Snapshot the labels once: nothing below mutates Clusters until the merge loop,
            // so there is no need to ask for the labels again on every outer iteration.
            var labels = Clusters.ClassLabels().ToList();

            // Radius statistics for each individual cluster, keyed by class label.
            var radii = new Dictionary<string, ClusterRadius>();
            foreach (var label in labels)
            {
                radii[label] = new ClusterRadius(Clusters.PointsInClass(label).ToList());
            }

            var potentialMerges = new List<RadiusMergeCandidate>();
            var minShrinkage    = double.MaxValue;

            foreach (var label1 in labels)
            {
                // Visit each unordered pair once by requiring label1 to sort before label2.
                // CompareTo is only documented to return a value less than zero (not exactly -1),
                // so test the sign rather than a specific value.
                foreach (var label2 in labels.Where(label => label1.CompareTo(label) < 0))
                {
                    var potentialMerge = new RadiusMergeCandidate(
                        Clusters,
                        label1,
                        radii[label1],
                        label2,
                        radii[label2]
                        );
                    // Track the best (lowest) shrinkage seen, purely for the log message below.
                    minShrinkage = Math.Min(minShrinkage, potentialMerge.Shrinkage);
                    if (potentialMerge.Shrinkage <= MergeableShrinkage)
                    {
                        potentialMerges.Add(potentialMerge);
                    }
                }
            }

            //TODO: Should we process merges from low shrinkage to high, and regenerate results after each merge?
            //      This is in case merging A + B is allowed and B + C is allowed, but
            //      after A + B are merged to form D, C + D are not a good merge.
            //      For now, process all merges that pass the Shrinkage test.
            int mergeCount = 0;
            foreach (var potentialMerge in potentialMerges)
            {
                // Merge reports whether it actually combined the clusters holding the two points.
                if (Merge(potentialMerge.Point1, potentialMerge.Point2, true))
                {
                    mergeCount++;
                }
            }
            Logger.Info($"{mergeCount} cluster pairs successfully merged by radius, with {potentialMerges.Count} expected.");
            Logger.Info($"Radius shrinkage values: Best {minShrinkage} vs Permitted {MergeableShrinkage}");
            Timer.Stop("Merge by radius");
            return mergeCount > 0;
        }
        // Example #2
        // 0
        /// <summary>
        /// Treating this ClusterRadius as the statistics for the union of two clusters, compute the ratio of
        /// its blended radius to the sum of the blended radii of the two component clusters.
        ///
        /// The DensityClassifier can use this ratio to decide whether two clusters that it carved out of a
        /// single original cluster ought to be rejoined.
        ///
        /// The blended radius weights the maximum radius twice as heavily as the mean radius. Neither value
        /// is used alone because outliers can inflate the maximum, while the mean can grossly understate
        /// the spatial extent of a cluster.
        /// </summary>
        /// <param name="part1">Statistics about the first component cluster.</param>
        /// <param name="part2">Statistics about the second component cluster.</param>
        /// <returns>The shrinkage ratio. A value greater than one means the combined cluster has a larger
        /// blended radius than the sum for its parts; a value less than one means it is smaller.
        /// Merging is only appropriate when the value is below one; how far below is a matter for
        /// further research. Zero is returned when the combined blended radius is zero.
        /// </returns>
        public double Shrinkage(ClusterRadius part1, ClusterRadius part2)
        {
            var summedMaxima = part1.MaximumRadius + part2.MaximumRadius;
            var summedMeans  = part1.MeanRadius + part2.MeanRadius;

            //TODO: Research how to weight the values. Use 2:1 weighting for now. 1:1 did not work as well as expected.
            var blendedParts = (2 * summedMaxima + summedMeans) / 3;
            var blendedWhole = (2 * this.MaximumRadius + this.MeanRadius) / 3;

            // A combined radius of zero short-circuits to zero instead of performing the division.
            return blendedWhole == 0 ? 0 : blendedWhole / blendedParts;
        }
 /// <summary>
 /// Describe a potential merge of two labeled clusters: record both labels, take the first point of
 /// each class as Point1 / Point2, measure the radius of the two classes combined, and compute the
 /// shrinkage of that combined radius relative to the two individual radii.
 /// </summary>
 /// <param name="clusters">Classification that holds the points for both labels.</param>
 /// <param name="label1">Label of the first cluster.</param>
 /// <param name="radius1">Radius statistics already computed for the first cluster.</param>
 /// <param name="label2">Label of the second cluster.</param>
 /// <param name="radius2">Radius statistics already computed for the second cluster.</param>
 public RadiusMergeCandidate(
     Classification <UnsignedPoint, string> clusters,
     string label1,
     ClusterRadius radius1,
     string label2,
     ClusterRadius radius2
     )
 {
     // Look each class up once and reuse the result; this constructor runs for every pair of
     // clusters, so repeated lookups add up.
     // NOTE(review): assumes PointsInClass is a side-effect-free lookup - confirm.
     Label1      = label1;
     var points1 = clusters.PointsInClass(Label1);
     Point1      = points1.First();   // First() throws if the class has no points.

     Label2      = label2;
     var points2 = clusters.PointsInClass(Label2);
     Point2      = points2.First();

     CombinedRadius = new ClusterRadius(points1, points2);
     Shrinkage      = CombinedRadius.Shrinkage(radius1, radius2);
 }