Exemplo n.º 1
0
        /// <summary>
        /// Add noise points to the data, placing each one between two neighboring clusters and
        /// classifying it with whichever of those two clusters has the member point nearest to it.
        /// </summary>
        /// <remarks>
        /// Each noise point is a weighted average of one random point drawn from each cluster of a
        /// randomly chosen neighboring pair, so the noise tends to form a filament between the two clusters.
        /// Every noise point is added both to <paramref name="clusters"/> and to <c>Noise</c>.
        /// </remarks>
        /// <param name="noisePointsToAdd">Number of noise points to add. If zero or negative, nothing is done.</param>
        /// <param name="clusterCenters">Cluster centers for each cluster, where the key is the cluster id.
        /// Not consulted by this method: classification compares distances to the clusters' member points.</param>
        /// <param name="clusters">The noise points will be added to these clusters.</param>
        /// <exception cref="ArgumentOutOfRangeException">If <paramref name="clusters"/> yields no class labels,
        /// so that there is no pair of neighboring clusters between which to place noise.</exception>
        private void AddNoise(int noisePointsToAdd, Dictionary <string, UnsignedPoint> clusterCenters, Classification <UnsignedPoint, string> clusters)
        {
            if (noisePointsToAdd <= 0)
            {
                return;
            }
            var pccp    = new PolyChromaticClosestPoint <string> (clusters);
            var closest = new List <Tuple <string, string> > ();

            // Find the nearest neighboring cluster to each cluster.
            // We will be choosing random noise points positioned in the space between clusters that are near neighbors.
            foreach (var clusterId in clusters.ClassLabels())
            {
                var cp = pccp.FindClusterApproximately(clusterId).Swap(clusterId);
                closest.Add(new Tuple <string, string>(cp.Color1, cp.Color2));
            }

            // Without at least one pair of neighbors, indexing into the empty list below would fail with a
            // generic "index out of range" message. Fail here instead, with the same exception type
            // but a message that explains the actual problem.
            if (closest.Count == 0)
            {
                throw new ArgumentOutOfRangeException("clusters", "Cannot add noise points: no neighboring cluster pairs were found.");
            }

            // We need to pick random points from each cluster, so must convert from Sets to Lists for performance.
            // These lists are snapshots taken before any noise is added, so noise points are never
            // themselves chosen as endpoints for later noise points.
            var clustersAsLists = new Dictionary <string, List <UnsignedPoint> > ();

            foreach (var pair in clusters.LabelToPoints)
            {
                clustersAsLists [pair.Key] = pair.Value.ToList();
            }

            // Pick random pairs of clusters that are close neighbors.
            // Then pick a random point from each cluster and compute a weighted average of the two points.
            // This will construct noise points that tend to form a filament between two clusters.
            // Such connecting filaments pose the greatest likelihood of merging two distinct
            // clusters into one, the very error that must be compensated for by an improved algorithm.
            for (var i = 0; i < noisePointsToAdd; i++)
            {
                // NOTE: the order of the calls on r below (pair, weight, point 1, point 2) determines
                // which points a given random sequence produces; keep it unchanged.
                var whereToAdd = closest [r.Next(closest.Count)];

                // The weight will range from 0.18 to 0.82 so as to keep most noise points from being inside a cluster,
                // which would make them non-noisy.
                var weight1 = r.NextDouble() * 0.64 + 0.18;
                var weight2 = 1.0 - weight1;
                var c1      = clustersAsLists [whereToAdd.Item1];
                var c2      = clustersAsLists [whereToAdd.Item2];
                var p1      = c1 [r.Next(c1.Count)];
                var p2      = c2 [r.Next(c2.Count)];

                // Interpolate coordinate by coordinate; the cast truncates the fractional part.
                var vRandom = new int[Dimensions];
                for (var iDim = 0; iDim < vRandom.Length; iDim++)
                {
                    vRandom [iDim] = (int)(weight1 * p1.Coordinates [iDim] + weight2 * p2.Coordinates [iDim]);
                }
                var pRandom = new UnsignedPoint(vRandom);

                // Assign the noise point to whichever of the two clusters has the member point nearest to it.
                // On a tie the second cluster is chosen.
                var d1      = c1.Min(p => pRandom.Measure(p));
                var d2      = c2.Min(p => pRandom.Measure(p));
                var cRandom = d1 < d2 ? whereToAdd.Item1 : whereToAdd.Item2;
                clusters.Add(pRandom, cRandom);
                Noise.Add(pRandom);
            }
        }