/// <summary>
/// Generate synthetic noise points lying between pairs of neighboring clusters, assign each
/// one to a cluster, and record it in <c>Noise</c>.
/// Each noise point is a weighted average of one randomly chosen member from each cluster of a
/// randomly chosen neighboring pair. It is then added to whichever of those two clusters has the
/// member with the smaller <c>Measure</c> value relative to the new point (ties go to the second cluster).
/// Nothing happens when <paramref name="noisePointsToAdd"/> is zero or negative.
/// </summary>
/// <param name="noisePointsToAdd">Number of noise points to add.</param>
/// <param name="clusterCenters">Cluster centers keyed by cluster id. This method body does not read it.</param>
/// <param name="clusters">Classification that supplies the existing clusters and receives the noise points.</param>
private void AddNoise(int noisePointsToAdd, Dictionary<string, UnsignedPoint> clusterCenters, Classification<UnsignedPoint, string> clusters)
{
    // The blend weight is drawn from [0.18, 0.82) so that most noise points land in the gap
    // between two clusters rather than inside either one (which would make them non-noisy).
    const double lowestWeight = 0.18;
    const double weightRange = 0.64;

    if (noisePointsToAdd <= 0)
    {
        return;
    }

    // For every cluster, look up an (approximately) nearest neighboring cluster.
    // Noise is only ever placed in the space between such near neighbors.
    // NOTE(review): Swap(label) presumably orients the result so that this cluster comes first - confirm.
    var neighborFinder = new PolyChromaticClosestPoint<string>(clusters);
    var neighborPairs = new List<Tuple<string, string>>();
    foreach (var label in clusters.ClassLabels())
    {
        var nearest = neighborFinder.FindClusterApproximately(label).Swap(label);
        neighborPairs.Add(new Tuple<string, string>(nearest.Color1, nearest.Color2));
    }

    // Snapshot each cluster's members into a list so a random member can be picked by index.
    // The snapshot is taken once, so noise points added below are never used as endpoints.
    var membersByLabel = clusters.LabelToPoints.ToDictionary(
        entry => entry.Key,
        entry => entry.Value.ToList());

    // Blending members of two neighboring clusters tends to build a filament between them.
    // Such filaments are the most likely cause of two distinct clusters being merged into one,
    // which is exactly the error an improved algorithm must cope with.
    var remaining = noisePointsToAdd;
    while (remaining-- > 0)
    {
        var chosenPair = neighborPairs[r.Next(neighborPairs.Count)];
        var firstWeight = r.NextDouble() * weightRange + lowestWeight;
        var secondWeight = 1.0 - firstWeight;

        var firstMembers = membersByLabel[chosenPair.Item1];
        var secondMembers = membersByLabel[chosenPair.Item2];
        var firstEndpoint = firstMembers[r.Next(firstMembers.Count)];
        var secondEndpoint = secondMembers[r.Next(secondMembers.Count)];

        // Weighted average of the two endpoints, truncated to an integer per dimension.
        var blended = new int[Dimensions];
        for (var axis = 0; axis < blended.Length; axis++)
        {
            blended[axis] = (int)(firstWeight * firstEndpoint.Coordinates[axis] + secondWeight * secondEndpoint.Coordinates[axis]);
        }
        var noisePoint = new UnsignedPoint(blended);

        // Classify by the smallest Measure to any member of each candidate cluster.
        var firstBest = firstMembers.Min(member => noisePoint.Measure(member));
        var secondBest = secondMembers.Min(member => noisePoint.Measure(member));

        string assignedLabel;
        if (firstBest < secondBest)
        {
            assignedLabel = chosenPair.Item1;
        }
        else
        {
            assignedLabel = chosenPair.Item2;
        }

        clusters.Add(noisePoint, assignedLabel);
        Noise.Add(noisePoint);
    }
}