/// <summary>
 /// Makes sure the classification named by <c>_classificationName</c> is present in the
 /// database at <c>_databasePath</c>, adding it when the existence check reports it missing.
 /// Both calls write their error text to <c>_errOut</c>.
 /// </summary>
 private void VerifyExists()
 {
     bool alreadyPresent = Classification.Exists(_databasePath, _classificationName, out _errOut);
     if (alreadyPresent)
     {
         // Nothing to do; the entry is already there.
         return;
     }

     // The result of Add is intentionally not inspected here, matching the existing behavior.
     Classification.Add(_databasePath, _classificationName, out _errOut);
 }
        /// <summary>
        /// Exercises <c>Classification.Add</c> and checks that it reports success.
        /// </summary>
        public void AddTest()
        {
            // Precondition helper defined elsewhere; by its name it should leave the
            // classification absent before the add is attempted (NOTE(review): confirm).
            VerifyDoesntExist();

            bool wasAdded = Classification.Add(_databasePath, _classificationName, out _errOut);

            // Pass along the error text so that a failure can be reported with its cause.
            General.HasTrueValue(wasAdded, _errOut);
        }
Beispiel #3
0
        /// <summary>
        /// Creates noise points located between pairs of neighboring clusters and adds each one to
        /// whichever of the two clusters contains the existing point nearest to it.
        /// Every noise point is also recorded in <c>Noise</c>.
        /// </summary>
        /// <param name="noisePointsToAdd">Number of noise points to create. Nothing is done when this is zero or negative.</param>
        /// <param name="clusterCenters">Cluster centers keyed by cluster id. This method does not read the value.</param>
        /// <param name="clusters">Classification that receives the noise points.</param>
        private void AddNoise(int noisePointsToAdd, Dictionary <string, UnsignedPoint> clusterCenters, Classification <UnsignedPoint, string> clusters)
        {
            if (noisePointsToAdd <= 0)
            {
                return;
            }

            // Pair every cluster label with the neighbor reported by the approximate closest-point search.
            // Noise will be placed in the space between the two members of a randomly chosen pair.
            var neighborFinder = new PolyChromaticClosestPoint <string> (clusters);
            var neighborPairs  = new List <Tuple <String, String> > ();
            foreach (var label in clusters.ClassLabels())
            {
                var nearest = neighborFinder.FindClusterApproximately(label).Swap(label);
                neighborPairs.Add(new Tuple <string, string>(nearest.Color1, nearest.Color2));
            }

            // Random access by index is needed below, so copy each cluster's points into a List.
            // These lists are a snapshot: noise points added later are not part of them.
            var pointsByLabel = new Dictionary <string, List <UnsignedPoint> > ();
            foreach (var entry in clusters.LabelToPoints)
            {
                pointsByLabel [entry.Key] = entry.Value.ToList();
            }

            // Each noise point is a weighted average of one random point from each cluster of a random pair.
            // Points built this way tend to line up as a filament joining the two clusters, which is the
            // arrangement most likely to trick a clustering algorithm into merging them.
            for (var added = 0; added < noisePointsToAdd; added++)
            {
                var pair = neighborPairs [r.Next(neighborPairs.Count)];

                // Weights stay within 0.18 .. 0.82 so that most noise points land outside both clusters.
                var firstWeight  = r.NextDouble() * 0.64 + 0.18;
                var secondWeight = 1.0 - firstWeight;

                var firstCluster  = pointsByLabel[pair.Item1];
                var secondCluster = pointsByLabel[pair.Item2];
                var firstPoint    = firstCluster[r.Next(firstCluster.Count)];
                var secondPoint   = secondCluster[r.Next(secondCluster.Count)];

                var blended = new int[Dimensions];
                for (var axis = 0; axis < blended.Length; axis++)
                {
                    blended [axis] = (int)(firstWeight * firstPoint.Coordinates [axis] + secondWeight * secondPoint.Coordinates [axis]);
                }
                var noisePoint = new UnsignedPoint(blended);

                // Label the noise point after the cluster that holds its nearest existing point.
                var nearestInFirst  = firstCluster.Min(candidate => noisePoint.Measure(candidate));
                var nearestInSecond = secondCluster.Min(candidate => noisePoint.Measure(candidate));
                string chosenLabel;
                if (nearestInFirst < nearestInSecond)
                {
                    chosenLabel = pair.Item1;
                }
                else
                {
                    chosenLabel = pair.Item2;
                }

                clusters.Add(noisePoint, chosenLabel);
                Noise.Add(noisePoint);
            }
        }
Beispiel #4
0
        /// <summary>
        /// Builds a randomly generated test data set: up to <c>ClusterCount</c> clusters whose centers are kept
        /// at least a minimum separation apart, followed by noise points amounting to
        /// <c>NoisePercentage</c> percent of the clustered points.
        /// Cluster ids are the zero-based cluster index rendered as a string.
        /// </summary>
        /// <returns>Points that are grouped into clusters and stored in a Classification.</returns>
        public Classification <UnsignedPoint, string> MakeClusters()
        {
            var result = new Classification <UnsignedPoint, string>();

            r = new FastRandom();
            var maxDistanceSeen = 0.0;

            var separation = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);
            var centers    = new DiffuseGenerator(Dimensions, separation)
            {
                // Centers stay away from the edges so that generated points are not pushed
                // out of bounds and have their coordinates truncated.
                Minimum = MaxDistanceStdDev,
                Maximum = MaxCoordinate - MaxDistanceStdDev
            };
            var centerById   = new Dictionary <string, UnsignedPoint> ();
            var clusterIndex = 0;

            // Null centers are skipped, so fewer than ClusterCount clusters may be produced.
            var usableCenters = centers.Take(ClusterCount).Where(ctr => ctr != null);
            foreach (var center in usableCenters)
            {
                var centerPoint = new UnsignedPoint(center);

                // Sizes cycle through ClusterSizes when it has entries; otherwise a random size is drawn.
                int clusterSize = ClusterSizes.Length > 0
                    ? ClusterSizes[clusterIndex % ClusterSizes.Length]
                    : r.Next(MinClusterSize, MaxClusterSize);

                var spreads   = RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r);
                var generator = new EllipsoidalGenerator(center, spreads, Dimensions);
                var clusterId = clusterIndex.ToString();

                foreach (var pointNumber in Enumerable.Range(1, clusterSize))
                {
                    var point = new UnsignedPoint(generator.Generate(new int[Dimensions]));
                    result.Add(point, clusterId);

                    // Track the largest center-to-point distance for the diagnostic message below.
                    var distance = Math.Sqrt(centerPoint.Measure(point));
                    maxDistanceSeen = Math.Max(maxDistanceSeen, distance);
                }

                centerById[clusterId] = centerPoint;
                clusterIndex++;
            }

            var noiseCount = (int)Math.Floor(result.NumPoints * NoisePercentage / 100);
            AddNoise(noiseCount, centerById, result);

            Debug.WriteLine("Test data: Farthest Distance from center = {0:N2}. Minimum Distance Permitted between Clusters = {1:N2}. Max Standard Deviation = {2}",
                            maxDistanceSeen,
                            separation,
                            MaxDistanceStdDev
                            );
            return result;
        }
        /// <summary>
        /// Make a Classification of N-dimensional data where each input is an array of integers whose
        /// final element is the number of its category and whose preceding elements are the coordinates.
        /// The number of dimensions is taken from the first array; the category number becomes the
        /// class label as a culture-invariant string.
        /// </summary>
        /// <param name="pointsPlusClass">Data to classify. May be empty, in which case an empty Classification is returned.</param>
        /// <returns>A Classification of the points.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="pointsPlusClass"/> is null.</exception>
        public static Classification <UnsignedPoint, string> MakeClassification(IList <int[]> pointsPlusClass)
        {
            if (pointsPlusClass == null)
            {
                throw new ArgumentNullException("pointsPlusClass");
            }

            var c = new Classification <UnsignedPoint, string>();

            // With no rows there is no first element from which to infer the dimensionality.
            if (pointsPlusClass.Count == 0)
            {
                return c;
            }

            var dimensions = pointsPlusClass[0].Length - 1; // The last number for each point is its category.

            foreach (var pointPlusClass in pointsPlusClass)
            {
                var point = new UnsignedPoint(pointPlusClass.Take(dimensions).ToArray());
                c.Add(point, pointPlusClass[dimensions].ToString(CultureInfo.InvariantCulture));
            }
            return c;
        }
Beispiel #6
0
        /// <summary>
        /// Generate random points clumped into individual, well-separated chains of clusters.
        /// Each chain is built from several segment centers; every segment gets its own point generator
        /// and all points of a chain share one cluster id (the zero-based chain index as a string).
        /// </summary>
        /// <param name="chainLength">Value passed to the chain generator and used to divide a chain's
        /// points among its segments. Must be positive.
        /// NOTE(review): presumably the number of segment centers per chain; confirm against ChainGenerator.</param>
        /// <returns>Points that are grouped into clusters and stored in a Classification.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="chainLength"/> is zero or negative.</exception>
        public Classification <UnsignedPoint, string> MakeChains(int chainLength)
        {
            // A zero length would otherwise surface as a DivideByZeroException when sizing the segments.
            if (chainLength <= 0)
            {
                throw new ArgumentOutOfRangeException("chainLength", "The chain length must be positive.");
            }

            var clusters = new Classification <UnsignedPoint, string>();

            r = new FastRandom();

            var minDistance     = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);
            var centerGenerator = new ChainGenerator(Dimensions, minDistance)
            {
                // Keep the centers of the clusters away from the edge, so that points do not go out of bounds and have their coordinates truncated.
                Minimum = MaxDistanceStdDev,
                Maximum = MaxCoordinate - MaxDistanceStdDev
            };
            var segmentLength = (int)(MinDistanceStdDev * Math.Sqrt(Dimensions) / 3);
            var iCluster      = 0;

            // Empty chains are skipped, so fewer than ClusterCount chains may be produced.
            foreach (var chain in centerGenerator.Chains(chainLength, segmentLength).Take(ClusterCount).Where(chain => chain.Any()))
            {
                // The cluster size may be random, or come from ClusterSizes.
                int clusterSize;
                if (ClusterSizes.Length > 0)
                {
                    clusterSize = ClusterSizes[iCluster % ClusterSizes.Length];
                }
                else
                {
                    clusterSize = r.Next(MinClusterSize, MaxClusterSize);
                }
                // Having decided on an overall cluster size, each segment gets an equal share of the points.
                // Integer division discards any remainder, so the segment sizes may sum to less than clusterSize.
                var segmentSize = clusterSize / chainLength;
                var clusterId   = iCluster.ToString();
                // Each point generator is for a different segment of a chain.
                foreach (var pointGenerator in chain
                         .Select(segmentCenter =>
                                 new EllipsoidalGenerator(segmentCenter, RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r), Dimensions))
                         )
                {
                    foreach (var iPoint in Enumerable.Range(1, segmentSize))
                    {
                        clusters.Add(
                            new UnsignedPoint(pointGenerator.Generate(new int[Dimensions])),
                            clusterId
                            );
                    }
                }
                iCluster++;
            }
            return clusters;
        }
Beispiel #7
0
        /// <summary>
        /// Create two random clusters that may be separated from one another by enough distance
        /// that they do not overlap, or be partly overlapping, or fully overlapping.
        /// The second cluster's center is a copy of the first, shifted along the first coordinate by
        /// the minimum separation scaled by (100 - overlapPercent) / 100.
        ///
        /// NOTE: This type of setup is to test divisive clustering, that divides two partly mixed gaussians.
        /// </summary>
        /// <param name="overlapPercent">A number from zero to 100.
        /// If zero, the clusters do not overlap at all.
        /// If fifty, then the clusters partly overlap.
        /// If 100, the clusters have the same center, so are indistinguishable.
        /// The value is not range-checked.</param>
        /// <returns>The two clusters, labeled "0" and "1".</returns>
        /// <exception cref="InvalidOperationException">Thrown when the center generator does not supply a center for the first cluster.</exception>
        public Classification <UnsignedPoint, string> TwoClusters(double overlapPercent)
        {
            var clusters = new Classification <UnsignedPoint, string>();

            r = new FastRandom();

            var minDistance     = EllipsoidalGenerator.MinimumSeparation(MaxDistanceStdDev, Dimensions);
            var centerGenerator = new DiffuseGenerator(Dimensions, minDistance)
            {
                // Keep the centers of the clusters away from the edge, so that points do not go out of bounds and have their coordinates truncated.
                // Keep the maximum coordinate farther away, because we will pick the second point by shifting one coordinate
                // in the higher direction.
                Minimum = MaxDistanceStdDev,
                Maximum = MaxCoordinate - MaxDistanceStdDev - (int)minDistance
            };

            // FirstOrDefault yields null when no usable center is available; fail with a clear message
            // instead of a NullReferenceException on the Clone call below.
            var clusterCenter1 = centerGenerator.Take(1).FirstOrDefault();
            if (clusterCenter1 == null)
            {
                throw new InvalidOperationException("Unable to generate a center for the first cluster.");
            }
            var clusterCenter2 = (int[])clusterCenter1.Clone();

            clusterCenter2[0] += (int)(minDistance * (100.0 - overlapPercent) / 100.0);
            var centers = new[] { clusterCenter1, clusterCenter2 };

            for (var iCluster = 0; iCluster < centers.Length; iCluster++)
            {
                var clusterCenter  = centers[iCluster];
                var clusterSize    = r.Next(MinClusterSize, MaxClusterSize);
                var pointGenerator = new EllipsoidalGenerator(clusterCenter, RandomDoubles(Dimensions, MinDistanceStdDev, MaxDistanceStdDev, r), Dimensions);
                var clusterId      = iCluster.ToString();
                foreach (var iPoint in Enumerable.Range(1, clusterSize))
                {
                    clusters.Add(
                        new UnsignedPoint(pointGenerator.Generate(new int[Dimensions])),
                        clusterId
                        );
                }
            }
            //TODO: Go back and recluster the points. Put each point into the cluster whose centroid
            //      it is nearest. Thus, if two clusters partly overlap, the points from one will be pushed into the other.
            return clusters;
        }