Generate() public method

Generates.
public Generate ( Descriptor descriptor, IEnumerable examples, int k, IDistance metric = null ) : numl.Unsupervised.Cluster
descriptor Descriptor The descriptor.
examples IEnumerable The examples.
k int The int to process.
metric IDistance (Optional) the metric.
return numl.Unsupervised.Cluster
Example #1
0
        public void Test_Feed_KMeans()
        {
            int groups = 4;
            Feed[] feeds = Feed.GetData();
            Descriptor descriptor = Descriptor.Create<Feed>();
            KMeans kmeans = new KMeans();
            kmeans.Descriptor = descriptor;

            int[] grouping = kmeans.Generate(feeds, groups, new CosineDistance());

            for (int i = 0; i < grouping.Length; i++)
                feeds[i].Cluster = grouping[i];
        }
Example #2
0
        public void Test_Object_KMeans(int size)
        {
            Matrix X = GenerateData(size);
            var objects = X.GetRows()
                           .Select(v => new AB { A = v[0], B = v[1] })
                           .ToArray();

            var descriptor = Descriptor.Create<AB>();

            KMeans model = new KMeans();
            var clusters = model.Generate(descriptor, objects, 2, new EuclidianDistance());
            Assert.AreEqual(2, clusters.Children.Length);
            Assert.AreEqual(size, clusters[0].Members.Length);
            Assert.AreEqual(size, clusters[1].Members.Length);
        }
Example #3
0
        public void Test_Numerical_KMeans(int size)
        {
            Matrix X = GenerateData(size);

            KMeans model = new KMeans();
            var assignment = model.Generate(X, 2, new EuclidianDistance());
            Assert.AreEqual(size * 2, assignment.Length);
            var a1 = assignment.First();
            var a2 = assignment.Last();
            for (int i = 0; i < size * 2; i++)
            {
                if (i < size)
                    Assert.AreEqual(a1, assignment[i]);
                else
                    Assert.AreEqual(a2, assignment[i]);
            }
        }
Example #4
0
        /// <summary>Generates.</summary>
        /// <param name="X">The Matrix to process.</param>
        /// <param name="k">The int to process.</param>
        public void Generate(Matrix X, int k)
        {
            int n = X.Rows;
            int d = X.Cols;
            
            /***********************
             * initialize parameters
             ***********************/
            // convergence params
            var log_probability = 0d;
            var probability_difference = double.MaxValue;
            var mu_difference = double.MaxValue;

            // initialize centers with KMeans
            KMeans kmeans = new KMeans();
            var asgn = kmeans.Generate(X, k, new EuclidianDistance());

            // tentative centers
            var mu_k = kmeans.Centers;

            // initial covariances (stored as diag(cov) 1 of k)
            var sg_k = new Matrix(k, d);
            for (int i = 0; i < k; i++)
            {
                var indices = asgn.Select((a, b) => new Tuple<int, int>(a, b)).Where(t => t.Item1 == i).Select(t => t.Item2);
                var matrix = X.Slice(indices, VectorType.Row);
                sg_k[i] = matrix.CovarianceDiag();
            }

            // mixing coefficient
            var pi_k = asgn
                        .OrderBy(i => i)
                        .GroupBy(j => j)
                        .Select(g => (double)g.Count() / (double)asgn.Length)
                        .ToVector();

            int max_iter = 100;
            do
            {
                /***********************
                 * Expectation Step
                 ***********************/
                // responsibilty matrix: how much is gaussian k responsible for this point x
                var z_nk = new Matrix(n, k);
                for (int i = 0; i < n; i++)
                {
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                        z_nk[i, j] = pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

                    var dn = z_nk[i].Sum();

                    if(dn == 0)
                        Console.WriteLine("Uh oh....");

                    z_nk[i].Each(z => z / dn);
                }

                /***********************
                 * Maximization Step
                 ***********************/
                var N_k = z_nk.Sum(VectorType.Row);

                var mu_k_new = new Matrix(mu_k.Rows, mu_k.Cols);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                        sum += z_nk[j, i] * X[j];
                    mu_k_new[i] = sum / N_k[i];
                }

                var sg_k_new = new Matrix(k, d);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                        sum += z_nk[j, i] * (X[j] - mu_k_new[i]).Each(s => s * s);
                    sg_k_new[i] = sum / N_k[i];
                }

                var pi_k_new = N_k / n;

                /***********************
                 * Convergence Check
                 ***********************/
                var new_log_prob = 0d;
                for (int i = 0; i < n; i++)
                {
                    var acc = 0d;
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                        acc += pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);

                    new_log_prob += System.Math.Log(acc, System.Math.E);
                }

                // log likelihood differences
                probability_difference = System.Math.Abs(log_probability - new_log_prob);
                Console.WriteLine("Log Likelihoods (Total Points: {0}, k={1}, d={2})\nO: {3}\nN: {4}\nDifference: {5}\n", n, k, d, log_probability, new_log_prob, probability_difference);
                log_probability = new_log_prob;


                // centers differences
                mu_difference = mu_k.GetRows()
                                .Zip(mu_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                                .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Centers:\nO: {0}\nN: {1}\nDifference: {2}\n", mu_k, mu_k_new, mu_difference);
                mu_k = mu_k_new;

                // covariance differences
                double diff = sg_k.GetRows()
                            .Zip(sg_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                            .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Covariance:\nO: {0}\nN: {1}\nDifference: {2}\n", sg_k, sg_k_new, diff);
                sg_k = sg_k_new;

                // mixing differences
                diff = (pi_k - pi_k_new).Each(s => System.Math.Abs(s)).Sum();
                Console.WriteLine("Mixing Coeffs:\nO: {0}\nN: {1}\nDifference: {2}\n", pi_k, pi_k_new, diff);
                pi_k = pi_k_new;

                Console.WriteLine("-------------------------------------------------------------");

            } while (probability_difference > .0000000001 && mu_difference > .0000000001 && --max_iter >= 0);
        }
Example #5
0
        /// <summary>Generates.</summary>
        /// <param name="X">The Matrix to process.</param>
        /// <param name="k">The int to process.</param>
        public void Generate(Matrix X, int k)
        {
            int n = X.Rows;
            int d = X.Cols;

            /***********************
            * initialize parameters
            ***********************/
            // convergence params
            var log_probability        = 0d;
            var probability_difference = double.MaxValue;
            var mu_difference          = double.MaxValue;

            // initialize centers with KMeans
            KMeans kmeans = new KMeans();
            var    asgn   = kmeans.Generate(X, k, new EuclidianDistance());

            // tentative centers
            var mu_k = kmeans.Centers;

            // initial covariances (stored as diag(cov) 1 of k)
            var sg_k = new Matrix(k, d);

            for (int i = 0; i < k; i++)
            {
                var indices = asgn.Select((a, b) => new Tuple <int, int>(a, b)).Where(t => t.Item1 == i).Select(t => t.Item2);
                var matrix  = X.Slice(indices, VectorType.Row);
                sg_k[i] = matrix.CovarianceDiag();
            }

            // mixing coefficient
            var pi_k = asgn
                       .OrderBy(i => i)
                       .GroupBy(j => j)
                       .Select(g => (double)g.Count() / (double)asgn.Length)
                       .ToVector();

            int max_iter = 100;

            do
            {
                /***********************
                * Expectation Step
                ***********************/
                // responsibilty matrix: how much is gaussian k responsible for this point x
                var z_nk = new Matrix(n, k);
                for (int i = 0; i < n; i++)
                {
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                    {
                        z_nk[i, j] = pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);
                    }

                    var dn = z_nk[i].Sum();

                    if (dn == 0)
                    {
                        Console.WriteLine("Uh oh....");
                    }

                    z_nk[i].Each(z => z / dn);
                }

                /***********************
                * Maximization Step
                ***********************/
                var N_k = z_nk.Sum(VectorType.Row);

                var mu_k_new = new Matrix(mu_k.Rows, mu_k.Cols);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                    {
                        sum += z_nk[j, i] * X[j];
                    }
                    mu_k_new[i] = sum / N_k[i];
                }

                var sg_k_new = new Matrix(k, d);
                for (int i = 0; i < k; i++)
                {
                    var sum = Vector.Zeros(d);
                    for (int j = 0; j < n; j++)
                    {
                        sum += z_nk[j, i] * (X[j] - mu_k_new[i]).Each(s => s * s);
                    }
                    sg_k_new[i] = sum / N_k[i];
                }

                var pi_k_new = N_k / n;

                /***********************
                * Convergence Check
                ***********************/
                var new_log_prob = 0d;
                for (int i = 0; i < n; i++)
                {
                    var acc = 0d;
                    //  pi_j * N(x_n | mu_j, sigma_j)
                    for (int j = 0; j < k; j++)
                    {
                        acc += pi_k[j] * Normal(X[i], mu_k[j], sg_k[j]);
                    }

                    new_log_prob += System.Math.Log(acc, System.Math.E);
                }

                // log likelihood differences
                probability_difference = System.Math.Abs(log_probability - new_log_prob);
                Console.WriteLine("Log Likelihoods (Total Points: {0}, k={1}, d={2})\nO: {3}\nN: {4}\nDifference: {5}\n", n, k, d, log_probability, new_log_prob, probability_difference);
                log_probability = new_log_prob;


                // centers differences
                mu_difference = mu_k.GetRows()
                                .Zip(mu_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                                .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Centers:\nO: {0}\nN: {1}\nDifference: {2}\n", mu_k, mu_k_new, mu_difference);
                mu_k = mu_k_new;

                // covariance differences
                double diff = sg_k.GetRows()
                              .Zip(sg_k_new.GetRows(), (v1, v2) => new { V1 = v1, V2 = v2 })
                              .Sum(a => (a.V1 - a.V2).Norm());

                Console.WriteLine("Covariance:\nO: {0}\nN: {1}\nDifference: {2}\n", sg_k, sg_k_new, diff);
                sg_k = sg_k_new;

                // mixing differences
                diff = (pi_k - pi_k_new).Each(s => System.Math.Abs(s)).Sum();
                Console.WriteLine("Mixing Coeffs:\nO: {0}\nN: {1}\nDifference: {2}\n", pi_k, pi_k_new, diff);
                pi_k = pi_k_new;

                Console.WriteLine("-------------------------------------------------------------");
            } while (probability_difference > .0000000001 && mu_difference > .0000000001 && --max_iter >= 0);
        }