Beispiel #1
0
        /// <summary>
        /// Accumulates, over every clustered item, the value returned by
        /// <c>distance.Similarity</c> between the item's input vector and the
        /// centroid selected by the item's label, then divides by the sample count.
        /// </summary>
        /// <param name="clusters">Items grouped per cluster; every item contributes one term.</param>
        /// <param name="centroids">Centroid vectors, indexed by each item's <c>Label</c>.</param>
        /// <param name="numSamples">Divisor applied to the accumulated total.</param>
        /// <param name="distance">Metric whose <c>Similarity</c> method supplies each term.</param>
        /// <returns>The accumulated total divided by <paramref name="numSamples"/>.</returns>
        public static double WithinClusterVariance(ClusterItem[][] clusters, double[][] centroids, int numSamples, Distance distance)
        {
            double total = 0;

            foreach (ClusterItem[] members in clusters)
            {
                foreach (ClusterItem member in members)
                {
                    total += distance.Similarity(member.InputVector, centroids[member.Label]);
                }
            }

            return total / numSamples;
        }
Beispiel #2
0
        /// <summary>
        /// Computes the ICL criterion for an already grouped clustering: the
        /// log-likelihood minus the BIC-style penalty, minus a sum of logarithms over
        /// all samples, plus a sum of logarithms over the members of every cluster.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="clusters">Items grouped per cluster; <c>clusters[0][0]</c> must exist because it supplies the vector length.</param>
        /// <param name="numOfInputs">Total number of samples.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <returns>The ICL value.</returns>
        public static double ICL(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance)
        {
            // The vector length is read from the first item of the first cluster.
            int dimension    = clusters[0][0].InputVector.Length;
            int clusterCount = clusters.Length;

            double logLikelihood = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
            double penalty       = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(numOfInputs);
            double score         = logLikelihood - penalty;

            // One logarithmic term per sample, shifted by (K + 2) / 2.
            double sampleShift = (clusterCount + 2.0) / 2.0;
            double sampleTerm  = 0;
            for (int sample = 0; sample < numOfInputs; sample++)
            {
                sampleTerm += Math.Log(sample + sampleShift);
            }

            // One logarithmic term per cluster member, shifted by 3 / 2; null clusters are skipped.
            double memberTerm = 0;
            foreach (ClusterItem[] members in clusters)
            {
                if (members == null)
                {
                    continue;
                }

                for (int position = 0; position < members.Length; position++)
                {
                    memberTerm += Math.Log(position + 1.5);
                }
            }

            return score - sampleTerm + memberTerm;
        }
Beispiel #3
0
        /// <summary>
        /// Sums, over every clustered item, the logarithm of
        /// <c>exp(-|x - c|^2 / (2 * variance)) / sqrt(2 * pi * variance)</c>, where
        /// <c>c</c> is the centroid selected by the item's label and <c>variance</c>
        /// is the value returned by <c>WithinClusterVariance</c>.
        /// </summary>
        /// <param name="clusters">Items grouped per cluster.</param>
        /// <param name="centroids">Centroid vectors, indexed by each item's <c>Label</c>.</param>
        /// <param name="numSamples">Total number of samples, forwarded to <c>WithinClusterVariance</c>.</param>
        /// <param name="distance">Distance object forwarded to <c>WithinClusterVariance</c>.</param>
        /// <returns>The accumulated log-likelihood.</returns>
        private static double ComputeLikelihood(ClusterItem[][] clusters, double[][] centroids, int numSamples, Distance distance)
        {
            // Calculate within-cluster variance (shared by every term below).
            double variance = WithinClusterVariance(clusters, centroids, numSamples, distance);

            // log(exp(a) / sqrt(c)) is evaluated as a - 0.5 * log(c). Working in log
            // space keeps a strongly negative exponent from underflowing exp() to 0,
            // which would otherwise make the term log(0) = -Infinity.
            double logNormalizer = 0.5 * Math.Log(2.0 * Math.PI * variance);
            double likelihood    = 0;

            for (int i = 0; i < clusters.Length; i++)
            {
                for (int j = 0; j < clusters[i].Length; j++)
                {
                    ClusterItem item = clusters[i][j];
                    double[]    diff = VectorSubtraction(item.InputVector, centroids[item.Label]);

                    likelihood += (-DotProduct(diff, diff) / (2.0 * variance)) - logNormalizer;
                }
            }

            return likelihood;
        }
Beispiel #4
0
        /// <summary>
        /// Computes the BIC criterion for an already grouped clustering: the
        /// log-likelihood minus <c>((K * Q) + 1) / 2 * log(numOfInputs)</c>, where
        /// <c>K</c> is the number of clusters and <c>Q</c> the input vector length.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="clusters">Items grouped per cluster; <c>clusters[0][0]</c> must exist because it supplies the vector length.</param>
        /// <param name="numOfInputs">Total number of samples.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <returns>The BIC value.</returns>
        public static double BIC(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance)
        {
            // The vector length is read from the first item of the first cluster.
            int dimension    = clusters[0][0].InputVector.Length;
            int clusterCount = clusters.Length;

            double logLikelihood = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
            double penalty       = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(numOfInputs);

            return logLikelihood - penalty;
        }
Beispiel #5
0
        /// <summary>
        /// Computes the ICL criterion from raw inputs and their labels. The inputs are
        /// first grouped with <c>GroupByCluster</c>; the result is the log-likelihood
        /// minus the BIC-style penalty, minus a sum of logarithms over all samples,
        /// plus a sum of logarithms over the members of every cluster.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="inputs">Input vectors; <c>inputs[0]</c> must exist because it supplies the vector length.</param>
        /// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <returns>The ICL value.</returns>
        public static double ICL(double[][] centroids, double[][] inputs, int[] labels, Distance distance)
        {
            int sampleCount = inputs.Length;
            int dimension   = inputs[0].Length;

            // Group by cluster label; the legend output is not needed here.
            int[]           unusedLegends;
            ClusterItem[][] grouped      = GroupByCluster(inputs, labels, out unusedLegends);
            int             clusterCount = grouped.Length;

            double logLikelihood = ComputeLikelihood(grouped, centroids, sampleCount, distance);
            double penalty       = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(sampleCount);
            double score         = logLikelihood - penalty;

            // One logarithmic term per sample, shifted by (K + 2) / 2.
            double sampleShift = (clusterCount + 2.0) / 2.0;
            double sampleTerm  = 0;
            for (int sample = 0; sample < sampleCount; sample++)
            {
                sampleTerm += Math.Log(sample + sampleShift);
            }

            // One logarithmic term per cluster member, shifted by 3 / 2; null clusters are skipped.
            double memberTerm = 0;
            foreach (ClusterItem[] members in grouped)
            {
                if (members == null)
                {
                    continue;
                }

                for (int position = 0; position < members.Length; position++)
                {
                    memberTerm += Math.Log(position + 1.5);
                }
            }

            return score - sampleTerm + memberTerm;
        }
Beispiel #6
0
        /// <summary>
        /// Computes the BIC criterion from raw inputs and their labels. The inputs are
        /// first grouped with <c>GroupByCluster</c>; the result is the log-likelihood
        /// minus <c>((K * Q) + 1) / 2 * log(N)</c>, where <c>K</c> is the number of
        /// groups, <c>Q</c> the input vector length and <c>N</c> the number of inputs.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="inputs">Input vectors; <c>inputs[0]</c> must exist because it supplies the vector length.</param>
        /// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <returns>The BIC value.</returns>
        public static double BIC(double[][] centroids, double[][] inputs, int[] labels, Distance distance)
        {
            int sampleCount = inputs.Length;
            int dimension   = inputs[0].Length;

            // Group by cluster label; the legend output is not needed here.
            int[]           unusedLegends;
            ClusterItem[][] grouped      = GroupByCluster(inputs, labels, out unusedLegends);
            int             clusterCount = grouped.Length;

            double logLikelihood = ComputeLikelihood(grouped, centroids, sampleCount, distance);
            double penalty       = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(sampleCount);

            return logLikelihood - penalty;
        }
Beispiel #7
0
        /// <summary>
        /// Computes the AIC, BIC and ICL criteria for an already grouped clustering
        /// from a single log-likelihood evaluation.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="clusters">Items grouped per cluster; <c>clusters[0][0]</c> must exist because it supplies the vector length.</param>
        /// <param name="numOfInputs">Total number of samples.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="aic">Receives the log-likelihood minus <c>(K * Q) + 1</c>.</param>
        /// <param name="bic">Receives the log-likelihood minus <c>((K * Q) + 1) / 2 * log(numOfInputs)</c>.</param>
        /// <param name="icl">Receives <paramref name="bic"/> minus the per-sample log sum plus the per-member log sum.</param>
        public static void InformationCriteria(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance, out double aic, out double bic, out double icl)
        {
            // The vector length is read from the first item of the first cluster.
            int dimension    = clusters[0][0].InputVector.Length;
            int clusterCount = clusters.Length;

            double logLikelihood  = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
            int    parameterCount = (clusterCount * dimension) + 1;

            aic = logLikelihood - parameterCount;
            bic = logLikelihood - ((parameterCount / 2.0) * Math.Log(numOfInputs));

            // One logarithmic term per sample, shifted by (K + 2) / 2.
            double sampleShift = (clusterCount + 2.0) / 2.0;
            double sampleTerm  = 0;
            for (int sample = 0; sample < numOfInputs; sample++)
            {
                sampleTerm += Math.Log(sample + sampleShift);
            }

            // One logarithmic term per cluster member, shifted by 3 / 2; null clusters are skipped.
            double memberTerm = 0;
            foreach (ClusterItem[] members in clusters)
            {
                if (members == null)
                {
                    continue;
                }

                for (int position = 0; position < members.Length; position++)
                {
                    memberTerm += Math.Log(position + 1.5);
                }
            }

            icl = bic - sampleTerm + memberTerm;
        }
Beispiel #8
0
        // Implemented based on equations from the following publication:
        // "Feature-Space Clustering for fMRI Meta-Analysis"
        // by Cyril Goutte, Lars Kai Hansen, Matthew G. Liptrot and Egill Rostrup.
        // Accessed via https://www.ncbi.nlm.nih.gov/pubmed/11376501

        /// <summary>
        /// Computes the AIC, BIC and ICL criteria from raw inputs and their labels.
        /// The inputs are first grouped with <c>GroupByCluster</c>, then all three
        /// criteria are derived from a single log-likelihood evaluation.
        /// </summary>
        /// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="inputs">Input vectors; <c>inputs[0]</c> must exist because it supplies the vector length.</param>
        /// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
        /// <param name="distance">Distance object forwarded to <c>ComputeLikelihood</c>.</param>
        /// <param name="aic">Receives the log-likelihood minus <c>(K * Q) + 1</c>.</param>
        /// <param name="bic">Receives the log-likelihood minus <c>((K * Q) + 1) / 2 * log(N)</c>.</param>
        /// <param name="icl">Receives <paramref name="bic"/> minus the per-sample log sum plus the per-member log sum.</param>
        public static void InformationCriteria(double[][] centroids, double[][] inputs, int[] labels, Distance distance, out double aic, out double bic, out double icl)
        {
            int sampleCount = inputs.Length;
            int dimension   = inputs[0].Length;

            // Group by cluster label; the legend output is not needed here.
            int[]           unusedLegends;
            ClusterItem[][] grouped      = GroupByCluster(inputs, labels, out unusedLegends);
            int             clusterCount = grouped.Length;

            double logLikelihood  = ComputeLikelihood(grouped, centroids, sampleCount, distance);
            int    parameterCount = (clusterCount * dimension) + 1;

            aic = logLikelihood - parameterCount;
            bic = logLikelihood - ((parameterCount / 2.0) * Math.Log(sampleCount));

            // One logarithmic term per sample, shifted by (K + 2) / 2.
            double sampleShift = (clusterCount + 2.0) / 2.0;
            double sampleTerm  = 0;
            for (int sample = 0; sample < sampleCount; sample++)
            {
                sampleTerm += Math.Log(sample + sampleShift);
            }

            // One logarithmic term per cluster member, shifted by 3 / 2; null clusters are skipped.
            double memberTerm = 0;
            foreach (ClusterItem[] members in grouped)
            {
                if (members == null)
                {
                    continue;
                }

                for (int position = 0; position < members.Length; position++)
                {
                    memberTerm += Math.Log(position + 1.5);
                }
            }

            icl = bic - sampleTerm + memberTerm;
        }
Beispiel #9
0
 /// <summary>
 /// Returns the mean of <c>distance.Similarity</c> evaluated between
 /// <paramref name="currInput"/> and the input vector of every item in
 /// <paramref name="cluster"/>.
 /// </summary>
 /// <param name="currInput">Vector compared against each cluster member.</param>
 /// <param name="cluster">Cluster whose members are compared; every member counts towards the mean.</param>
 /// <param name="distance">Metric whose <c>Similarity</c> method supplies each value.</param>
 /// <returns>The arithmetic mean of the per-member values.</returns>
 private static double AverageInterDissimilar(double[] currInput, ClusterItem[] cluster, Distance distance)
 {
     double[] perMember = new double[cluster.Length];
     int      slot      = 0;

     foreach (ClusterItem member in cluster)
     {
         perMember[slot] = distance.Similarity(currInput, member.InputVector);
         slot++;
     }

     return perMember.Average();
 }
Beispiel #10
0
 /// <summary>
 /// Returns the sum of <c>distance.Similarity</c> between
 /// <paramref name="currInput"/> and every item in <paramref name="cluster"/>,
 /// divided by the member count minus one (the excluded member being the
 /// input itself).
 /// </summary>
 /// <param name="currInput">Vector compared against each cluster member; presumably itself a member of <paramref name="cluster"/>.</param>
 /// <param name="cluster">Cluster whose members are compared.</param>
 /// <param name="distance">Metric whose <c>Similarity</c> method supplies each value.</param>
 /// <returns>
 /// The averaged value, or 0 when the cluster holds at most one member, since
 /// there is then no other member to average over.
 /// </returns>
 private static double AverageDissimilar(double[] currInput, ClusterItem[] cluster, Distance distance)
 {
     // With a single member the divisor (Length - 1) is zero, which would
     // yield NaN or Infinity and poison every aggregate built on this value.
     if (cluster.Length <= 1)
     {
         return 0.0;
     }

     double total = 0;
     for (int i = 0; i < cluster.Length; i++)
     {
         total += distance.Similarity(currInput, cluster[i].InputVector);
     }

     return total / (cluster.Length - 1); // remove one element => itself
 }