/// <summary>
/// Sums <c>distance.Similarity</c> between every clustered item and the centroid selected by
/// that item's <c>Label</c>, then divides the total by <paramref name="numSamples"/>.
/// </summary>
/// <param name="clusters">Items grouped per cluster. A <c>null</c> slot is treated as an empty cluster.</param>
/// <param name="centroids">Centroid vectors, indexed by each item's <c>Label</c>.</param>
/// <param name="numSamples">
/// Divisor applied to the accumulated total. NOTE(review): presumably the total number of items
/// across all clusters - confirm against callers. It is not validated; 0 yields NaN or infinity.
/// </param>
/// <param name="distance">Measure whose <c>Similarity</c> method is evaluated for each item.</param>
/// <returns>The accumulated total divided by <paramref name="numSamples"/>.</returns>
public static double WithinClusterVariance(ClusterItem[][] clusters, double[][] centroids, int numSamples, Distance distance)
{
    double total = 0;

    foreach (ClusterItem[] members in clusters)
    {
        // The ICL / InformationCriteria loops in this class tolerate null cluster slots;
        // treat them the same way here instead of throwing a NullReferenceException.
        if (members == null)
        {
            continue;
        }

        foreach (ClusterItem item in members)
        {
            total += distance.Similarity(item.InputVector, centroids[item.Label]);
        }
    }

    // total is a double, so this is already floating-point division.
    return total / numSamples;
}
/// <summary>
/// Computes the ICL score for an already grouped clustering. The starting value is
/// <c>ComputeLikelihood(...) - ((K * Q + 1) / 2) * ln(numOfInputs)</c>, where K is the number of
/// clusters and Q is the length of the first item's input vector. From it the method subtracts
/// the sum of <c>ln(i + (K + 2) / 2)</c> for i in [0, numOfInputs) and adds, for every non-null
/// cluster, the sum of <c>ln(j + 3/2)</c> for j in [0, cluster length).
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="clusters">
/// Items grouped per cluster. <c>clusters[0][0]</c> is read to obtain Q, so the first cluster
/// must be non-null and non-empty.
/// </param>
/// <param name="numOfInputs">Sample count used by the likelihood, the penalty and the first log-sum.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <returns>The ICL score described above.</returns>
public static double ICL(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance)
{
    int dimension = clusters[0][0].InputVector.Length;
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
    double penalty = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(numOfInputs);
    double baseScore = logLikelihood - penalty;

    // Log-sum over all samples.
    double sampleOffset = (clusterCount + 2.0) / 2.0;
    double allSamplesTerm = 0;
    for (int n = 0; n < numOfInputs; n++)
    {
        allSamplesTerm += Math.Log(n + sampleOffset);
    }

    // Log-sum over the members of each cluster; null slots contribute nothing.
    double perClusterTerm = 0;
    foreach (ClusterItem[] members in clusters)
    {
        if (members == null)
        {
            continue;
        }

        for (int m = 0; m < members.Length; m++)
        {
            perClusterTerm += Math.Log(m + (3.0 / 2.0));
        }
    }

    return baseScore - allSamplesTerm + perClusterTerm;
}
/// <summary>
/// Accumulates a Gaussian log-likelihood over every clustered item. The variance comes from
/// <c>WithinClusterVariance</c>; each item contributes
/// <c>-|x - c|^2 / (2 * variance) - 0.5 * ln(2 * pi * variance)</c>, where <c>c</c> is the
/// centroid selected by the item's <c>Label</c>.
/// </summary>
/// <remarks>
/// This is algebraically the same as <c>ln(exp(-d / (2v)) / sqrt(2 * pi * v))</c>, but evaluated
/// directly in log space. The exp/log round trip underflows: <c>Math.Exp</c> returns 0 for
/// arguments below roughly -745, which makes the logarithm negative infinity for items far
/// from their centroid.
/// </remarks>
/// <param name="clusters">Items grouped per cluster. <c>null</c> slots are skipped by this method's own loop.</param>
/// <param name="centroids">Centroid vectors, indexed by each item's <c>Label</c>.</param>
/// <param name="numSamples">Sample count, forwarded to <c>WithinClusterVariance</c>.</param>
/// <param name="distance">Distance measure, forwarded to <c>WithinClusterVariance</c>.</param>
/// <returns>The sum of the per-item log terms. A variance of zero is not special-cased.</returns>
private static double ComputeLikelihood(ClusterItem[][] clusters, double[][] centroids, int numSamples, Distance distance)
{
    double variance = WithinClusterVariance(clusters, centroids, numSamples, distance);

    // Both terms are identical for every item, so compute them once.
    double twoVariance = 2.0 * variance;
    double logNormaliser = 0.5 * Math.Log(2.0 * Math.PI * variance);

    double likelihood = 0;
    foreach (ClusterItem[] members in clusters)
    {
        // A null slot carries no items and therefore contributes nothing.
        if (members == null)
        {
            continue;
        }

        foreach (ClusterItem item in members)
        {
            double[] diff = VectorSubtraction(item.InputVector, centroids[item.Label]);

            // ln(exp(a) / b) == a - ln(b); staying in log space avoids the exp underflow.
            likelihood += (-DotProduct(diff, diff) / twoVariance) - logNormaliser;
        }
    }

    return likelihood;
}
/// <summary>
/// Computes the BIC score for an already grouped clustering: the value of
/// <c>ComputeLikelihood</c> minus the penalty <c>((K * Q + 1) / 2) * ln(numOfInputs)</c>, where
/// K is the number of clusters and Q is the length of the first item's input vector.
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="clusters">
/// Items grouped per cluster. <c>clusters[0][0]</c> is read to obtain Q, so the first cluster
/// must be non-null and non-empty.
/// </param>
/// <param name="numOfInputs">Sample count used by the likelihood and inside the penalty's logarithm.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <returns>Likelihood minus penalty.</returns>
public static double BIC(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance)
{
    int dimension = clusters[0][0].InputVector.Length;
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
    double halfParameterCount = ((clusterCount * dimension) + 1) / 2.0;
    double penalty = halfParameterCount * Math.Log(numOfInputs);

    return logLikelihood - penalty;
}
/// <summary>
/// Computes the ICL score from raw inputs and their cluster labels. The inputs are grouped with
/// <c>GroupByCluster</c>; the starting value is
/// <c>ComputeLikelihood(...) - ((K * Q + 1) / 2) * ln(N)</c>, where N is the number of inputs,
/// K is the number of groups returned and Q is the length of the first input vector. From it the
/// method subtracts the sum of <c>ln(i + (K + 2) / 2)</c> for i in [0, N) and adds, for every
/// non-null cluster, the sum of <c>ln(j + 3/2)</c> for j in [0, cluster length).
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="inputs">Input vectors; <c>inputs[0]</c> is read to obtain Q, so it must exist.</param>
/// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <returns>The ICL score described above.</returns>
public static double ICL(double[][] centroids, double[][] inputs, int[] labels, Distance distance)
{
    int sampleCount = inputs.Length;
    int dimension = inputs[0].Length;

    // The second output of GroupByCluster is required by its signature but not used here.
    int[] legends = null;
    ClusterItem[][] clusters = GroupByCluster(inputs, labels, out legends);
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, sampleCount, distance);
    double penalty = (((clusterCount * dimension) + 1) / 2.0) * Math.Log(sampleCount);
    double baseScore = logLikelihood - penalty;

    // Log-sum over all samples.
    double sampleOffset = (clusterCount + 2.0) / 2.0;
    double allSamplesTerm = 0;
    for (int n = 0; n < sampleCount; n++)
    {
        allSamplesTerm += Math.Log(n + sampleOffset);
    }

    // Log-sum over the members of each cluster; null slots contribute nothing.
    double perClusterTerm = 0;
    foreach (ClusterItem[] members in clusters)
    {
        if (members == null)
        {
            continue;
        }

        for (int m = 0; m < members.Length; m++)
        {
            perClusterTerm += Math.Log(m + (3.0 / 2.0));
        }
    }

    return baseScore - allSamplesTerm + perClusterTerm;
}
/// <summary>
/// Computes the BIC score from raw inputs and their cluster labels. The inputs are grouped with
/// <c>GroupByCluster</c>; the result is the value of <c>ComputeLikelihood</c> minus the penalty
/// <c>((K * Q + 1) / 2) * ln(N)</c>, where N is the number of inputs, K is the number of groups
/// returned and Q is the length of the first input vector.
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="inputs">Input vectors; <c>inputs[0]</c> is read to obtain Q, so it must exist.</param>
/// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <returns>Likelihood minus penalty.</returns>
public static double BIC(double[][] centroids, double[][] inputs, int[] labels, Distance distance)
{
    int sampleCount = inputs.Length;
    int dimension = inputs[0].Length;

    // The second output of GroupByCluster is required by its signature but not used here.
    int[] legends = null;
    ClusterItem[][] clusters = GroupByCluster(inputs, labels, out legends);
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, sampleCount, distance);
    double halfParameterCount = ((clusterCount * dimension) + 1) / 2.0;
    double penalty = halfParameterCount * Math.Log(sampleCount);

    return logLikelihood - penalty;
}
/// <summary>
/// Computes the AIC, BIC and ICL scores for an already grouped clustering from a single
/// likelihood evaluation. With L the value of <c>ComputeLikelihood</c>, K the number of clusters
/// and Q the length of the first item's input vector:
/// <c>aic = L - (K * Q + 1)</c>;
/// <c>bic = L - ((K * Q + 1) / 2) * ln(numOfInputs)</c>;
/// <c>icl = bic - sum(ln(i + (K + 2) / 2), i in [0, numOfInputs)) + sum over non-null clusters of
/// sum(ln(j + 3/2), j in [0, cluster length))</c>.
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="clusters">
/// Items grouped per cluster. <c>clusters[0][0]</c> is read to obtain Q, so the first cluster
/// must be non-null and non-empty.
/// </param>
/// <param name="numOfInputs">Sample count used by the likelihood, the BIC penalty and the first ICL log-sum.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="aic">Receives the AIC value.</param>
/// <param name="bic">Receives the BIC value.</param>
/// <param name="icl">Receives the ICL value.</param>
public static void InformationCriteria(double[][] centroids, ClusterItem[][] clusters, int numOfInputs, Distance distance, out double aic, out double bic, out double icl)
{
    int dimension = clusters[0][0].InputVector.Length;
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, numOfInputs, distance);
    int parameterCount = (clusterCount * dimension) + 1;

    aic = logLikelihood - parameterCount;
    bic = logLikelihood - ((parameterCount / 2.0) * Math.Log(numOfInputs));

    // Log-sum over all samples.
    double sampleOffset = (clusterCount + 2.0) / 2.0;
    double allSamplesTerm = 0;
    for (int n = 0; n < numOfInputs; n++)
    {
        allSamplesTerm += Math.Log(n + sampleOffset);
    }

    // Log-sum over the members of each cluster; null slots contribute nothing.
    double perClusterTerm = 0;
    foreach (ClusterItem[] members in clusters)
    {
        if (members == null)
        {
            continue;
        }

        for (int m = 0; m < members.Length; m++)
        {
            perClusterTerm += Math.Log(m + (3.0 / 2.0));
        }
    }

    icl = bic - allSamplesTerm + perClusterTerm;
}
// Implemented based on equations from the following publication:
// "Feature-Space Clustering for fMRI Meta-Analysis"
// by Cyril Goutte, Lars Kai Hansen, Matthew G. Liptrot and Egill Rostrup.
// Accessed via https://www.ncbi.nlm.nih.gov/pubmed/11376501

/// <summary>
/// Computes the AIC, BIC and ICL scores from raw inputs and their cluster labels, using a single
/// likelihood evaluation. The inputs are grouped with <c>GroupByCluster</c>. With L the value of
/// <c>ComputeLikelihood</c>, N the number of inputs, K the number of groups returned and Q the
/// length of the first input vector:
/// <c>aic = L - (K * Q + 1)</c>;
/// <c>bic = L - ((K * Q + 1) / 2) * ln(N)</c>;
/// <c>icl = bic - sum(ln(i + (K + 2) / 2), i in [0, N)) + sum over non-null clusters of
/// sum(ln(j + 3/2), j in [0, cluster length))</c>.
/// </summary>
/// <param name="centroids">Centroid vectors, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="inputs">Input vectors; <c>inputs[0]</c> is read to obtain Q, so it must exist.</param>
/// <param name="labels">Cluster labels, forwarded to <c>GroupByCluster</c> together with <paramref name="inputs"/>.</param>
/// <param name="distance">Distance measure, forwarded to <c>ComputeLikelihood</c>.</param>
/// <param name="aic">Receives the AIC value.</param>
/// <param name="bic">Receives the BIC value.</param>
/// <param name="icl">Receives the ICL value.</param>
public static void InformationCriteria(double[][] centroids, double[][] inputs, int[] labels, Distance distance, out double aic, out double bic, out double icl)
{
    int sampleCount = inputs.Length;
    int dimension = inputs[0].Length;

    // The second output of GroupByCluster is required by its signature but not used here.
    int[] legends = null;
    ClusterItem[][] clusters = GroupByCluster(inputs, labels, out legends);
    int clusterCount = clusters.Length;

    double logLikelihood = ComputeLikelihood(clusters, centroids, sampleCount, distance);
    int parameterCount = (clusterCount * dimension) + 1;

    aic = logLikelihood - parameterCount;
    bic = logLikelihood - ((parameterCount / 2.0) * Math.Log(sampleCount));

    // Log-sum over all samples.
    double sampleOffset = (clusterCount + 2.0) / 2.0;
    double allSamplesTerm = 0;
    for (int n = 0; n < sampleCount; n++)
    {
        allSamplesTerm += Math.Log(n + sampleOffset);
    }

    // Log-sum over the members of each cluster; null slots contribute nothing.
    double perClusterTerm = 0;
    foreach (ClusterItem[] members in clusters)
    {
        if (members == null)
        {
            continue;
        }

        for (int m = 0; m < members.Length; m++)
        {
            perClusterTerm += Math.Log(m + (3.0 / 2.0));
        }
    }

    icl = bic - allSamplesTerm + perClusterTerm;
}
/// <summary>
/// Returns the mean of <c>distance.Similarity(currInput, member.InputVector)</c> taken over
/// every member of <paramref name="cluster"/>.
/// </summary>
/// <param name="currInput">Vector compared against each cluster member.</param>
/// <param name="cluster">
/// Cluster whose members are compared. An empty cluster makes <c>Enumerable.Average</c> throw
/// <c>InvalidOperationException</c>.
/// </param>
/// <param name="distance">Measure whose <c>Similarity</c> method is evaluated per member.</param>
/// <returns>The arithmetic mean of the per-member values.</returns>
private static double AverageInterDissimilar(double[] currInput, ClusterItem[] cluster, Distance distance)
{
    double[] dissimilarities = new double[cluster.Length];

    int slot = 0;
    foreach (ClusterItem member in cluster)
    {
        dissimilarities[slot] = distance.Similarity(currInput, member.InputVector);
        slot++;
    }

    return dissimilarities.Average();
}
/// <summary>
/// Sums <c>distance.Similarity(currInput, member.InputVector)</c> over every member of
/// <paramref name="cluster"/> and divides the total by the member count minus one.
/// </summary>
/// <remarks>
/// NOTE(review): the "minus one" presumably discounts <paramref name="currInput"/> itself, on
/// the assumption that it is a member of <paramref name="cluster"/> and that its value against
/// itself is zero - confirm against the caller. For a one-item cluster the divisor is zero, so
/// the result is NaN or infinity; verify that callers handle that case.
/// </remarks>
/// <param name="currInput">Vector compared against each cluster member.</param>
/// <param name="cluster">Cluster whose members are compared.</param>
/// <param name="distance">Measure whose <c>Similarity</c> method is evaluated per member.</param>
/// <returns>The summed values divided by <c>cluster.Length - 1</c>.</returns>
private static double AverageDissimilar(double[] currInput, ClusterItem[] cluster, Distance distance)
{
    int memberCount = cluster.Length;
    double[] dissimilarities = new double[memberCount];

    int slot = 0;
    foreach (ClusterItem member in cluster)
    {
        dissimilarities[slot] = distance.Similarity(currInput, member.InputVector);
        slot++;
    }

    // One entry is left out of the count: the point itself.
    int otherMembers = memberCount - 1;
    return dissimilarities.Sum() / otherMembers;
}