/// <summary>
/// Hypothesis testing for more than two classes using one-way ANOVA.
///
/// Given that:
/// H_0 : mu_1 = mu_2 = ... = mu_k (where k is the number of classes)
/// H_A : at least one group mean differs from the others
///
/// p-value = Pr(> f) is the probability of at least as large a ratio between the "between" and
/// "within" group variability if in fact the means of all groups are equal.
/// if (p-value &lt; significance_level) reject H_0
/// </summary>
/// <param name="totalSample">All observed values, across every group.</param>
/// <param name="grpCat">Group id of each observation: grpCat[i] is the group that totalSample[i] belongs to. Must hold at least totalSample.Length entries; entries beyond that are ignored.</param>
/// <param name="output">Receives the ANOVA table: sums of squares, degrees of freedom, mean squares, intercepts, F, p-value and the reject flag.</param>
/// <param name="significance_level">Threshold the p-value is compared against; H_0 is rejected when p-value &lt; significance_level.</param>
/// <returns>True if H_0 is rejected; False if H_0 is failed to be rejected (including when the p-value could not be computed).</returns>
/// <exception cref="System.ArgumentNullException">totalSample or grpCat is null.</exception>
/// <exception cref="System.ArgumentException">grpCat has fewer entries than totalSample.</exception>
public static bool RunANOVA(double[] totalSample, int[] grpCat, out ANOVA output, double significance_level = 0.05)
{
    if (totalSample == null)
    {
        throw new System.ArgumentNullException("totalSample");
    }

    if (grpCat == null)
    {
        throw new System.ArgumentNullException("grpCat");
    }

    if (grpCat.Length < totalSample.Length)
    {
        throw new System.ArgumentException("grpCat must supply a group id for every value in totalSample.", "grpCat");
    }

    output = new ANOVA();

    // Bucket the observations by their group id.
    Dictionary<int, List<double>> groupedSample = new Dictionary<int, List<double>>();
    for (int i = 0; i < totalSample.Length; ++i)
    {
        int grpId = grpCat[i];

        List<double> grp;
        if (!groupedSample.TryGetValue(grpId, out grp))
        {
            grp = new List<double>();
            groupedSample[grpId] = grp;
        }

        grp.Add(totalSample[i]);
    }

    // Sum of squares measures the total variability.
    double grand_mean;
    output.SST = GetSST(totalSample, out grand_mean);   // sum of squares total
    output.SSG = GetSSG(groupedSample, grand_mean);     // sum of squares group: variability explained by the group variable
    output.SSE = output.SST - output.SSG;               // sum of squares error: variability not explained by the group variable

    // Degrees of freedom.
    output.dfT = totalSample.Length - 1;    // degrees of freedom total
    output.dfG = groupedSample.Count - 1;   // degrees of freedom group
    output.dfE = output.dfT - output.dfG;   // degrees of freedom error

    // Mean squares: each sum of squares scaled by its degrees of freedom.
    output.MSG = output.SSG / output.dfG;   // mean squares group: between-group variability
    output.MSE = output.SSE / output.dfE;   // mean squares error: within-group variability

    output.Intercepts = GetIntercepts(GetMeanWithinGroup(groupedSample));

    // F statistic: ratio of the between-group variability to the within-group variability.
    output.F = output.MSG / output.MSE;

    try
    {
        // p-value = Pr(> f): upper-tail probability of the F distribution with (dfG, dfE) degrees of freedom.
        output.pValue = 1 - FDistribution.GetPercentile(output.F, output.dfG, output.dfE);
    }
    catch (System.Exception)
    {
        // The p-value could not be computed (for example with degenerate degrees of freedom).
        // Mark it as NaN instead of leaving whatever value a fresh ANOVA carries: NaN compares
        // false against any significance level, so a failed computation is never reported as
        // evidence against H_0.
        output.pValue = double.NaN;
    }

    output.RejectH0 = output.pValue < significance_level;
    return output.RejectH0;
}
/// <summary>
/// Return a matrix of reject H_0, for which rejectH0Matrix[i][j] = true if the pairwise comparison provides
/// enough evidence that group[i] and group[j] do not have the same mean.
///
/// Hypotheses for a pair of groups, group[i] and group[j]:
/// H_0 : mu_i = mu_j
/// H_A : mu_i != mu_j
///
/// Indices i and j are positions in the enumeration order of the distinct group ids found in grpCat,
/// not the group ids themselves.
/// </summary>
/// <param name="sample">All observed values, across every group.</param>
/// <param name="grpCat">Group id of each observation: grpCat[i] is the group that sample[i] belongs to.</param>
/// <param name="significance_level">Significance level for the test; it is adjusted by BonferroniCorrection before each pairwise comparison.</param>
/// <returns>RejectH0 matrix: rejectH0Matrix[i][j] = true if the test provides enough evidence that group[i] and group[j] do not have the same mean. The matrix is symmetric and its diagonal stays false.</returns>
public static bool[][] RejectH0(double[] sample, int[] grpCat, double significance_level = 0.05)
{
    // The ANOVA table is needed by PairwiseCompare; the omnibus verdict returned by RunANOVA is not consulted here.
    ANOVA anova_output;
    ANOVA.RunANOVA(sample, grpCat, out anova_output, significance_level);

    // Bucket the observations by their group id.
    Dictionary<int, List<double>> groupedSample = new Dictionary<int, List<double>>();
    for (int i = 0; i < sample.Length; ++i)
    {
        int grpId = grpCat[i];

        List<double> grp;
        if (!groupedSample.TryGetValue(grpId, out grp))
        {
            grp = new List<double>();
            groupedSample[grpId] = grp;
        }

        grp.Add(sample[i]);
    }

    int k = groupedSample.Count; // number of groups

    double alpha_adj = BonferroniCorrection(significance_level, k);

    // Allocate a k x k matrix. The loop must advance the row index i; the group count k is the fixed bound.
    bool[][] rejectH0Matrix = new bool[k][];
    for (int i = 0; i < k; ++i)
    {
        rejectH0Matrix[i] = new bool[k];
    }

    List<int> groupIdList = groupedSample.Keys.ToList();

    // Compare every unordered pair once and mirror the verdict across the diagonal.
    for (int i = 0; i < k - 1; ++i)
    {
        List<double> group1 = groupedSample[groupIdList[i]];
        for (int j = i + 1; j < k; ++j)
        {
            List<double> group2 = groupedSample[groupIdList[j]];

            double pValue = PairwiseCompare(group1, group2, anova_output);
            bool reject_H0 = pValue < alpha_adj;

            rejectH0Matrix[i][j] = reject_H0;
            rejectH0Matrix[j][i] = reject_H0;
        }
    }

    return rejectH0Matrix;
}
/// <summary>
/// Compares the means of two groups, using the error mean square and error degrees of freedom
/// taken from an ANOVA result.
/// </summary>
/// <param name="group1">observations belonging to the first group</param>
/// <param name="group2">observations belonging to the second group</param>
/// <param name="anova">ANOVA result supplying MSE and dfE</param>
/// <returns>p-value obtained from GetPValue for the t statistic of the two group means</returns>
public static double PairwiseCompare(List<double> group1, List<double> group2, ANOVA anova)
{
    double firstMean = Mean.GetMean(group1);
    double secondMean = Mean.GetMean(group2);

    // Under H_0 the two means are equal, so the hypothesised difference is zero.
    int hypothesizedDifference = 0;

    double tStatistic = GetTStatistic(
        firstMean,
        secondMean,
        group1.Count,
        group2.Count,
        hypothesizedDifference,
        anova.MSE);

    return GetPValue(tStatistic, anova.dfE);
}