예제 #1
0
        /// <summary>
        /// One-way ANOVA hypothesis test for comparing the means of more than two classes.
        ///
        /// Given that:
        /// H_0 : mu_1 = mu_2 = ... = mu_k (where k is the number of classes)
        /// H_A : at least one group mean differs from the others
        ///
        /// p-value = Pr(&gt; f) is the probability of at least as large a ratio between the "between" and "within" group variability if in fact the means of all groups are equal
        /// if(p-value &lt; significance_level) reject H_0
        ///
        /// If the p-value cannot be computed (the F distribution lookup throws), output.pValue is set to NaN and H_0 is not rejected.
        /// </summary>
        /// <param name="totalSample">All observations, pooled across every group</param>
        /// <param name="grpCat">Group (class) id of each observation: grpCat[i] is the group of totalSample[i]</param>
        /// <param name="output">Receives the ANOVA results (SST, SSG, SSE, dfT, dfG, dfE, MSG, MSE, Intercepts, F, pValue, RejectH0)</param>
        /// <param name="significance_level">Threshold the p-value is compared against; H_0 is rejected when the p-value is below it</param>
        /// <returns>True if H_0 is rejected; False if H_0 is failed to be rejected</returns>
        public static bool RunANOVA(double[] totalSample, int[] grpCat, out ANOVA output, double significance_level = 0.05)
        {
            output = new ANOVA();

            // Bucket the observations by their group id.
            Dictionary<int, List<double>> groupedSample = new Dictionary<int, List<double>>();

            for (int i = 0; i < totalSample.Length; ++i)
            {
                int    grpId     = grpCat[i];
                double sampleVal = totalSample[i];

                // Single lookup instead of ContainsKey followed by the indexer.
                List<double> grp;
                if (!groupedSample.TryGetValue(grpId, out grp))
                {
                    grp = new List<double>();
                    groupedSample[grpId] = grp;
                }
                grp.Add(sampleVal);
            }

            double grand_mean;

            // Sum of squares measures the total variability
            output.SST = GetSST(totalSample, out grand_mean); // sum of squares total
            output.SSG = GetSSG(groupedSample, grand_mean);   // sum of squares group: explained variability (explained by the group variable)
            output.SSE = output.SST - output.SSG;             // sum of squares error: unexplained variability (due to reasons other than the group variable)

            // Degrees of freedom
            output.dfT = totalSample.Length - 1;  // degrees of freedom total
            output.dfG = groupedSample.Count - 1; // degrees of freedom group
            output.dfE = output.dfT - output.dfG; // degrees of freedom error

            // Mean squares: variability between and within groups, i.e. sum of squares scaled by the associated degrees of freedom
            output.MSG = output.SSG / output.dfG; // mean squares group: between group variability
            output.MSE = output.SSE / output.dfE; // mean squares error: within group variability

            output.Intercepts = GetIntercepts(GetMeanWithinGroup(groupedSample));

            // f statistic: ratio of the between group variability and within group variability
            output.F = output.MSG / output.MSE;

            try
            {
                // p-value = Pr(> f): upper tail of the F distribution at the observed statistic
                output.pValue = 1 - FDistribution.GetPercentile(output.F, output.dfG, output.dfE);
            }
            catch
            {
                // The lookup can fail for degenerate inputs (e.g. a single group makes dfG zero).
                // Record "no p-value" explicitly: NaN compares false against any threshold, so a
                // failed computation can never be reported as a rejection of H_0.
                output.pValue = double.NaN;
            }

            output.RejectH0 = output.pValue < significance_level;
            return output.RejectH0;
        }
        /// <summary>
        /// Return a matrix of reject H_0, for which rejectH0Matrix[i][j] = true if the pairwise comparison provides enough evidence that group[i] and group[j] do not have the same mean
        ///
        /// Hypotheses for a pair of groups, group[i] and group[j]:
        /// H_0 : mu_i = mu_j
        /// H_A : mu_i != mu_j
        ///
        /// Each pair is tested against the significance level adjusted by BonferroniCorrection for the number of groups.
        /// Matrix indices follow the enumeration order of the distinct group ids collected from grpCat; the matrix is symmetric and its diagonal is left false.
        /// </summary>
        /// <param name="sample">All observations, pooled across every group</param>
        /// <param name="grpCat">Group (class) id of each observation: grpCat[i] is the group of sample[i]</param>
        /// <param name="significance_level">significance level for the test (before adjustment)</param>
        /// <returns>RejectH0 matrix: rejectH0Matrix[i][j] = true if the test provides enough evidence that group[i] and group[j] do not have the same mean</returns>
        public static bool[][] RejectH0(double[] sample, int[] grpCat, double significance_level = 0.05)
        {
            // The ANOVA results supply the values PairwiseCompare needs for every pair.
            ANOVA anova_output;

            ANOVA.RunANOVA(sample, grpCat, out anova_output, significance_level);

            // Bucket the observations by their group id.
            Dictionary<int, List<double>> groupedSample = new Dictionary<int, List<double>>();

            for (int i = 0; i < sample.Length; ++i)
            {
                int    grpId     = grpCat[i];
                double sampleVal = sample[i];

                // Single lookup instead of ContainsKey followed by the indexer.
                List<double> grp;
                if (!groupedSample.TryGetValue(grpId, out grp))
                {
                    grp = new List<double>();
                    groupedSample[grpId] = grp;
                }
                grp.Add(sampleVal);
            }

            int    k         = groupedSample.Count; // number of groups
            double alpha_adj = BonferroniCorrection(significance_level, k);

            // Allocate a k x k matrix. The loop must advance i: incrementing k here would never
            // terminate normally and would leave every row after the first unallocated.
            bool[][] rejectH0Matrix = new bool[k][];
            for (int i = 0; i < k; ++i)
            {
                rejectH0Matrix[i] = new bool[k];
            }

            List<int> groupIdList = groupedSample.Keys.ToList();

            // Visit each unordered pair once (j > i) and mirror the result across the diagonal.
            for (int i = 0; i < k - 1; ++i)
            {
                List<double> group1 = groupedSample[groupIdList[i]];
                for (int j = i + 1; j < k; ++j)
                {
                    List<double> group2    = groupedSample[groupIdList[j]];
                    double       pValue    = PairwiseCompare(group1, group2, anova_output);
                    bool         reject_H0 = pValue < alpha_adj;
                    rejectH0Matrix[i][j] = reject_H0;
                    rejectH0Matrix[j][i] = reject_H0;
                }
            }
            return rejectH0Matrix;
        }
        /// <summary>
        /// Compares the means of two groups after an ANOVA run.
        /// The t statistic is built from the two sample means, the two sample sizes,
        /// a null value of 0 and the ANOVA mean squares error (anova.MSE); the p-value
        /// is then obtained from that statistic and the error degrees of freedom (anova.dfE).
        /// </summary>
        /// <param name="group1">random sample from class 1</param>
        /// <param name="group2">random sample from class 2</param>
        /// <param name="anova">results of a previous ANOVA run; MSE and dfE are read from it</param>
        /// <returns>p-value = P(observed or more extreme values | H_0 is true)</returns>
        public static double PairwiseCompare(List<double> group1, List<double> group2, ANOVA anova)
        {
            // Under H_0 the two means are equal, so the hypothesized difference is zero.
            int hypothesizedDifference = 0;

            double firstMean  = Mean.GetMean(group1);
            double secondMean = Mean.GetMean(group2);

            double tStatistic = GetTStatistic(
                firstMean,
                secondMean,
                group1.Count,
                group2.Count,
                hypothesizedDifference,
                anova.MSE);

            return GetPValue(tStatistic, anova.dfE);
        }