Exemplo n.º 1
0
        /// <summary>
        /// Hypothesis test on the mean difference of paired (dependent) observations.
        ///
        /// Every pair (var1, var2) is reduced to the difference var1 - var2, and the mean of those
        /// differences is tested against a null value of 0:
        /// H_0: mu_var1 - mu_var2 = 0
        /// H_1: mu_var1 - mu_var2 != 0
        ///
        /// test statistic = |mean(diff) - 0| / SE(diff)
        /// p-value        = (1 - percentile of the test statistic) * (1 if one-sided, otherwise 2)
        ///
        /// The percentile is looked up in Student's t distribution (df = number of pairs - 1) when there are
        /// fewer than 30 pairs or when <paramref name="useStudentT"/> is set; otherwise the Gaussian is used.
        /// </summary>
        /// <param name="sample_for_paired_data">random sample of paired observations; Item1 is var1 and Item2 is var2, which are not independent</param>
        /// <param name="pValue">receives the computed p-value</param>
        /// <param name="significance_level">threshold the p-value is compared against</param>
        /// <param name="one_sided">true to keep a single tail; false doubles the tail probability</param>
        /// <param name="useStudentT">true to force Student's t distribution regardless of the number of pairs</param>
        /// <returns>true when the p-value is strictly below <paramref name="significance_level"/>, i.e. H_0 is rejected</returns>
        public bool RejectH0_PairedData(Tuple <double, double>[] sample_for_paired_data, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
        {
            int pairCount = sample_for_paired_data.Length;

            // Collapse each pair into one difference so the problem becomes a one-sample test.
            double[] differences = new double[pairCount];
            int slot = 0;
            foreach (var pair in sample_for_paired_data)
            {
                differences[slot++] = pair.Item1 - pair.Item2;
            }

            const double nullValue = 0;
            double meanDifference = Mean.GetMean(differences);
            double standardError  = StandardError.GetStandardError(differences);
            double statistic      = System.Math.Abs(meanDifference - nullValue) / standardError;

            // With fewer than 30 pairs the normal approximation is not relied upon, so Student's t is used instead.
            bool studentT = useStudentT || pairCount < 30;
            double cumulative = studentT
                ? StudentT.GetPercentile(statistic, pairCount - 1)
                : Gaussian.GetPercentile(statistic);

            int tails = one_sided ? 1 : 2;
            pValue = (1 - cumulative) * tails;
            return pValue < significance_level;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Two-sided or one-sided test of a single statistic against a hypothesised value.
        ///
        /// H_0 : mu = null_value
        /// H_A : mu != null_value
        ///
        /// test statistic = |point_estimate - null_value| / SE
        /// p-value        = (1 - percentile of the test statistic) * (1 if one-sided, otherwise 2)
        ///
        /// The percentile is taken from Student's t distribution (df = sampleSize - 1) when sampleSize is below 30
        /// or when <paramref name="useStudentT"/> is set; otherwise it is taken from the Gaussian.
        /// H_0 is rejected when the p-value is below the significance level.
        /// </summary>
        /// <param name="point_estimate">point estimate of the population statistic (e.g., sample mean, sample median, etc.)</param>
        /// <param name="null_value">value of the statistic assumed under H_0</param>
        /// <param name="SE">standard error of the point estimate</param>
        /// <param name="sampleSize">sample size; selects the distribution and sets the t degrees of freedom</param>
        /// <param name="pValue">receives the computed p-value</param>
        /// <param name="significance_level">threshold the p-value is compared against</param>
        /// <param name="one_sided">true to keep a single tail; false doubles the tail probability</param>
        /// <param name="useStudentT">true to force Student's t distribution regardless of the sample size</param>
        /// <returns>true when the p-value is strictly below <paramref name="significance_level"/></returns>
        public static bool RejectH0(double point_estimate, double null_value, double SE, int sampleSize, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
        {
            // Distance between the estimate and the null value, measured in standard errors (H_0 assumed true).
            double distance  = System.Math.Abs(point_estimate - null_value);
            double statistic = distance / SE;

            double cumulative;
            if (!useStudentT && sampleSize >= 30)
            {
                // Sample is large enough for the normal approximation.
                cumulative = Gaussian.GetPercentile(statistic);
            }
            else
            {
                cumulative = StudentT.GetPercentile(statistic, sampleSize - 1);
            }

            double tailProbability = 1 - cumulative;
            pValue = one_sided ? tailProbability : tailProbability * 2;
            return pValue < significance_level;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Computes one p-value per predictor coefficient under the null hypothesis that the true coefficient is 0.
        ///
        /// For coefficient i:
        /// t         = (estimate_i - 0) / SE_i
        /// p-value_i = (1 - StudentT percentile of |t| with n - 1 degrees of freedom) * (1 if one-sided, otherwise 2)
        /// </summary>
        /// <param name="CoeffPointEstimates">point estimates of the predictor coefficients</param>
        /// <param name="CoeffSEs">standard errors of the predictor coefficients; needs at least one entry per point estimate</param>
        /// <param name="n">number of training records</param>
        /// <param name="one_sided">true to report a single tail; false doubles the tail probability</param>
        /// <returns>p-values, one per entry of CoeffPointEstimates, in the same order</returns>
        /// <exception cref="System.ArgumentNullException">either array is null</exception>
        /// <exception cref="System.ArgumentException">CoeffSEs has fewer entries than CoeffPointEstimates</exception>
        public static double[] CalcPValues(double[] CoeffPointEstimates, double[] CoeffSEs, int n, bool one_sided = false)
        {
            if (CoeffPointEstimates == null)
            {
                throw new System.ArgumentNullException("CoeffPointEstimates");
            }
            if (CoeffSEs == null)
            {
                throw new System.ArgumentNullException("CoeffSEs");
            }

            int k = CoeffPointEstimates.Length;

            // Fail up front instead of with an IndexOutOfRangeException part-way through the loop.
            if (CoeffSEs.Length < k)
            {
                throw new System.ArgumentException("CoeffSEs must contain a standard error for every coefficient point estimate.", "CoeffSEs");
            }

            double null_value = 0;

            // NOTE(review): degrees of freedom are n - 1 regardless of the number of coefficients — confirm this is intended.
            int df    = n - 1;
            int tails = one_sided ? 1 : 2;

            double[] pValues = new double[k];
            for (int i = 0; i < k; ++i)
            {
                double t = (CoeffPointEstimates[i] - null_value) / CoeffSEs[i];

                // Only the magnitude of t is used; the remaining tail beyond |t| is scaled by the number of tails.
                pValues[i] = (1 - StudentT.GetPercentile(System.Math.Abs(t), df)) * tails;
            }

            return(pValues);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Two-sided or one-sided test of whether two independent variables have the same mean in the population.
        ///
        /// H_0: mu_var1 - mu_var2 = 0
        /// H_1: mu_var1 - mu_var2 != 0
        ///
        /// test statistic = |(mean(var1) - mean(var2)) - 0| / SE
        /// p-value        = (1 - percentile of the test statistic) * (1 if one-sided, otherwise 2)
        ///
        /// SE is whatever StandardError.GetStandardError returns for the two samples. The percentile comes from
        /// Student's t distribution, with df = min(size of var1, size of var2) - 1, when either sample has fewer than
        /// 30 values or when <paramref name="useStudentT"/> is set; otherwise it comes from the Gaussian.
        /// </summary>
        /// <param name="sample_for_var1">value sample for variable 1</param>
        /// <param name="sample_for_var2">value sample for variable 2</param>
        /// <param name="pValue">receives the computed p-value</param>
        /// <param name="significance_level">threshold the p-value is compared against</param>
        /// <param name="one_sided">true to keep a single tail; false doubles the tail probability</param>
        /// <param name="useStudentT">true to force Student's t distribution regardless of the sample sizes</param>
        /// <returns>true when the p-value is strictly below <paramref name="significance_level"/>, i.e. H_0 is rejected</returns>
        public bool RejectH0(double[] sample_for_var1, double[] sample_for_var2, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
        {
            const double nullValue = 0;

            double mean1         = Mean.GetMean(sample_for_var1);
            double mean2         = Mean.GetMean(sample_for_var2);
            double standardError = StandardError.GetStandardError(sample_for_var1, sample_for_var2);
            double statistic     = System.Math.Abs((mean1 - mean2) - nullValue) / standardError;

            int n1 = sample_for_var1.Length;
            int n2 = sample_for_var2.Length;

            double cumulative;
            if (useStudentT || n1 < 30 || n2 < 30)
            {
                // The smaller sample determines the degrees of freedom.
                int smallerDf = System.Math.Min(n1, n2) - 1;
                cumulative = StudentT.GetPercentile(statistic, smallerDf);
            }
            else
            {
                // Both samples are large enough for the normal approximation.
                cumulative = Gaussian.GetPercentile(statistic);
            }

            int tails = one_sided ? 1 : 2;
            pValue = (1 - cumulative) * tails;
            return pValue < significance_level;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Converts a t statistic into a two-sided p-value using Student's t distribution.
 ///
 /// StudentT.GetPercentile is treated as the cumulative probability up to |t|, so the probability of a value
 /// at least as extreme is the remaining tail, 1 - percentile, doubled because the sign of t is discarded.
 /// </summary>
 /// <param name="t">t statistic; only its magnitude is used</param>
 /// <param name="dfE">degrees of freedom error obtained after ANOVA</param>
 /// <returns>p-value = P(observed or more extreme values | H_0 is true)</returns>
 private static double GetPValue(double t, int dfE)
 {
     // The percentile by itself is not a p-value: the p-value is the tail beyond |t|.
     double percentile = StudentT.GetPercentile(System.Math.Abs(t), dfE);

     // NOTE(review): two tails are assumed because the sign of t is dropped — confirm callers do not expect a one-sided value.
     return((1 - percentile) * 2);
 }