/// <summary>
/// Two-sided or one-sided test for whether the statistics of two paired (dependent) variables,
/// var1 and var2, are equal in the true population.
///
/// Hypotheses:
/// H_0: mu_var1 - mu_var2 = 0
/// H_1: mu_var1 - mu_var2 != 0
///
/// Every pair is reduced to the difference Item1 - Item2, and the test statistic is
/// |mean(differences) - 0| / SE(differences).
/// The percentile of that statistic is looked up in Student's t distribution with
/// (number of pairs - 1) degrees of freedom when there are fewer than 30 pairs or
/// <paramref name="useStudentT"/> is set, and in the Gaussian distribution otherwise.
///
/// p-value = (1 - percentile) for a one-sided test, twice that for a two-sided test.
/// Note: the sign of the mean difference is discarded before the percentile lookup, so the
/// one-sided p-value does not distinguish which variable has the larger mean.
/// </summary>
/// <param name="sample_for_paired_data">a random sample of paired observations (var1, var2); var1 and var2 are not independent</param>
/// <param name="pValue">receives the computed p-value</param>
/// <param name="significance_level">threshold the p-value is compared against</param>
/// <param name="one_sided">True if the test is one-sided</param>
/// <param name="useStudentT">True to use Student's t distribution regardless of the sample size</param>
/// <returns>True if the p-value is strictly below the significance level (H_0 is rejected)</returns>
public bool RejectH0_PairedData(Tuple<double, double>[] sample_for_paired_data, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
{
    int pairCount = sample_for_paired_data.Length;

    // Collapse the paired observations into a single sample of differences.
    double[] differences = new double[pairCount];
    int position = 0;
    foreach (Tuple<double, double> pair in sample_for_paired_data)
    {
        differences[position++] = pair.Item1 - pair.Item2;
    }

    const double nullValue = 0;
    double meanDifference = Mean.GetMean(differences);
    double standardError = StandardError.GetStandardError(differences);
    double statistic = System.Math.Abs(meanDifference - nullValue) / standardError;

    // Fewer than 30 pairs: the normal approximation is not trusted, so Student's t is used instead.
    double cumulative = (useStudentT || pairCount < 30)
        ? StudentT.GetPercentile(statistic, pairCount - 1)
        : Gaussian.GetPercentile(statistic);

    int tailCount = one_sided ? 1 : 2;
    pValue = (1 - cumulative) * tailCount;
    return pValue < significance_level;
}
/// <summary>
/// Two-sided or one-sided test for a single statistic.
///
/// Hypotheses:
/// H_0: mu = null_value
/// H_A: mu != null_value
///
/// The test statistic is |point_estimate - null_value| / SE, computed under the assumption that
/// H_0 is true (i.e. the true population mean equals null_value).
/// Its percentile is looked up in Student's t distribution with (sampleSize - 1) degrees of
/// freedom when sampleSize is below 30 or <paramref name="useStudentT"/> is set, and in the
/// Gaussian distribution otherwise.
///
/// p-value = (1 - percentile) for a one-sided test, twice that for a two-sided test.
/// H_0 is rejected when the p-value is below the significance level.
/// </summary>
/// <param name="point_estimate">point estimate of the population statistic (e.g., sample mean, sample median, etc.)</param>
/// <param name="null_value">value of the statistic under H_0</param>
/// <param name="SE">standard error of the population statistic</param>
/// <param name="sampleSize">number of observations behind the point estimate</param>
/// <param name="pValue">receives the computed p-value</param>
/// <param name="significance_level">threshold the p-value is compared against</param>
/// <param name="one_sided">True if the test is one-sided</param>
/// <param name="useStudentT">True to use Student's t distribution regardless of the sample size</param>
/// <returns>True if the p-value is strictly below the significance level (H_0 is rejected)</returns>
public static bool RejectH0(double point_estimate, double null_value, double SE, int sampleSize, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
{
    // Distance of the estimate from the hypothesised value, in units of standard error.
    double distance = System.Math.Abs(point_estimate - null_value);
    double statistic = distance / SE;

    double cumulative;
    if (!useStudentT && sampleSize >= 30)
    {
        cumulative = Gaussian.GetPercentile(statistic);
    }
    else
    {
        // Small sample (or explicit request): use Student's t in place of the normal distribution.
        cumulative = StudentT.GetPercentile(statistic, sampleSize - 1);
    }

    double upperTail = 1 - cumulative;
    pValue = one_sided ? upperTail : upperTail * 2;
    return pValue < significance_level;
}
/// <summary>
/// Computes one p-value per coefficient:
/// P(observed or more extreme coefficient | true coefficient mean is 0).
///
/// For each coefficient the statistic is |estimate - 0| / standard error; its percentile is
/// looked up in Student's t distribution with (n - 1) degrees of freedom, and the p-value is
/// (1 - percentile) for a one-sided test or twice that for a two-sided test.
/// </summary>
/// <param name="CoeffPointEstimates">point estimates of the predictor coefficients</param>
/// <param name="CoeffSEs">standard errors of the predictor coefficients, index-aligned with the estimates</param>
/// <param name="n">number of training records</param>
/// <param name="one_sided">whether the test is one-sided</param>
/// <returns>p-values, one per coefficient, in the same order as the inputs</returns>
public static double[] CalcPValues(double[] CoeffPointEstimates, double[] CoeffSEs, int n, bool one_sided = false)
{
    const double nullValue = 0;
    int tailCount = one_sided ? 1 : 2;

    // NOTE(review): n - 1 ignores the number of estimated coefficients; a regression t-test
    // would normally use n minus the number of fitted parameters - confirm against the caller.
    int degreesOfFreedom = n - 1;

    double[] result = new double[CoeffPointEstimates.Length];
    for (int index = 0; index < result.Length; index++)
    {
        double ratio = (CoeffPointEstimates[index] - nullValue) / CoeffSEs[index];
        double cumulative = StudentT.GetPercentile(System.Math.Abs(ratio), degreesOfFreedom);
        result[index] = (1 - cumulative) * tailCount;
    }

    return result;
}
/// <summary>
/// Two-sided or one-sided test for whether the statistics of two independent variables,
/// var1 and var2, are equal in the true population.
///
/// Hypotheses:
/// H_0: mu_var1 - mu_var2 = 0
/// H_1: mu_var1 - mu_var2 != 0
///
/// The test statistic is |mean(var1) - mean(var2) - 0| / SE, where SE is the standard error
/// obtained from both samples.
/// Its percentile is looked up in Student's t distribution with
/// min(size of var1 sample, size of var2 sample) - 1 degrees of freedom when either sample has
/// fewer than 30 values or <paramref name="useStudentT"/> is set, and in the Gaussian
/// distribution otherwise.
///
/// p-value = (1 - percentile) for a one-sided test, twice that for a two-sided test.
/// </summary>
/// <param name="sample_for_var1">value sample for variable 1</param>
/// <param name="sample_for_var2">value sample for variable 2</param>
/// <param name="pValue">receives the computed p-value</param>
/// <param name="significance_level">threshold the p-value is compared against</param>
/// <param name="one_sided">True if the test is one-sided</param>
/// <param name="useStudentT">True to use Student's t distribution regardless of the sample sizes</param>
/// <returns>True if the p-value is strictly below the significance level (H_0 is rejected)</returns>
public bool RejectH0(double[] sample_for_var1, double[] sample_for_var2, out double pValue, double significance_level = 0.05, bool one_sided = false, bool useStudentT = false)
{
    const double nullValue = 0;
    double meanGap = Mean.GetMean(sample_for_var1) - Mean.GetMean(sample_for_var2);
    double standardError = StandardError.GetStandardError(sample_for_var1, sample_for_var2);
    double statistic = System.Math.Abs(meanGap - nullValue) / standardError;

    // The smaller sample decides both the small-sample check and the degrees of freedom.
    int smallerSize = System.Math.Min(sample_for_var1.Length, sample_for_var2.Length);

    double cumulative;
    if (useStudentT || smallerSize < 30)
    {
        cumulative = StudentT.GetPercentile(statistic, smallerSize - 1);
    }
    else
    {
        cumulative = Gaussian.GetPercentile(statistic);
    }

    int tailCount = one_sided ? 1 : 2;
    pValue = (1 - cumulative) * tailCount;
    return pValue < significance_level;
}
/// <summary>
/// Returns the p-value of a t statistic under Student's t distribution.
///
/// The percentile of |t| is converted to a tail probability in the same way as the other
/// tests in this file: (1 - percentile) for a one-sided test, twice that for a two-sided test.
/// Previously the raw percentile was returned, which is not a p-value.
/// </summary>
/// <param name="t">observed t statistic; only its magnitude is used</param>
/// <param name="dfE">degrees of freedom error obtained after ANOVA</param>
/// <param name="one_sided">True to return the one-sided p-value; defaults to the two-sided p-value</param>
/// <returns>p-value = P(observed or more extreme values | H_0 is true)</returns>
private static double GetPValue(double t, int dfE, bool one_sided = false)
{
    double cumulative = StudentT.GetPercentile(System.Math.Abs(t), dfE);

    // Probability mass beyond |t| in the upper tail.
    double upperTail = 1 - cumulative;

    return one_sided ? upperTail : 2 * upperTail;
}