/// <summary>
/// Builds a two-sided confidence interval for a population mean from the summary
/// statistics (mean, standard deviation, size) of a single random sample.
///
/// Intended for a variable whose values are continuous.
/// </summary>
/// <param name="sampleMean">point estimate of the mean, taken from the random sample</param>
/// <param name="sampleStdDev">point estimate of the standard deviation, taken from the random sample</param>
/// <param name="sampleSize">number of observations in the random sample</param>
/// <param name="confidence_level">confidence level; each tail is given probability (1 - confidence_level) / 2. The value is not validated here.</param>
/// <param name="useStudentT">when true, Student's t critical values are used regardless of the sample size</param>
/// <returns>Two-element array: index 0 is the bound built from the lower-tail quantile, index 1 the bound built from the upper-tail quantile</returns>
public static double[] GetConfidenceInterval(double sampleMean, double sampleStdDev, int sampleSize, double confidence_level, bool useStudentT = false)
{
    double se = StandardError.GetStandardError(sampleStdDev, sampleSize);

    // Split the probability mass outside the interval evenly between both tails.
    double lowerTail = (1 - confidence_level) / 2.0;
    double upperTail = 1 - lowerTail;

    double lowerCritical;
    double upperCritical;

    bool normalApproximationOk = !useStudentT && sampleSize >= 30;
    if (normalApproximationOk)
    {
        lowerCritical = Gaussian.GetQuantile(lowerTail);
        upperCritical = Gaussian.GetQuantile(upperTail);
    }
    else
    {
        // Below 30 observations the CLT-based normal approximation for the sample mean
        // is not relied upon; Student's t distribution is used instead (also on request).
        int degreesOfFreedom = sampleSize - 1;
        lowerCritical = StudentT.GetQuantile(lowerTail, degreesOfFreedom);
        upperCritical = StudentT.GetQuantile(upperTail, degreesOfFreedom);
    }

    return new double[]
    {
        sampleMean + lowerCritical * se,
        sampleMean + upperCritical * se
    };
}
/// <summary>
/// Builds a two-sided confidence interval for the difference between two classes,
/// starting from one random sample per class.
///
/// Intended for variables whose values are continuous.
/// </summary>
/// <param name="sample_for_var1">random sample drawn for class 1</param>
/// <param name="sample_for_var2">random sample drawn for class 2</param>
/// <param name="confidence_level">confidence level; each tail is given probability (1 - confidence_level) / 2. The value is not validated here.</param>
/// <param name="useStudentT">when true, Student's t critical values are used regardless of the sample sizes</param>
/// <param name="correlation">forwarded unchanged to LinearCombination.Diff together with the two samples</param>
/// <returns>Two-element array: index 0 is the bound built from the lower-tail quantile, index 1 the bound built from the upper-tail quantile</returns>
public static double[] GetConfidenceIntervalForDiff(double[] sample_for_var1, double[] sample_for_var2, double confidence_level, bool useStudentT = false, double correlation = 0)
{
    // Point estimate of the difference and its standard error come from LinearCombination.Diff.
    double diffEstimate;
    double diffStandardError;
    LinearCombination.Diff(sample_for_var1, sample_for_var2, correlation, out diffEstimate, out diffStandardError);

    double lowerTail = (1 - confidence_level) / 2;
    double upperTail = 1 - lowerTail;

    int firstSize = sample_for_var1.Length;
    int secondSize = sample_for_var2.Length;
    int smallerSize = System.Math.Min(firstSize, secondSize);

    double lowerCritical;
    double upperCritical;

    // If either sample has fewer than 30 observations the CLT-based normal approximation
    // is not relied upon; Student's t distribution is used instead (also on request).
    if (useStudentT || smallerSize < 30)
    {
        // Degrees of freedom follow the smaller of the two samples.
        int degreesOfFreedom = smallerSize - 1;
        lowerCritical = StudentT.GetQuantile(lowerTail, degreesOfFreedom);
        upperCritical = StudentT.GetQuantile(upperTail, degreesOfFreedom);
    }
    else
    {
        lowerCritical = Gaussian.GetQuantile(lowerTail);
        upperCritical = Gaussian.GetQuantile(upperTail);
    }

    double[] interval = new double[2];
    interval[0] = diffEstimate + lowerCritical * diffStandardError;
    interval[1] = diffEstimate + upperCritical * diffStandardError;
    return interval;
}
/// <summary>
/// Builds a two-sided confidence interval for the population mean at a given confidence
/// level when the point estimates (mean, standard deviation, size) are known for
/// several groups / classes.
///
/// Each class should be a continuous variable.
/// </summary>
/// <param name="sampleMeans">point estimate sample means, one per group / class</param>
/// <param name="sampleStdDev">point estimate sample standard deviations, one per group / class</param>
/// <param name="sampleSizes">sample sizes, one per group / class</param>
/// <param name="confidence_level">confidence level; each tail is given probability (1 - confidence_level) / 2. The value is not validated here.</param>
/// <param name="useStudentT">when true, Student's t critical values are used regardless of the sample sizes</param>
/// <returns>Two-element array holding the confidence interval of the population mean: index 0 is the bound built from the lower-tail quantile, index 1 the bound built from the upper-tail quantile</returns>
public static double[] GetConfidenceInterval(double[] sampleMeans, double[] sampleStdDev, int[] sampleSizes, double confidence_level, bool useStudentT = false)
{
    // Standard error of every individual group.
    int groupCount = sampleMeans.Length;
    double[] groupErrors = new double[groupCount];
    for (int g = 0; g < groupCount; ++g)
    {
        groupErrors[g] = StandardError.GetStandardError(sampleStdDev[g], sampleSizes[g]);
    }

    // Combine the per-group figures into one standard error and one mean.
    double combinedError = StandardError.GetStandardErrorForWeightAverages(sampleSizes, groupErrors);
    double combinedMean = Mean.GetMeanForWeightedAverage(sampleMeans, sampleSizes);

    double lowerTail = (1 - confidence_level) / 2.0;
    double upperTail = 1 - lowerTail;

    // One pass for the smallest group: "some group is below 30" is the same statement as
    // "the smallest group is below 30" (for an empty array the minimum stays int.MaxValue).
    int smallestSize = int.MaxValue;
    foreach (int size in sampleSizes)
    {
        if (size < smallestSize)
        {
            smallestSize = size;
        }
    }

    bool studentTRequired = useStudentT || smallestSize < 30;

    double lowerCritical;
    double upperCritical;
    if (!studentTRequired)
    {
        lowerCritical = Gaussian.GetQuantile(lowerTail);
        upperCritical = Gaussian.GetQuantile(upperTail);
    }
    else
    {
        // Degrees of freedom follow the smallest group.
        int degreesOfFreedom = smallestSize - 1;
        lowerCritical = StudentT.GetQuantile(lowerTail, degreesOfFreedom);
        upperCritical = StudentT.GetQuantile(upperTail, degreesOfFreedom);
    }

    return new double[]
    {
        combinedMean + lowerCritical * combinedError,
        combinedMean + upperCritical * combinedError
    };
}