/// <summary>
/// Return the confidence interval of the population mean at a given confidence level,
/// given point estimates (sample mean, sample standard deviation, sample size) from multiple groups / classes.
///
/// Note that each class should be a continuous variable.
/// </summary>
/// <param name="sampleMeans">point estimate sample means from different groups / classes</param>
/// <param name="sampleStdDev">point estimate sample standard deviations from different groups / classes</param>
/// <param name="sampleSizes">sample size from different groups / classes</param>
/// <param name="confidence_level">The given confidence level, strictly between 0 and 1 (e.g. 0.95)</param>
/// <param name="useStudentT">whether Student's t should be used for the critical values; it is also used automatically when any sample size is below 30</param>
/// <returns>
/// The confidence interval of the population mean at the given confidence level, as a two-element array:
/// index 0 is derived from the lower-tail probability (1 - confidence_level) / 2 and index 1 from its complement.
/// </returns>
/// <exception cref="System.ArgumentNullException">Any of the input arrays is null.</exception>
/// <exception cref="System.ArgumentException">The input arrays are empty or do not all have the same length.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="confidence_level"/> is not strictly between 0 and 1.</exception>
public static double[] GetConfidenceInterval(double[] sampleMeans, double[] sampleStdDev, int[] sampleSizes, double confidence_level, bool useStudentT = false)
{
    if (sampleMeans == null)
    {
        throw new System.ArgumentNullException(nameof(sampleMeans));
    }
    if (sampleStdDev == null)
    {
        throw new System.ArgumentNullException(nameof(sampleStdDev));
    }
    if (sampleSizes == null)
    {
        throw new System.ArgumentNullException(nameof(sampleSizes));
    }

    int groupCount = sampleMeans.Length;
    if (groupCount == 0)
    {
        throw new System.ArgumentException("At least one group is required.", nameof(sampleMeans));
    }
    if (sampleStdDev.Length != groupCount)
    {
        throw new System.ArgumentException("sampleStdDev must have the same length as sampleMeans.", nameof(sampleStdDev));
    }
    if (sampleSizes.Length != groupCount)
    {
        throw new System.ArgumentException("sampleSizes must have the same length as sampleMeans.", nameof(sampleSizes));
    }
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(confidence_level > 0 && confidence_level < 1))
    {
        throw new System.ArgumentOutOfRangeException(nameof(confidence_level), "confidence_level must be strictly between 0 and 1.");
    }

    // Per-group standard errors, then pooled across groups weighted by sample size.
    double[] standardErrors = new double[groupCount];
    for (int i = 0; i < groupCount; ++i)
    {
        standardErrors[i] = StandardError.GetStandardError(sampleStdDev[i], sampleSizes[i]);
    }

    double standardError = StandardError.GetStandardErrorForWeightAverages(sampleSizes, standardErrors);
    double sampleMean = Mean.GetMeanForWeightedAverage(sampleMeans, sampleSizes);

    // Two-sided interval: split the excluded probability mass evenly between both tails.
    double p1 = (1 - confidence_level) / 2.0;
    double p2 = 1 - p1;

    // A single pass yields both the "any group smaller than 30" test and the degrees of freedom.
    int smallestSampleSize = int.MaxValue;
    for (int i = 0; i < groupCount; ++i)
    {
        if (sampleSizes[i] < smallestSampleSize)
        {
            smallestSampleSize = sampleSizes[i];
        }
    }

    bool shouldUseStudentT = useStudentT || smallestSampleSize < 30;

    double lowerCriticalValue;
    double upperCriticalValue;
    if (shouldUseStudentT)
    {
        // Degrees of freedom are taken from the smallest group.
        int df = smallestSampleSize - 1;
        lowerCriticalValue = StudentT.GetQuantile(p1, df);
        upperCriticalValue = StudentT.GetQuantile(p2, df);
    }
    else
    {
        lowerCriticalValue = Gaussian.GetQuantile(p1);
        upperCriticalValue = Gaussian.GetQuantile(p2);
    }

    return new double[]
    {
        sampleMean + lowerCriticalValue * standardError,
        sampleMean + upperCriticalValue * standardError
    };
}
/// <summary>
/// Calculate the confidence interval for the proportion of SUCCESS in the population at a given confidence level,
/// given point estimate proportions obtained from multiple groups.
///
/// Note that this is only for a categorical variable with two levels: SUCCESS, FAILURE.
/// </summary>
/// <param name="proportions">The point estimate proportion of SUCCESS obtained from each group</param>
/// <param name="sampleSizes">The sample size of each group</param>
/// <param name="confidence_level">The given confidence level, strictly between 0 and 1 (e.g. 0.95)</param>
/// <param name="useSimulation">
/// Whether to force the simulation-based estimate. Simulation is also used automatically when, for any group,
/// the expected success count or expected failure count (truncated to an integer) is below 10.
/// </param>
/// <param name="simulationCount">The number of simulated samples drawn when simulation is used</param>
/// <returns>
/// The confidence interval for the proportion of SUCCESS in the population at the given confidence level, as a
/// two-element array: index 0 is derived from the lower-tail probability (1 - confidence_level) / 2 and index 1
/// from its complement.
/// </returns>
/// <exception cref="System.ArgumentNullException">Any of the input arrays is null.</exception>
/// <exception cref="System.ArgumentException">The input arrays are empty or differ in length.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="confidence_level"/> is not strictly between 0 and 1, or simulation is used and
/// <paramref name="simulationCount"/> is not positive.
/// </exception>
public static double[] GetConfidenceInterval(double[] proportions, int[] sampleSizes, double confidence_level, bool useSimulation = false, int simulationCount = 500)
{
    if (proportions == null)
    {
        throw new System.ArgumentNullException(nameof(proportions));
    }
    if (sampleSizes == null)
    {
        throw new System.ArgumentNullException(nameof(sampleSizes));
    }
    if (proportions.Length == 0)
    {
        throw new System.ArgumentException("At least one group is required.", nameof(proportions));
    }
    if (sampleSizes.Length != proportions.Length)
    {
        throw new System.ArgumentException("sampleSizes must have the same length as proportions.", nameof(sampleSizes));
    }
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(confidence_level > 0 && confidence_level < 1))
    {
        throw new System.ArgumentOutOfRangeException(nameof(confidence_level), "confidence_level must be strictly between 0 and 1.");
    }

    int groupCount = proportions.Length;

    // Two-sided interval: split the excluded probability mass evenly between both tails.
    double p1 = (1 - confidence_level) / 2;
    double p2 = 1 - p1;

    // Fall back to simulation when any group has fewer than 10 expected successes or failures.
    bool shouldUseSimulation = useSimulation;
    if (!shouldUseSimulation)
    {
        for (int i = 0; i < groupCount; ++i)
        {
            int n_i = sampleSizes[i];
            int expectedSuccessCount = (int)(proportions[i] * n_i);
            int expectedFailureCount = (int)((1 - proportions[i]) * n_i);
            if (expectedFailureCount < 10 || expectedSuccessCount < 10)
            {
                shouldUseSimulation = true;
                break;
            }
        }
    }

    if (shouldUseSimulation)
    {
        if (simulationCount <= 0)
        {
            throw new System.ArgumentOutOfRangeException(nameof(simulationCount), "simulationCount must be positive when simulation is used.");
        }

        // Pool all groups into a single success proportion.
        double successTotal = 0;
        double observationTotal = 0;
        for (int i = 0; i < groupCount; ++i)
        {
            int n_i = sampleSizes[i];
            successTotal += proportions[i] * n_i;
            observationTotal += n_i;
        }
        double p_hat = successTotal / observationTotal;

        // With a pooled proportion of 0 or 1 the sample-size formula below divides by zero, and casting the
        // resulting infinity to int is unspecified. There is no variability to simulate in that case, so
        // report the degenerate interval directly.
        if (p_hat <= 0 || p_hat >= 1)
        {
            return new double[] { p_hat, p_hat };
        }

        // Twice the smallest sample size that gives at least 10 expected successes and 10 expected failures.
        int simulationSampleSize = (int)System.Math.Max(10 / p_hat, 10 / (1 - p_hat)) * 2;

        double[] sampleProportions = new double[simulationCount];
        for (int i = 0; i < simulationCount; ++i)
        {
            // Each trial counts as a success when the draw is at most p_hat
            // (presumably GetUniform returns a value in [0, 1] - confirm against DistributionModel).
            int successCount = 0;
            for (int j = 0; j < simulationSampleSize; ++j)
            {
                if (DistributionModel.GetUniform() <= p_hat)
                {
                    successCount++;
                }
            }
            sampleProportions[i] = (double)successCount / simulationSampleSize;
        }

        double proportionMu = Mean.GetMean(sampleProportions);
        double proportionSigma = StdDev.GetStdDev(sampleProportions, proportionMu);

        // Both bounds use the quantile function, matching the non-simulation branch below;
        // the lower bound previously called Gaussian.GetPercentile, unlike every other bound.
        return new double[]
        {
            proportionMu + Gaussian.GetQuantile(p1) * proportionSigma,
            proportionMu + Gaussian.GetQuantile(p2) * proportionSigma
        };
    }

    // Normal approximation: per-group standard errors pooled across groups, weighted by sample size.
    double[] standardErrors = new double[groupCount];
    for (int i = 0; i < groupCount; ++i)
    {
        standardErrors[i] = StandardError.GetStandardErrorForProportion(proportions[i], sampleSizes[i]);
    }

    double standardError = StandardError.GetStandardErrorForWeightAverages(sampleSizes, standardErrors);
    double sampleMean = Mean.GetMeanForWeightedAverage(proportions, sampleSizes);

    double lowerCriticalValue = Gaussian.GetQuantile(p1);
    double upperCriticalValue = Gaussian.GetQuantile(p2);

    return new double[]
    {
        sampleMean + lowerCriticalValue * standardError,
        sampleMean + upperCriticalValue * standardError
    };
}