public void TTestConstructorTest2()
{
    // Example from Larson & Farber, Elementary Statistics: Picturing the world.
    /*
     * A random sample of 17 police officers in Brownsville has a mean annual
     * income of $35,800 and a standard deviation of $7,800. In Greensville,
     * a random sample of 18 police officers has a mean annual income of $35,100
     * and a standard deviation of $7,375. Test the claim at a = 0.01 that the
     * mean annual incomes in the two cities are not the same. Assume the
     * population variances are equal.
     */

    // Brownsville sample
    double brownsvilleMean = 35800;
    double brownsvilleStdDev = 7800;
    int brownsvilleSize = 17;

    // Greensville sample
    double greensvilleMean = 35100;
    double greensvilleStdDev = 7375;
    int greensvilleSize = 18;

    var test = new TwoSampleTTest(
        brownsvilleMean, brownsvilleStdDev * brownsvilleStdDev, brownsvilleSize,
        greensvilleMean, greensvilleStdDev * greensvilleStdDev, greensvilleSize,
        assumeEqualVariances: true,
        alternate: TwoSampleHypothesis.ValuesAreDifferent);

    Assert.AreEqual(33, test.DegreesOfFreedom);
    Assert.AreEqual(2564.92, test.StandardError, 1e-3);
    Assert.AreEqual(0.273, test.Statistic, 1e-3);

    // At the a = 0.01 level the difference is not significant.
    test.Size = 0.01;
    Assert.IsFalse(test.Significant);
}
public void TTestPowerAnalysisConstructorTest4()
{
    // Example from http://www.ats.ucla.edu/stat/stata/dae/t_test_power2.htm,
    // tested against G*Power results
    double meanA = 0;
    double meanB = 10;
    double sdA = 15;
    double sdB = 17;
    double varianceA = sdA * sdA;
    double varianceB = sdB * sdB;

    // A priori: samples needed for 80% power at alpha = 0.05
    var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10, varianceA, varianceB, 1.0, 0.8, 0.05);
    Assert.AreEqual(41, Math.Truncate(priori.Samples1));
    Assert.AreEqual(41, Math.Truncate(priori.Samples2));

    // Post hoc power of a balanced design (30 + 30)
    var balanced = new TwoSampleTTest(meanA, varianceA, 30, meanB, varianceB, 30);
    Assert.AreEqual(0.661222, balanced.Analysis.Power, 1e-6);

    // Post hoc power of an unbalanced design (20 + 40)
    var unbalanced = new TwoSampleTTest(meanA, varianceA, 20, meanB, varianceB, 40);
    Assert.AreEqual(0.6102516, unbalanced.Analysis.Power, 1e-6);

    // A priori again, at a looser alpha = 0.07
    priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10, varianceA, varianceB, 1.0, 0.8, 0.07);
    Assert.AreEqual(37, Math.Truncate(priori.Samples1));
    Assert.AreEqual(37, Math.Truncate(priori.Samples2));
}
public void SampleSizeTest1()
{
    // Example from http://udel.edu/~mcdonald/statttest.html
    // Computed using R's function power.t.test
    double variance = System.Math.Pow(4.3, 2); // both samples share sd = 4.3

    var test = new TwoSampleTTest(
        mean1: 3.2, var1: variance, samples1: 10,
        mean2: 0, var2: variance, samples2: 10,
        assumeEqualVariances: true,
        alternate: TwoSampleHypothesis.ValuesAreDifferent);

    // Clone the analysis so the original test stays untouched, then
    // solve for the sample size at 80% power and alpha = 0.05.
    var analysis = (TwoSampleTTestPowerAnalysis)test.Analysis.Clone();
    analysis.Power = 0.80;
    analysis.Size = 0.05;
    analysis.ComputeSamples(1);

    Assert.AreEqual(29.33682, analysis.Samples1, 1e-5);
}
protected override void EndProcessing()
{
    var hypothesis = TestingHelper.GetTwoSampleHypothesis(Alternate);

    TwoSampleTTest test;
    if (ParameterSetName == "Pipeline")
    {
        // Split the piped records into one sample per category value.
        var groups = _data.GroupBy(CategoryName).ToDoubleJaggedArrayOf(ValueName);
        if (groups.Length != 2)
        {
            var error = new ErrorRecord(
                new RuntimeException("The number of categories is not two"),
                "", ErrorCategory.InvalidArgument, null);
            WriteError(error);
            return;
        }
        test = new TwoSampleTTest(groups[0], groups[1], AssumeEqualVariances, HypothesizedDifference, hypothesis);
    }
    else
    {
        // Explicit-parameter set: the two samples were passed directly.
        test = new TwoSampleTTest(Sample1, Sample2, AssumeEqualVariances, HypothesizedDifference, hypothesis);
    }

    test.Size = Size;
    WriteObject(test);
}
/// <summary>
///   Creates a new <see cref="TTestPowerAnalysis"/>.
/// </summary>
///
/// <param name="test">The test to create the analysis for.</param>
///
public TTestPowerAnalysis(TwoSampleTTest test)
    : base(test.Tail)
{
    // Copy the power characteristics over from the test's own analysis.
    var source = test.Analysis;
    this.Power = source.Power;
    this.Size = source.Size;
    this.Effect = source.Effect;
    this.Samples = source.Samples;
}
public GeneralHypothesisTest(string firstLabel, DescriptiveResult sample1, string secondLabel,
    DescriptiveResult sample2, double hypothesizedDifference = 0,
    TwoSampleHypothesis alternate = TwoSampleHypothesis.ValuesAreDifferent)
{
    HypothesizedDifference = Math.Abs(hypothesizedDifference);

    // Capture the descriptive statistics of both samples up front.
    Result = new ComparisonResult
    {
        FirstSample = new SampleInfo
        {
            Name = firstLabel,
            Count = sample1.Count,
            Mean = sample1.Mean,
            StdDev = sample1.StdDev
        },
        SecondSample = new SampleInfo
        {
            Name = secondLabel,
            Count = sample2.Count,
            Mean = sample2.Mean,
            StdDev = sample2.StdDev
        },
        Hypothesis = alternate,
        HypothesizedDifference = HypothesizedDifference
    };

    // Small samples (fewer than 30 in either group) use the t-test;
    // otherwise the z-test approximation is applied.
    bool smallSample = sample1.Count < 30 || sample2.Count < 30;
    if (smallSample)
    {
        _tTest = new TwoSampleTTest(sample1, sample2, false, HypothesizedDifference, alternate);
        Result.Confidence = _tTest.Confidence;
        Result.ObservedDifference = _tTest.ObservedDifference;
        Result.Significant = _tTest.Significant;
        Result.Size = _tTest.Size;
        Result.StandardError = _tTest.StandardError;
    }
    else
    {
        _zTest = new TwoSampleZTest(sample1, sample2, HypothesizedDifference, alternate);
        Result.Confidence = _zTest.Confidence;
        Result.ObservedDifference = _zTest.ObservedDifference;
        Result.Significant = _zTest.Significant;
        Result.Size = _zTest.Size;
        Result.StandardError = _zTest.StandardError;
    }
}
public void TTestPowerAnalysisConstructorTest4()
{
    // Example from http://www.ats.ucla.edu/stat/stata/dae/t_test_power2.htm,
    // tested against G*Power results
    double meanA = 0;
    double meanB = 10;
    double sdA = 15;
    double sdB = 17;
    double varA = sdA * sdA;
    double varB = sdB * sdB;

    // A priori sample size at the default significance level
    var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10, variance1: varA, variance2: varB, power: 0.8);
    Assert.AreEqual(41, Math.Truncate(priori.Samples1));
    Assert.AreEqual(41, Math.Truncate(priori.Samples2));

    // Post hoc power, balanced design (30 + 30)
    var balanced = new TwoSampleTTest(meanA, varA, 30, meanB, varB, 30);
    Assert.AreEqual(0.661222, balanced.Analysis.Power, 1e-6);

    // Post hoc power, unbalanced design (20 + 40)
    var unbalanced = new TwoSampleTTest(meanA, varA, 20, meanB, varB, 40);
    Assert.AreEqual(0.6102516, unbalanced.Analysis.Power, 1e-6);

    // A priori sample size at alpha = 0.07
    priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10, variance1: varA, variance2: varB, power: 0.8, alpha: 0.07);
    Assert.AreEqual(37, Math.Truncate(priori.Samples1));
    Assert.AreEqual(37, Math.Truncate(priori.Samples2));
}
public void TTestConstructorTest()
{
    // Example from http://en.wikipedia.org/wiki/Student%27s_t-test#Two-sample_T.C2.A02_test
    double[] sample1 = { 30.02, 29.99, 30.11, 29.97, 30.01, 29.99 };
    double[] sample2 = { 29.89, 29.93, 29.72, 29.98, 30.02, 29.98 };

    // Welch's variant (unequal variances), two-tailed
    var welchTwoTailed = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false);
    Assert.AreEqual(0.0485, welchTwoTailed.StandardError, 1e-4);
    Assert.AreEqual(0.095, welchTwoTailed.ObservedDifference, 1e-10);
    Assert.AreEqual(7.03, welchTwoTailed.DegreesOfFreedom, 1e-3);
    Assert.AreEqual(0.091, welchTwoTailed.PValue, 0.001);

    // Welch's variant, one-tailed
    var welchOneTailed = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false,
        alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);
    Assert.AreEqual(0.0485, welchOneTailed.StandardError, 1e-4);
    Assert.AreEqual(0.095, welchOneTailed.ObservedDifference, 1e-10);
    Assert.AreEqual(7.03, welchOneTailed.DegreesOfFreedom, 1e-3);
    Assert.AreEqual(0.045, welchOneTailed.PValue, 0.001);

    // Pooled variant (equal variances), two-tailed
    var pooledTwoTailed = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true);
    Assert.AreEqual(0.0485, pooledTwoTailed.StandardError, 1e-4);
    Assert.AreEqual(0.095, pooledTwoTailed.ObservedDifference, 1e-10);
    Assert.AreEqual(10, pooledTwoTailed.DegreesOfFreedom);
    Assert.AreEqual(0.078, pooledTwoTailed.PValue, 0.001);

    // Pooled variant, one-tailed
    var pooledOneTailed = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true,
        alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);
    Assert.AreEqual(0.0485, pooledOneTailed.StandardError, 1e-4);
    Assert.AreEqual(0.095, pooledOneTailed.ObservedDifference, 1e-10);
    Assert.AreEqual(10, pooledOneTailed.DegreesOfFreedom);
    Assert.AreEqual(0.038, pooledOneTailed.PValue, 0.0015);
}
public TwoSampleHypothesisTestResult TestHypothesis(IEnumerable<double> sample1,
    IEnumerable<double> sample2, double hypothesizedDifference,
    TwoSampleHypothesis alternateHypothesis, double alpha)
{
    // TODO: P1 - Is false okay here? Will it get the variances from the inputs? Or should we use true?
    var test = new TwoSampleTTest(
        sample1.ToArray(),
        sample2.ToArray(),
        assumeEqualVariances: false,
        hypothesizedDifference: hypothesizedDifference,
        alternate: alternateHypothesis)
    {
        Size = alpha
    };

    return new TwoSampleHypothesisTestResult(
        test.Significant,
        test.GetConfidenceInterval(1 - alpha),
        test.ObservedDifference);
}
/// <summary>
/// Computes the N value (and, for observed data, the p value and log fold change) for a
/// protein from its intensity values in two conditions.
///
/// The N value combines the p value and the magnitude of change (log fold change) of the
/// protein between the two conditions, following the same principle as volcano plots
/// (significance statistic on the y axis, magnitude of change on the x axis), enabling
/// significance classification of the protein.
/// </summary>
/// <param name="proteinFirstConditionIntensityValues">Intensity values in the first condition.</param>
/// <param name="proteinSecondConditionIntensityValues">Intensity values in the second condition.</param>
/// <param name="sOValue">The s0 tuning parameter used in the N-value formula.</param>
/// <param name="permutationTesting">
/// True when computing permuted N values (only the N value is returned); false for
/// observed data (N value, p value and log fold change are returned, in that order).
/// </param>
/// <returns>[nValue, pValue, logFoldChange] for observed data, or [nValue] when permuting.</returns>
public List<double> GetNValueUsingTTest(List<double> proteinFirstConditionIntensityValues, List<double> proteinSecondConditionIntensityValues, double sOValue, bool permutationTesting)
{
    double mean1 = proteinFirstConditionIntensityValues.Average();
    double mean2 = proteinSecondConditionIntensityValues.Average();

    double stdDev1 = CalculateProteinIntensityValuesStandardDeviation(proteinFirstConditionIntensityValues, mean1);
    double stdDev2 = CalculateProteinIntensityValuesStandardDeviation(proteinSecondConditionIntensityValues, mean2);
    double variance1 = stdDev1 * stdDev1;
    double variance2 = stdDev2 * stdDev2;

    // The F-test determines whether the variances of the two intensity-value populations
    // can be considered equal; 'significant' is whether that null hypothesis is rejected.
    bool significant = new FTest(variance1, variance2,
        proteinFirstConditionIntensityValues.Count - 1,
        proteinSecondConditionIntensityValues.Count - 1).Significant;

    // Two-tailed t-test; equal variances are assumed only when the F-test was not significant.
    TwoSampleTTest ttest = new TwoSampleTTest(mean1, variance1, proteinFirstConditionIntensityValues.Count,
        mean2, variance2, proteinSecondConditionIntensityValues.Count, !significant);

    double pValue = ttest.PValue;
    double logpValue = -Math.Log10(pValue);
    double logfoldChange = mean2 - mean1;

    // NOTE(review): when logfoldChange is 0 this divides by zero and yields NaN/Infinity —
    // confirm upstream filtering guarantees distinct condition means, or add a guard.
    double nValue = (logpValue * (logfoldChange * logfoldChange - sOValue * sOValue))
        / (logfoldChange * logfoldChange);

    // Observed runs report the full statistics; permutation runs only need the N value.
    return permutationTesting
        ? new List<double> { nValue }
        : new List<double> { nValue, pValue, logfoldChange };
}
/// <summary>
/// Computes the observed N value, p value, and log fold change for a protein from its
/// intensity values in two conditions, appending the results to the supplied lists.
///
/// The N value combines the p value and the magnitude of change (log fold change), following
/// the volcano-plot principle, so the protein can be classified as significantly different.
/// </summary>
/// <param name="proteinFirstConditionIntensityValues">Intensity values in the first condition.</param>
/// <param name="proteinSecondConditionIntensityValues">Intensity values in the second condition.</param>
/// <param name="actualNValues">Output list receiving the observed N value.</param>
/// <param name="actualPValues">Output list receiving the observed p value.</param>
/// <param name="actualLogFoldChange">Output list receiving the observed log fold change.</param>
/// <param name="sOValue">The s0 tuning parameter used in the N-value formula.</param>
public void GetNValueUsingTTest(List<double> proteinFirstConditionIntensityValues, List<double> proteinSecondConditionIntensityValues, List<double> actualNValues, List<double> actualPValues, List<double> actualLogFoldChange, double sOValue)
{
    // Materialize the lists as arrays for the statistical helpers below.
    double[] firstConditionIntensityValues = proteinFirstConditionIntensityValues.ToArray();
    double[] secondConditionIntensityValues = proteinSecondConditionIntensityValues.ToArray();

    double firstConditionIntensityMean = CalculateProteinMeanIntensityValue(firstConditionIntensityValues);
    double secondConditionIntensityMean = CalculateProteinMeanIntensityValue(secondConditionIntensityValues);

    double firstConditionIntensityStandardDev = CalculateProteinIntensityValuesStandardDeviation(firstConditionIntensityValues, firstConditionIntensityMean);
    double secondConditionIntensityStandardDev = CalculateProteinIntensityValuesStandardDeviation(secondConditionIntensityValues, secondConditionIntensityMean);
    double firstConditionIntensityVariance = firstConditionIntensityStandardDev * firstConditionIntensityStandardDev;
    double secondConditionIntensityVariance = secondConditionIntensityStandardDev * secondConditionIntensityStandardDev;

    // The F-test checks whether the two populations have equal variances;
    // 'significant' is whether that null hypothesis is rejected.
    bool significant = new FTest(firstConditionIntensityVariance, secondConditionIntensityVariance,
        proteinFirstConditionIntensityValues.Count - 1,
        proteinSecondConditionIntensityValues.Count - 1).Significant;

    // Two-tailed t-test; equal variances are assumed only when the F-test was not significant.
    TwoSampleTTest ttest = new TwoSampleTTest(firstConditionIntensityValues, secondConditionIntensityValues, !significant);
    double pValue = ttest.PValue;
    double logpValue = -Math.Log10(pValue);
    double logfoldChange = secondConditionIntensityMean - firstConditionIntensityMean;

    // N-value formula (volcano-plot based significance score).
    // NOTE(review): divides by zero when the two condition means are equal — confirm
    // inputs exclude that case.
    double nValue = (logpValue * (logfoldChange * logfoldChange - sOValue * sOValue))
        / (logfoldChange * logfoldChange);

    actualNValues.Add(nValue);
    actualLogFoldChange.Add(logfoldChange);
    actualPValues.Add(pValue);
}
public void TTestPowerAnalysisConstructorTest()
{
    // Declare two samples
    double[] A = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
    double[] B = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

    double meanA = A.Mean();
    double meanB = B.Mean();
    double varA = A.Variance();
    double varB = B.Variance();
    double sdA = A.StandardDeviation();
    double sdB = B.StandardDeviation();
    double sigma = Math.Sqrt((varA + varB) / 2.0);

    // Sanity-check the sample statistics first
    Assert.AreEqual(7.259, meanA, 1e-3);
    Assert.AreEqual(5.189, meanB, 1e-3);
    Assert.AreEqual(2.492289, varA, 1e-6);
    Assert.AreEqual(7.091476, varB, 1e-6);
    Assert.AreEqual(1.5786985, sdA, 1e-6);
    Assert.AreEqual(2.6629826, sdB, 1e-6);

    // Hypothesis test, Welch's variant (unequal variances)
    var test = new TwoSampleTTest(A, B, assumeEqualVariances: false);
    Assert.AreEqual(14.351, test.DegreesOfFreedom, 1e-3);
    Assert.AreEqual(2.14, test.Statistic, 1e-3);
    Assert.AreEqual(0.04999, test.PValue, 1e-5);
    Assert.AreEqual(0.00013662, test.Confidence.Min, 1e-6);
    Assert.AreEqual(4.14004519, test.Confidence.Max, 1e-6);
    Assert.IsTrue(test.Significant);

    // Hypothesis test, pooled variant (equal variances)
    test = new TwoSampleTTest(A, B, assumeEqualVariances: true);
    Assert.AreEqual(19, test.DegreesOfFreedom, 1e-3);
    Assert.AreEqual(2.1921894, test.Statistic, 1e-3);
    Assert.AreEqual(0.0410, test.PValue, 1e-4);
    Assert.AreEqual(0.09364214, test.Confidence.Min, 1e-6);
    Assert.AreEqual(4.04653967, test.Confidence.Max, 1e-6);
    Assert.IsTrue(test.Significant);

    // Check the actual power of the test...
    Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);

    // ... and how much effect we are trying to detect
    Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);

    // So, what is the minimal difference we can detect?
    Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

    // A posteriori: with 80% power, how much effect could we really detect?
    var analysis = new TwoSampleTTestPowerAnalysis(test);
    analysis.Power = 0.8;
    analysis.ComputeEffect();
    Assert.AreEqual(1.29051411, analysis.Effect, 1e-6);

    // A priori: sample size needed to detect a difference of at least
    // 1 point in the mean grades with at least 80% power
    analysis = TwoSampleTTestPowerAnalysis.GetSampleSize(1, variance1: varA, variance2: varB, power: 0.8);
    Assert.AreEqual(0.4568219, analysis.Effect, 1e-6);

    // Check how many samples we would need to detect this effect with 80% power
    Assert.AreEqual(77, Math.Ceiling(analysis.Samples1));
    Assert.AreEqual(77, Math.Ceiling(analysis.Samples2));
}
public void TTestPowerAnalysisConstructorTest()
{
    // Let's say we have two samples, and we would like to know whether those
    // samples have the same mean. For this, we can perform a two sample T-Test:
    double[] A = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
    double[] B = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

    // Perform the test, assuming the samples have unequal variances
    var test = new TwoSampleTTest(A, B, assumeEqualVariances: false);

    double df = test.DegreesOfFreedom;   // d.f. = 14.351
    double t = test.Statistic;           // t = 2.14
    double p = test.PValue;              // p = 0.04999
    bool significant = test.Significant; // true

    // The test gave us an indication that the samples may
    // indeed have come from different distributions (whose
    // mean value is actually distinct from each other).

    // Now, we would like to perform an _a posteriori_ analysis of the
    // test. When doing an a posteriori analysis, we can not change some
    // characteristics of the test (because it has been already done),
    // but we can measure some important features that may indicate
    // whether the test is trustworthy or not.

    // One of the first things would be to check for the test's power.
    // A test's power is 1 minus the probability of rejecting the null
    // hypothesis when the null hypothesis is actually false. It is
    // the other side of the coin when we consider that the P-value
    // is the probability of rejecting the null hypothesis when the
    // null hypothesis is actually true.

    // Ideally, this should be a high value:
    double power = test.Analysis.Power; // 0.5376260

    // Check how much effect we are trying to detect
    double effect = test.Analysis.Effect; // 0.94566

    // With this power, what is the minimal difference we can spot?
    double sigma = Math.Sqrt(test.Variance);
    double thres = test.Analysis.Effect * sigma; // 2.0700909090909

    // This means that, using our test, the smallest difference that
    // we could detect with some confidence would be something around
    // 2 standard deviations. If we would like to say the samples are
    // different when they are less than 2 std. dev. apart, we would
    // need to repeat our experiment differently.

    // Another way to create the power analysis is to pass
    // the test to the t-test power analysis constructor:

    // Create an a posteriori analysis of the experiment
    var analysis = new TwoSampleTTestPowerAnalysis(test);

    // When creating a power analysis, we have three things we can
    // change. We can always freely configure two of those things
    // and then ask the analysis to give us the third.

    // Those are:
    double e = analysis.Effect;       // the test's minimum detectable effect size (0.94566)
    double n = analysis.TotalSamples; // the number of samples in the test (21 or (11 + 10))
    double b = analysis.Power;        // the probability of committing a type-2 error (0.53)

    // Let's say we would like to create a test with 80% power.
    analysis.Power = 0.8;
    analysis.ComputeEffect(); // what effect could we detect?

    double detectableEffect = analysis.Effect; // we would detect a difference of 1.290514

    // However, to achieve this 80%, we would need to redo our experiment
    // more carefully. Assuming we are going to redo our experiment, we will
    // have more freedom about what we can change and what we can not. For
    // better addressing those points, we will create an a priori analysis
    // of the experiment:

    // We would like to know how many samples we would need to gather in
    // order to achieve a 80% power test which can detect an effect size
    // of one standard deviation:
    analysis = TwoSampleTTestPowerAnalysis.GetSampleSize
    (
        variance1: A.Variance(),
        variance2: B.Variance(),
        delta: 1.0, // the minimum detectable difference we want
        power: 0.8  // the test power that we want
    );

    // How many samples would we need in order to see the effect we need?
    int n1 = (int)Math.Ceiling(analysis.Samples1); // 77
    int n2 = (int)Math.Ceiling(analysis.Samples2); // 77

    // According to our power analysis, we would need at least 77
    // observations in each sample in order to see the effect we
    // need with the required 80% power.

    Assert.AreEqual(1.2905141186795861, detectableEffect);
    Assert.AreEqual(0.45682188621283815, analysis.Effect, 1e-6);
    Assert.AreEqual(2.0700909090909088, thres);
    Assert.AreEqual(0.53762605885988846, power);
    Assert.AreEqual(77, n1);
    Assert.AreEqual(77, n2); // fixed: previously asserted n1 twice, leaving n2 untested

    double meanA = A.Mean();
    double meanB = B.Mean();
    double varA = A.Variance();
    double varB = B.Variance();
    double sdA = A.StandardDeviation();
    double sdB = B.StandardDeviation();
    double sigma2 = Math.Sqrt((varA + varB) / 2.0);
    Assert.AreEqual(sigma2, sigma);

    Assert.AreEqual(7.259, meanA, 1e-3);
    Assert.AreEqual(5.189, meanB, 1e-3);
    Assert.AreEqual(2.492289, varA, 1e-6);
    Assert.AreEqual(7.091476, varB, 1e-6);
    Assert.AreEqual(1.5786985, sdA, 1e-6);
    Assert.AreEqual(2.6629826, sdB, 1e-6);

    Assert.AreEqual(14.351, df, 1e-3);
    Assert.AreEqual(2.14, t, 1e-3);
    Assert.AreEqual(0.04999, p, 1e-5);
    Assert.AreEqual(0.00013662, test.Confidence.Min, 1e-6);
    Assert.AreEqual(4.14004519, test.Confidence.Max, 1e-6);
    Assert.AreEqual(4.7918828787878791, test.Variance);
    Assert.IsTrue(test.Significant);
    Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);
    Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);
    Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

    test = new TwoSampleTTest(A, B, assumeEqualVariances: true);
    Assert.AreEqual(19, test.DegreesOfFreedom, 1e-3);
    Assert.AreEqual(2.1921894, test.Statistic, 1e-3);
    Assert.AreEqual(0.0410, test.PValue, 1e-4);
    Assert.AreEqual(0.09364214, test.Confidence.Min, 1e-6);
    Assert.AreEqual(4.04653967, test.Confidence.Max, 1e-6);
    Assert.IsTrue(test.Significant);

    // Check the actual power of the test...
    Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);

    // Check how much effect we are trying to detect
    Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);

    // So, what is the minimal difference we can detect?
    Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

    // Create an a posteriori analysis of the experiment
    analysis = new TwoSampleTTestPowerAnalysis(test);
    analysis.Power = 0.8;     // With 80% power, how much
    analysis.ComputeEffect(); // effect could we really detect?
    Assert.AreEqual(1.29051411, analysis.Effect, 1e-6);

    // Create an a priori power analysis so we can determine the sample
    // size needed to detect at least a difference of 1 point in the
    // student mean grades with at least 80% power:
    analysis = TwoSampleTTestPowerAnalysis.GetSampleSize(1,
        variance1: varA, variance2: varB, power: 0.8);

    Assert.AreEqual(0.4568219, analysis.Effect, 1e-6);

    // Check how many samples we would need to detect this effect with 80% power
    Assert.AreEqual(77, Math.Ceiling(analysis.Samples1));
    Assert.AreEqual(77, Math.Ceiling(analysis.Samples2));
}
/// <summary>
/// Runs Welch's t-test (a two-sample t-test that does not assume equal variances)
/// on the two samples at a significance level of 0.01.
/// </summary>
/// <param name="x">The first sample.</param>
/// <param name="y">The second sample.</param>
/// <param name="radius">Radius associated with the samples; copied into the result unchanged.</param>
/// <returns>A <see cref="Welch"/> record populated with the test statistics.</returns>
public static Welch TTest(double[] x, double[] y, int radius)
{
    // assumeEqualVariances: false selects the Welch variant of the t-test.
    var ttest = new TwoSampleTTest(x, y, false);
    ttest.Size = 0.01;

    return new Welch
    {
        N1 = x.Length,
        N2 = y.Length,
        Radius = radius,
        PValue = ttest.PValue,
        Statistic = ttest.Statistic,
        Significant = ttest.Significant,
        Size = ttest.Size,
        DegreesOfFreedom = ttest.DegreesOfFreedom,
        EstimatedValue1 = ttest.EstimatedValue1,
        EstimatedValue2 = ttest.EstimatedValue2,
        CriticalValue = ttest.CriticalValue,
    };
    // Removed: a large commented-out hand-rolled Welch implementation that duplicated
    // what TwoSampleTTest already computes (dead code).
}
/// <summary>
/// Computes peer-comparison statistics for a physician: Levene's test for equality of
/// variances plus two-sample t-tests with and without the equal-variance assumption.
/// Levene's p-value (&gt; 0.05) selects which t-test p-value is reported as PValue.
/// Requires at least 5 physician observations; the peer comparison additionally
/// requires at least 5 peer observations.
/// </summary>
public Statistics CalculateTTest(Physician phys)
{
    bool isValid = false;
    double levenesValue = 0;
    double pValue_E = 0; // t-test p-value, equal variances assumed
    double pValue_U = 0; // t-test p-value, unequal variances assumed
    double pValue = 0;   // the p-value selected via Levene's test
    var statistics = new Statistics { IsValid = false };

    // The physician's own indicator values.
    var physData = _context.IndicatorsData
        .Where(p => p.PayrollID == phys.ID)
        .Select(p => new { value = p.NumeratorValue });

    if (physData.Count() >= 5)
    {
        #region Build Physician Array
        var physArray = Util.BuildArray(physData);
        #endregion

        #region Build Peers Array
        // Peers: physicians in the same subgroup, excluding this physician.
        var peersData = _context.IndicatorsData
            .Where(p => (p.PayrollID != phys.ID) && (p.OppePhysicianSubGroupID == phys.SubGroupID))
            .Select(p => new { value = p.NumeratorValue });
        var peersArray = Util.BuildArray(peersData);
        if (peersData.Count() >= 5)
        {
            statistics.IsValid = true;
            isValid = true;
        }
        else
        {
            statistics.IsValid = false;
            isValid = false;
        }
        #endregion

        if (isValid)
        {
            #region Levene's Test
            double[][] doubleArray = new double[][] { physArray, peersArray };
            var levenesTest = new LeveneTest(doubleArray);
            levenesValue = levenesTest.PValue;
            #endregion

            #region T-Test Equal Variances assumed
            var TTestEqualVariance = new TwoSampleTTest(physArray, peersArray, assumeEqualVariances: true);
            pValue_E = TTestEqualVariance.PValue;
            #endregion

            #region T-Test UnEqual Variances assumed
            var TTestUnEqualVariance = new TwoSampleTTest(physArray, peersArray, assumeEqualVariances: false);
            pValue_U = TTestUnEqualVariance.PValue;
            #endregion

            // Levene's p > 0.05: the equal-variance assumption holds, so report pValue_E.
            pValue = levenesValue > 0.05 ? pValue_E : pValue_U;
        }

        #region Populating Statistics
        // NOTE(review): this unconditionally overwrites the IsValid = false assigned above
        // when there were fewer than 5 peers — confirm that is intended.
        statistics.IsValid = true;
        statistics.PayrollID = phys.ID;
        statistics.NumeratorSum = physArray.Sum();
        statistics.DenominatorSum = physArray.Count();
        statistics.Mean = Measures.Mean(physArray);
        statistics.StandardDeviation = Measures.StandardDeviation(physArray);
        if (isValid)
        {
            statistics.PeerNumeratorSum = peersArray.Sum();
            statistics.PeerDenominatorSum = peersArray.Count();
            statistics.PeerMean = Measures.Mean(peersArray);
            statistics.PeerStandardDeviation = Measures.StandardDeviation(peersArray);
            statistics.LevenesTest = levenesValue;
            statistics.PValue_EqualVariances = pValue_E;
            statistics.PValue_UnequalVariances = pValue_U;
            statistics.PValue = pValue;
        }
        #endregion
    }
    return (statistics);
}
/// <summary>
/// Generates permuted N values for a protein by swapping pairs of intensity values between
/// the two conditions and re-running the F-test / t-test pipeline on each swapped
/// arrangement; the results are appended to <paramref name="permutedNValues"/> and form
/// the null distribution for the permutation test.
/// NOTE(review): the statistics code here largely duplicates GetNValueUsingTTest —
/// consider generating the permutations and delegating the N-value computation to it.
/// Also consider naming: "observed" vs "permuted" N values rather than real vs fake.
/// </summary>
public void GetNValueUsingPermutationtests(List<double> proteinFirstConditionIntensityValues, List<double> proteinSecondConditionIntensityValues, List<double> permutedNValues, double sOValue)
{
    // Copy the intensity lists into arrays for the swapping logic below.
    double[] firstConditionIntensityValues = new double[proteinFirstConditionIntensityValues.Count];
    double[] secondConditionIntensityValues = new double[proteinSecondConditionIntensityValues.Count];
    for (int i = 0; i < proteinFirstConditionIntensityValues.Count; i++)
    {
        firstConditionIntensityValues[i] = proteinFirstConditionIntensityValues[i];
    }
    for (int i = 0; i < proteinSecondConditionIntensityValues.Count; i++)
    {
        secondConditionIntensityValues[i] = proteinSecondConditionIntensityValues[i];
    }

    // Index arrays 0..n-1 for each condition, used to enumerate swap candidates.
    int[] indicesOfFirstConditionIntensityValues = new int[firstConditionIntensityValues.Length];
    int[] indicesOfSecondConditionIntensityValues = new int[secondConditionIntensityValues.Length];
    for (int i = 0; i < proteinFirstConditionIntensityValues.Count; i++)
    {
        indicesOfFirstConditionIntensityValues[i] = i;
    }
    for (int i = 0; i < proteinSecondConditionIntensityValues.Count; i++)
    {
        indicesOfSecondConditionIntensityValues[i] = i;
    }

    // All unordered pairs of indices from each condition; every pair-from-first ×
    // pair-from-second combination defines one permutation (a two-for-two swap).
    List<List<int>> allTwoIndiciesCombinationsFromFirstCondition = GenerateAllCombinationsOfTwoIndices(indicesOfFirstConditionIntensityValues);
    List<List<int>> allTwoIndiciesCombinationsFromSecondCondition = GenerateAllCombinationsOfTwoIndices(indicesOfSecondConditionIntensityValues);

    int count = 0;
    foreach (var twoIndiciesCombinationEntryFromFirstCondition in allTwoIndiciesCombinationsFromFirstCondition)
    {
        foreach (var twoIndiciesCombinationEntryFromSecondCondition in allTwoIndiciesCombinationsFromSecondCondition)
        {
            // these are the new arrays which will be made after swapping intensity values between the two conditions
            double[] swappedFirstConditionIntensityValues = new double[firstConditionIntensityValues.Length];
            double[] swappedSecondConditionIntensityValues = new double[secondConditionIntensityValues.Length];
            int swappedFirstConditionArrayTracker = 0;
            int swappedSecondConditionArrayTracker = 0;

            int[] indiciesToSwapFromFirstCondition = new int[2];
            int[] indiciesToSwapFromSecondCondition = new int[2];
            int removeIndiciesFirstConditionTracker = 0;
            int removeIndiciesSecondConditionTracker = 0;

            // store the indices, corresponding to intensity values, to be swapped from first condition
            foreach (var index in twoIndiciesCombinationEntryFromFirstCondition)
            {
                indiciesToSwapFromFirstCondition[removeIndiciesFirstConditionTracker] = index;
                removeIndiciesFirstConditionTracker++;
            }

            // store the indices, corresponding to intensity values, to be swapped from second condition
            foreach (var index in twoIndiciesCombinationEntryFromSecondCondition)
            {
                indiciesToSwapFromSecondCondition[removeIndiciesSecondConditionTracker] = index;
                removeIndiciesSecondConditionTracker++;
            }

            // add the intensity values to be swapped from the first condition to the second condition
            for (int j = 0; j < indiciesToSwapFromFirstCondition.Count(); j++)
            {
                swappedSecondConditionIntensityValues[swappedSecondConditionArrayTracker] = firstConditionIntensityValues[indiciesToSwapFromFirstCondition[j]];
                swappedSecondConditionArrayTracker++;
            }

            // add the intensity values to be swapped from the second condition to the first condition
            for (int j = 0; j < indiciesToSwapFromSecondCondition.Count(); j++)
            {
                swappedFirstConditionIntensityValues[swappedFirstConditionArrayTracker] = secondConditionIntensityValues[indiciesToSwapFromSecondCondition[j]];
                swappedFirstConditionArrayTracker++;
            }

            // now we add the remaining intensity values from the first condition to the swappedFirstCondition array
            for (int j = 0; j < firstConditionIntensityValues.Count(); j++)
            {
                if (indiciesToSwapFromFirstCondition.Contains(j))
                {
                    continue;
                }
                swappedFirstConditionIntensityValues[swappedFirstConditionArrayTracker] = firstConditionIntensityValues[j];
                swappedFirstConditionArrayTracker++;
            }

            // now we add the remaining intensity values from the second condition to the swappedSecondCondition array
            for (int j = 0; j < secondConditionIntensityValues.Count(); j++)
            {
                if (indiciesToSwapFromSecondCondition.Contains(j))
                {
                    continue;
                }
                swappedSecondConditionIntensityValues[swappedSecondConditionArrayTracker] = secondConditionIntensityValues[j];
                swappedSecondConditionArrayTracker++;
            }

            // at this stage we have the newly made swapped arrays with mixture of groups.
            // need to proceed with T tests for these groups to generate permuted p values.
            double firstConditionIntensityMean = CalculateProteinMeanIntensityValue(swappedFirstConditionIntensityValues);
            double secondConditionIntensityMean = CalculateProteinMeanIntensityValue(swappedSecondConditionIntensityValues);
            double firstConditionIntensityStandardDev = CalculateProteinIntensityValuesStandardDeviation(swappedFirstConditionIntensityValues, firstConditionIntensityMean);
            double secondConditionIntensityStandardDev = CalculateProteinIntensityValuesStandardDeviation(swappedSecondConditionIntensityValues, secondConditionIntensityMean);
            double firstConditionIntensityVariance = firstConditionIntensityStandardDev * firstConditionIntensityStandardDev;
            double secondConditionIntensityVariance = secondConditionIntensityStandardDev * secondConditionIntensityStandardDev;

            // F-test: decides whether the swapped groups may be assumed to have equal variances.
            var ftest = new FTest(firstConditionIntensityVariance, secondConditionIntensityVariance, swappedFirstConditionIntensityValues.Length - 1, swappedSecondConditionIntensityValues.Length - 1);
            bool significant = ftest.Significant; // gets whether null hypothesis can be rejected

            // Create two tailed t test to get p values; equal variances are assumed only
            // when the F-test was not significant.
            TwoSampleTTest ttest = new TwoSampleTTest(swappedFirstConditionIntensityValues, swappedSecondConditionIntensityValues, !significant);
            double pValue = ttest.PValue;
            double logpValue = -Math.Log10(pValue);
            double logfoldChange = secondConditionIntensityMean - firstConditionIntensityMean;

            // N value from the volcano-plot formula, collected into the permuted null distribution.
            permutedNValues.Add((logpValue * (logfoldChange * logfoldChange - sOValue * sOValue)) / ((logfoldChange) * (logfoldChange)));
        }

        count++;
        // NOTE(review): only the first two pair-combinations from the first condition are
        // processed — presumably to cap the number of permutations; confirm this is intended.
        if (count == 2)
        {
            break;
        }
    }
}
/// <summary>
/// Computes descriptive statistics and peer-comparison p-values (Levene's test plus
/// equal- and unequal-variance t-tests) for a physician's Indicator_12 data.
/// Levene's p-value (&gt; 0.05) selects which t-test p-value is reported as PValue.
/// </summary>
/// <param name="context">Database context to read indicator data from.</param>
/// <param name="item">The physician whose data is compared against subgroup peers.</param>
/// <returns>A populated <see cref="Statistics"/> record; peer fields stay zeroed when fewer than 5 peers exist.</returns>
public Statistics CalculateStats(PValueDbContext context, Physician item)
{
    // The physician's own numerator values.
    var phys = context.Indicator_12
        .Where(p => p.PayrollID == item.ID)
        .Select(p => new { num = p.NumeratorValue });
    var physCount = phys.Count();

    double[] x1 = new double[physCount];
    double physNumVal = 0;
    double physDenVal = 0;
    double physMean = 0;
    int i = 0;
    foreach (var phy in phys)
    {
        x1[i] = phy.num;
        physNumVal += phy.num;
        physDenVal++;
        i++;
    }
    if (physDenVal != 0)
    {
        physMean = physNumVal / physDenVal;
    }

    // Peers: same subgroup, excluding this physician.
    var peers = context.Indicator_12
        .Where(p => p.OppePhysicianSubGroupID == item.SubGroupID)
        .Where(p => p.PayrollID != item.ID)
        .Select(p => new { num = p.NumeratorValue });
    var peersCount = peers.Count();

    double[] x2 = new double[peersCount];
    double peersNumVal = 0;
    double peersDenVal = 0;
    double peersMean = 0;
    double levenesTest = 0;
    double pValueA = 0;
    double pValueNA = 0;

    // The comparison is only meaningful with at least five peer observations.
    // (Reuse peersCount instead of re-running the Count() query against the database.)
    if (peersCount >= 5)
    {
        int j = 0;
        foreach (var peer in peers)
        {
            x2[j] = peer.num;
            peersNumVal += peer.num;
            peersDenVal++;
            j++;
        }
        if (peersDenVal != 0)
        {
            peersMean = peersNumVal / peersDenVal;
        }

        // Levene's test: its p-value decides which t-test variant is reported
        // downstream (> 0.05 means the equal-variance assumption holds).
        double[][] X = new double[][] { x1, x2 };
        var levenes = new LeveneTest(X);
        levenesTest = levenes.PValue;

        Console.WriteLine($"Grp = {item.SubGroupID} - Phys = {item.ID}");

        // T-test, equal variances assumed
        var ta = new TwoSampleTTest(x1, x2, assumeEqualVariances: true);
        pValueA = ta.PValue;

        // T-test, equal variances not assumed (Welch)
        var tna = new TwoSampleTTest(x1, x2, assumeEqualVariances: false);
        pValueNA = tna.PValue;
    }

    var stats = new Statistics
    {
        Count = physCount,
        PayrollID = item.ID,
        NumeratorSum = physNumVal,
        DenominatorSum = physDenVal,
        Mean = physMean,
        StandardDeviation = Measures.StandardDeviation(x1),
        PeerCount = peersCount,
        PeerNumeratorSum = peersNumVal,
        PeerDenominatorSum = peersDenVal,
        PeerMean = peersMean,
        PeerStandardDeviation = Measures.StandardDeviation(x2),
        LevenesTest = levenesTest,
        PValue_EqualVariances = pValueA,
        PValue_UnequalVariances = pValueNA,
        PValue = levenesTest > 0.05 ? pValueA : pValueNA
    };
    return (stats);
}