Example #1
0
        public void TTestConstructorTest2()
        {
            // Example from Larson & Farber, Elementary Statistics: Picturing the world.
            //
            // A random sample of 17 police officers in Brownsville has a mean annual
            // income of $35,800 and a standard deviation of $7,800.  In Greensville,
            // a random sample of 18 police officers has a mean annual income of $35,100
            // and a standard deviation of $7,375.  Test the claim at a = 0.01 that the
            // mean annual incomes in the two cities are not the same.  Assume the
            // population variances are equal.

            double brownsvilleMean   = 35800;
            double brownsvilleStdDev = 7800;
            int brownsvilleCount     = 17;

            double greensvilleMean   = 35100;
            double greensvilleStdDev = 7375;
            int greensvilleCount     = 18;

            var test = new TwoSampleTTest(
                brownsvilleMean, brownsvilleStdDev * brownsvilleStdDev, brownsvilleCount,
                greensvilleMean, greensvilleStdDev * greensvilleStdDev, greensvilleCount,
                assumeEqualVariances: true,
                alternate: TwoSampleHypothesis.ValuesAreDifferent);

            Assert.AreEqual(33, test.DegreesOfFreedom);
            Assert.AreEqual(2564.92, test.StandardError, 1e-3);
            Assert.AreEqual(0.273, test.Statistic, 1e-3);

            // At the a = 0.01 significance level the observed difference
            // is not significant.
            test.Size = 0.01;
            Assert.IsFalse(test.Significant);
        }
        public void TTestPowerAnalysisConstructorTest4()
        {
            // Example from http://www.ats.ucla.edu/stat/stata/dae/t_test_power2.htm,
            // tested against G*Power results

            double firstMean  = 0;
            double secondMean = 10;

            double firstSd  = 15;
            double secondSd = 17;

            double firstVariance  = firstSd * firstSd;
            double secondVariance = secondSd * secondSd;

            // A priori: samples needed to detect a difference of 10 with
            // 80% power at the 5% significance level.
            {
                var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(
                    10,
                    firstVariance,
                    secondVariance,
                    1.0,
                    0.8,
                    0.05
                    );

                Assert.AreEqual(41, Math.Truncate(priori.Samples1));
                Assert.AreEqual(41, Math.Truncate(priori.Samples2));
            }

            // Post hoc: the achieved power with 30 samples in each group.
            {
                var test = new TwoSampleTTest(
                    firstMean, firstVariance, 30,
                    secondMean, secondVariance, 30);

                Assert.AreEqual(0.661222, test.Analysis.Power, 1e-6);
            }

            // Post hoc: unbalanced groups (20 vs 40) yield lower power
            // than 30 vs 30 even though the total sample size is the same.
            {
                var test = new TwoSampleTTest(
                    firstMean, firstVariance, 20,
                    secondMean, secondVariance, 40);

                Assert.AreEqual(0.6102516, test.Analysis.Power, 1e-6);
            }

            // A priori again, with a looser 7% significance level:
            // fewer samples are required.
            {
                var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(
                    10,
                    firstVariance,
                    secondVariance,
                    1.0,
                    0.8,
                    0.07
                    );

                Assert.AreEqual(37, Math.Truncate(priori.Samples1));
                Assert.AreEqual(37, Math.Truncate(priori.Samples2));
            }
        }
Example #3
0
        public void SampleSizeTest1()
        {
            // Example from http://udel.edu/~mcdonald/statttest.html
            // Computed using R's function power.t.test

            const double alpha = 0.05;
            const double power = 0.80;

            // Both groups share the same variance (sd = 4.3).
            double variance = System.Math.Pow(4.3, 2);

            var test = new TwoSampleTTest(
                mean1: 3.2, var1: variance, samples1: 10,
                mean2: 0.0, var2: variance, samples2: 10,
                assumeEqualVariances: true, alternate: TwoSampleHypothesis.ValuesAreDifferent);

            // Clone the post-hoc analysis so we can ask a new question:
            // how many samples are needed for the requested power and size?
            var analysis = (TwoSampleTTestPowerAnalysis)test.Analysis.Clone();
            analysis.Power = power;
            analysis.Size  = alpha;
            analysis.ComputeSamples(1);

            Assert.AreEqual(29.33682, analysis.Samples1, 1e-5);
        }
        protected override void EndProcessing()
        {
            // Translate the cmdlet's Alternate parameter into the library's
            // TwoSampleHypothesis enumeration.
            var hypo = TestingHelper.GetTwoSampleHypothesis(Alternate);

            TwoSampleTTest test;

            if (ParameterSetName == "Pipeline")
            {
                // Pipeline input: group the accumulated records by category and
                // extract the numeric values. A two-sample test needs exactly
                // two groups; anything else is reported as a non-terminating error.
                var samples = _data.GroupBy(CategoryName).ToDoubleJaggedArrayOf(ValueName);
                if (samples.Length != 2)
                {
                    WriteError(new ErrorRecord(new RuntimeException("The number of categories is not two"), "", ErrorCategory.InvalidArgument, null));
                    return;
                }
                test = new TwoSampleTTest(samples[0], samples[1], AssumeEqualVariances, HypothesizedDifference, hypo);
            }
            else
            {
                // Parameter input: the two samples were passed directly.
                test = new TwoSampleTTest(Sample1, Sample2, AssumeEqualVariances, HypothesizedDifference, hypo);
            }

            // Apply the requested significance level before emitting the test object.
            test.Size = Size;

            WriteObject(test);
        }
 /// <summary>
 ///   Creates a new <see cref="TTestPowerAnalysis"/>.
 /// </summary>
 ///
 /// <param name="test">The test to create the analysis for.</param>
 ///
 public TTestPowerAnalysis(TwoSampleTTest test)
     : base(test.Tail)
 {
     // Seed this analysis from the test's own post-hoc analysis so
     // both start out describing the same experiment.
     this.Power   = test.Analysis.Power;
     this.Size    = test.Analysis.Size;
     this.Effect  = test.Analysis.Effect;
     this.Samples = test.Analysis.Samples;
 }
 /// <summary>
 ///   Creates a new <see cref="TTestPowerAnalysis"/>.
 /// </summary>
 /// 
 /// <param name="test">The test to create the analysis for.</param>
 /// 
 public TTestPowerAnalysis(TwoSampleTTest test)
     : base(test.Tail)
 {
     // Copy the power, size, effect and sample count already computed
     // by the test's analysis, so this object begins in agreement with it.
     this.Power = test.Analysis.Power;
     this.Size = test.Analysis.Size;
     this.Effect = test.Analysis.Effect;
     this.Samples = test.Analysis.Samples;
 }
        /// <summary>
        ///   Compares two samples described by their summary statistics, using a
        ///   two-sample t-test when either sample has fewer than 30 observations
        ///   and a two-sample z-test otherwise.
        /// </summary>
        /// <param name="firstLabel">Display name for the first sample.</param>
        /// <param name="sample1">Descriptive statistics of the first sample.</param>
        /// <param name="secondLabel">Display name for the second sample.</param>
        /// <param name="sample2">Descriptive statistics of the second sample.</param>
        /// <param name="hypothesizedDifference">Hypothesized difference between the means; its absolute value is used.</param>
        /// <param name="alternate">The alternative hypothesis to test.</param>
        public GeneralHypothesisTest(string firstLabel, DescriptiveResult sample1, string secondLabel,
            DescriptiveResult sample2, double hypothesizedDifference = 0,
            TwoSampleHypothesis alternate = TwoSampleHypothesis.ValuesAreDifferent)
        {
            int samples1 = sample1.Count;
            int samples2 = sample2.Count;
            // Only the magnitude of the hypothesized difference is used; the sign is discarded.
            HypothesizedDifference = Math.Abs(hypothesizedDifference);

            // Capture the descriptive statistics of each sample for the result object.
            var s1 = new SampleInfo
                     {
                         Name = firstLabel,
                         Count = sample1.Count,
                         Mean = sample1.Mean,
                         StdDev = sample1.StdDev
                     };

            var s2 = new SampleInfo
                     {
                         Name = secondLabel,
                         Count = sample2.Count,
                         Mean = sample2.Mean,
                         StdDev = sample2.StdDev
                     };

            Result = new ComparisonResult
                     {
                         FirstSample = s1,
                         SecondSample = s2,
                         Hypothesis = alternate,
                         HypothesizedDifference = HypothesizedDifference
                     };

            if (samples1 < 30 || samples2 < 30)
            {
                // Small samples: use Student's t-test.
                // NOTE(review): the positional 'false' is presumably assumeEqualVariances — confirm
                // against the TwoSampleTTest overload taking DescriptiveResult arguments.
                _tTest = new TwoSampleTTest(sample1, sample2, false, HypothesizedDifference, alternate);
                Result.Confidence = _tTest.Confidence;
                Result.ObservedDifference = _tTest.ObservedDifference;
                Result.Significant = _tTest.Significant;
                Result.Size = _tTest.Size;
                Result.StandardError = _tTest.StandardError;
            }
            else
            {
                // Both samples have at least 30 observations: use the z-test.
                _zTest = new TwoSampleZTest(sample1, sample2, HypothesizedDifference, alternate);
                Result.Confidence = _zTest.Confidence;
                Result.ObservedDifference = _zTest.ObservedDifference;
                Result.Significant = _zTest.Significant;
                Result.Size = _zTest.Size;
                Result.StandardError = _zTest.StandardError;
            }
        }
        public void TTestPowerAnalysisConstructorTest4()
        {
            // Example from http://www.ats.ucla.edu/stat/stata/dae/t_test_power2.htm,
            // tested against G*Power results

            double meanA = 0;
            double meanB = 10;

            double sdA = 15;
            double sdB = 17;

            double varA = sdA * sdA;
            double varB = sdB * sdB;


            // A priori analysis: how many samples are needed to detect a
            // difference of 10 with 80% power (default significance level)?
            {
                var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10,
                    variance1: varA, variance2: varB, power: 0.8);

                Assert.AreEqual(41, Math.Truncate(priori.Samples1));
                Assert.AreEqual(41, Math.Truncate(priori.Samples2));
            }

            // Post hoc: the achieved power with 30 samples in each group.
            {
                TwoSampleTTest test = new TwoSampleTTest(
                    meanA, varA, 30,
                    meanB, varB, 30);

                Assert.AreEqual(0.661222, test.Analysis.Power, 1e-6);
            }

            // Post hoc: unbalanced groups (20 vs 40) give lower power than
            // balanced groups with the same total sample size.
            {
                TwoSampleTTest test = new TwoSampleTTest(
                    meanA, varA, 20,
                    meanB, varB, 40);

                Assert.AreEqual(0.6102516, test.Analysis.Power, 1e-6);
            }


            // A priori analysis again, with a looser alpha of 0.07:
            // fewer samples are required.
            {
                var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(10,
                variance1: varA, variance2: varB, power: 0.8, alpha: 0.07);

                Assert.AreEqual(37, Math.Truncate(priori.Samples1));
                Assert.AreEqual(37, Math.Truncate(priori.Samples2));
            }
        }
        public void TTestConstructorTest()
        {
            // Example from http://en.wikipedia.org/wiki/Student%27s_t-test#Two-sample_T.C2.A02_test

            double[] sample1 = { 30.02, 29.99, 30.11, 29.97, 30.01, 29.99 };
            double[] sample2 = { 29.89, 29.93, 29.72, 29.98, 30.02, 29.98 };

            // Case 1: unequal variances (Welch), two-sided alternative.
            var welch = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false);
            Assert.AreEqual(0.0485, welch.StandardError, 1e-4);
            Assert.AreEqual(0.095, welch.ObservedDifference, 1e-10);
            Assert.AreEqual(7.03, welch.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(0.091, welch.PValue, 0.001);

            // Case 2: unequal variances, one-sided alternative (first > second).
            var welchOneSided = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false,
                alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);
            Assert.AreEqual(0.0485, welchOneSided.StandardError, 1e-4);
            Assert.AreEqual(0.095, welchOneSided.ObservedDifference, 1e-10);
            Assert.AreEqual(7.03, welchOneSided.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(0.045, welchOneSided.PValue, 0.001);

            // Case 3: pooled (equal) variances, two-sided alternative.
            var pooled = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true);
            Assert.AreEqual(0.0485, pooled.StandardError, 1e-4);
            Assert.AreEqual(0.095, pooled.ObservedDifference, 1e-10);
            Assert.AreEqual(10, pooled.DegreesOfFreedom);
            Assert.AreEqual(0.078, pooled.PValue, 0.001);

            // Case 4: pooled variances, one-sided alternative.
            var pooledOneSided = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true,
                alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);
            Assert.AreEqual(0.0485, pooledOneSided.StandardError, 1e-4);
            Assert.AreEqual(0.095, pooledOneSided.ObservedDifference, 1e-10);
            Assert.AreEqual(10, pooledOneSided.DegreesOfFreedom);
            Assert.AreEqual(0.038, pooledOneSided.PValue, 0.0015);
        }
Example #10
0
        public void TTestConstructorTest()
        {
            // Example from http://en.wikipedia.org/wiki/Student%27s_t-test#Two-sample_T.C2.A02_test

            double[] sample1 = { 30.02, 29.99, 30.11, 29.97, 30.01, 29.99 };
            double[] sample2 = { 29.89, 29.93, 29.72, 29.98, 30.02, 29.98 };

            TwoSampleTTest test;

            // Unequal variances, two-sided alternative: fractional (Welch)
            // degrees of freedom are expected.
            test = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false);

            Assert.AreEqual(0.0485, test.StandardError, 1e-4);
            Assert.AreEqual(0.095, test.ObservedDifference, 1e-10);
            Assert.AreEqual(7.03, test.DegreesOfFreedom, 1e-3);

            Assert.AreEqual(0.091, test.PValue, 0.001);

            // Same samples with a one-sided alternative: the p-value halves
            // while the other statistics are unchanged.
            test = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: false,
                                      alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);

            Assert.AreEqual(0.0485, test.StandardError, 1e-4);
            Assert.AreEqual(0.095, test.ObservedDifference, 1e-10);
            Assert.AreEqual(7.03, test.DegreesOfFreedom, 1e-3);

            Assert.AreEqual(0.045, test.PValue, 0.001);

            // Equal (pooled) variances, two-sided: integer degrees of freedom
            // (n1 + n2 - 2 = 10).
            test = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true);

            Assert.AreEqual(0.0485, test.StandardError, 1e-4);
            Assert.AreEqual(0.095, test.ObservedDifference, 1e-10);
            Assert.AreEqual(10, test.DegreesOfFreedom);

            Assert.AreEqual(0.078, test.PValue, 0.001);

            // Pooled variances with the one-sided alternative.
            test = new TwoSampleTTest(sample1, sample2, assumeEqualVariances: true,
                                      alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);

            Assert.AreEqual(0.0485, test.StandardError, 1e-4);
            Assert.AreEqual(0.095, test.ObservedDifference, 1e-10);
            Assert.AreEqual(10, test.DegreesOfFreedom);

            Assert.AreEqual(0.038, test.PValue, 0.0015);
        }
        public TwoSampleHypothesisTestResult TestHypothesis(IEnumerable <double> sample1, IEnumerable <double> sample2, double hypothesizedDifference,
                                                            TwoSampleHypothesis alternateHypothesis,
                                                            double alpha)
        {
            // TODO: P1 - Is assumeEqualVariances: false okay here? Will it get
            // the variances from the inputs? Or should we use true?
            var tTest = new TwoSampleTTest(
                sample1.ToArray(),
                sample2.ToArray(),
                assumeEqualVariances: false,
                hypothesizedDifference: hypothesizedDifference,
                alternate: alternateHypothesis)
            {
                Size = alpha
            };

            return new TwoSampleHypothesisTestResult(
                tTest.Significant,
                tTest.GetConfidenceInterval(1 - alpha),
                tTest.ObservedDifference);
        }
        /// <summary>
        /// Calculates the N-value for a protein based on its intensity values in two conditions.
        /// The permutationTesting flag selects whether we are calculating permuted or observed N-values.
        ///
        /// The N-value is a metric which combines the p-value and the magnitude change (through the log fold change)
        /// of the protein between the two conditions, enabling significance classification of the protein
        /// on the same principle as volcano plots (volcano plots identify significantly different proteins in
        /// large replicate data sets by plotting the significance statistic — the p-value here — on the y axis,
        /// and the magnitude change — the log fold change here — on the x axis).
        /// </summary>
        /// <param name="proteinFirstConditionIntensityValues">Intensity values observed in the first condition.</param>
        /// <param name="proteinSecondConditionIntensityValues">Intensity values observed in the second condition.</param>
        /// <param name="sOValue">Tuning constant subtracted (squared) from the fold-change term of the N-value.</param>
        /// <param name="permutationTesting">True when computing permuted N-values; only the N-value is returned.</param>
        /// <returns>
        /// For observed data (permutationTesting == false): { N-value, p-value, log fold change }.
        /// For permutation testing: a single-element list containing the N-value.
        /// </returns>
        public List <double> GetNValueUsingTTest(List <double> proteinFirstConditionIntensityValues, List <double> proteinSecondConditionIntensityValues,
                                                 double sOValue, bool permutationTesting)
        {
            double mean1     = proteinFirstConditionIntensityValues.Average();
            double mean2     = proteinSecondConditionIntensityValues.Average();
            double stdDev1   = CalculateProteinIntensityValuesStandardDeviation(proteinFirstConditionIntensityValues, mean1);
            double stdDev2   = CalculateProteinIntensityValuesStandardDeviation(proteinSecondConditionIntensityValues, mean2);
            double variance1 = stdDev1 * stdDev1;
            double variance2 = stdDev2 * stdDev2;

            // The F-test is used to determine whether the variances of the two intensity-value
            // populations are equal; 'significant' gets whether that null hypothesis is rejected,
            // in which case the t-test below assumes unequal variances.
            bool significant = new FTest(variance1, variance2, proteinFirstConditionIntensityValues.Count - 1, proteinSecondConditionIntensityValues.Count - 1).Significant;

            // Create two tailed t test to get p values
            TwoSampleTTest ttest = new TwoSampleTTest(mean1, variance1, proteinFirstConditionIntensityValues.Count,
                                                      mean2, variance2, proteinSecondConditionIntensityValues.Count, !significant);

            double pValue        = ttest.PValue;
            double logpValue     = -Math.Log10(pValue);
            double logfoldChange = mean2 - mean1;

            // Compute the N-value for the protein.
            // NOTE(review): when logfoldChange == 0 this divides by zero and the result is
            // NaN or an infinity — confirm callers tolerate that before relying on it.
            double nValue = (logpValue * (logfoldChange * logfoldChange - sOValue * sOValue)) / ((logfoldChange) * (logfoldChange));

            if (!permutationTesting)
            {
                // Observed data: report the full statistics triple.
                return new List <double> { nValue, pValue, logfoldChange };
            }
            else
            {
                // Permutation testing: only the N-value is needed.
                return new List <double> { nValue };
            }
        }
        /// <summary>
        /// Calculates the N-value for a protein based on its intensity values in two conditions
        /// and appends the resulting statistics to the supplied output lists.
        ///
        /// The N-value combines the p-value and the log fold change of the protein between the
        /// two conditions, following the significance-classification principle of volcano plots.
        /// </summary>
        /// <param name="proteinFirstConditionIntensityValues">Intensity values observed in the first condition.</param>
        /// <param name="proteinSecondConditionIntensityValues">Intensity values observed in the second condition.</param>
        /// <param name="actualNValues">Output list receiving the computed N-value.</param>
        /// <param name="actualPValues">Output list receiving the computed p-value.</param>
        /// <param name="actualLogFoldChange">Output list receiving the computed log fold change.</param>
        /// <param name="sOValue">Tuning constant subtracted (squared) from the fold-change term of the N-value.</param>
        public void GetNValueUsingTTest(List <double> proteinFirstConditionIntensityValues, List <double> proteinSecondConditionIntensityValues,
                                        List <double> actualNValues, List <double> actualPValues, List <double> actualLogFoldChange, double sOValue)
        {
            // Copy the inputs into arrays in one call each, replacing the previous
            // element-by-element copy loops.
            double[] firstConditionIntensityValues  = proteinFirstConditionIntensityValues.ToArray();
            double[] secondConditionIntensityValues = proteinSecondConditionIntensityValues.ToArray();

            double mean1     = CalculateProteinMeanIntensityValue(firstConditionIntensityValues);
            double mean2     = CalculateProteinMeanIntensityValue(secondConditionIntensityValues);
            double stdDev1   = CalculateProteinIntensityValuesStandardDeviation(firstConditionIntensityValues, mean1);
            double stdDev2   = CalculateProteinIntensityValuesStandardDeviation(secondConditionIntensityValues, mean2);
            double variance1 = stdDev1 * stdDev1;
            double variance2 = stdDev2 * stdDev2;

            // The F-test checks whether the two populations have equal variances;
            // Significant == true rejects the equal-variance hypothesis, so the
            // t-test below then assumes unequal variances.
            bool significant = new FTest(variance1, variance2, proteinFirstConditionIntensityValues.Count - 1, proteinSecondConditionIntensityValues.Count - 1).Significant;

            // Create two tailed t test to get p values
            TwoSampleTTest ttest         = new TwoSampleTTest(firstConditionIntensityValues, secondConditionIntensityValues, !significant);
            double         pValue        = ttest.PValue;
            double         logpValue     = -Math.Log10(pValue);
            double         logfoldChange = mean2 - mean1;

            // NOTE(review): when logfoldChange == 0 this divides by zero and the result is
            // NaN or an infinity — confirm downstream consumers tolerate that.
            double nValue = (logpValue * (logfoldChange * logfoldChange - sOValue * sOValue)) / ((logfoldChange) * (logfoldChange));

            actualNValues.Add(nValue);
            actualLogFoldChange.Add(logfoldChange);
            actualPValues.Add(pValue);
        }
        public void TTestPowerAnalysisConstructorTest()
        {
            // Declare two samples
            double[] A = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
            double[] B = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

            double meanA = A.Mean();
            double meanB = B.Mean();

            double varA = A.Variance();
            double varB = B.Variance();

            double sdA = A.StandardDeviation();
            double sdB = B.StandardDeviation();

            // Average standard deviation of the two groups; used below to
            // translate effect sizes back onto the data scale.
            double sigma = Math.Sqrt((varA + varB) / 2.0);

            // Sanity-check the descriptive statistics of both samples.
            Assert.AreEqual(7.259, meanA, 1e-3);
            Assert.AreEqual(5.189, meanB, 1e-3);

            Assert.AreEqual(2.492289, varA, 1e-6);
            Assert.AreEqual(7.091476, varB, 1e-6);

            Assert.AreEqual(1.5786985, sdA, 1e-6);
            Assert.AreEqual(2.6629826, sdB, 1e-6);

            // Perform a hypothesis test
            TwoSampleTTest test = new TwoSampleTTest(A, B, assumeEqualVariances: false);

            Assert.AreEqual(14.351, test.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.14, test.Statistic, 1e-3);
            Assert.AreEqual(0.04999, test.PValue, 1e-5);
            Assert.AreEqual(0.00013662, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.14004519, test.Confidence.Max, 1e-6);
            Assert.IsTrue(test.Significant);

            // Repeat the test assuming pooled (equal) variances.
            test = new TwoSampleTTest(A, B, assumeEqualVariances: true);

            Assert.AreEqual(19, test.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.1921894, test.Statistic, 1e-3);
            Assert.AreEqual(0.0410, test.PValue, 1e-4);
            Assert.AreEqual(0.09364214, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.04653967, test.Confidence.Max, 1e-6);
            Assert.IsTrue(test.Significant);

            // Check the actual power of the test...
            Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);

            // Check how much effect we are trying to detect
            Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);

            // So, what is the minimal difference we can detect?
            Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

            // Create an a posteriori analysis of the experiment
            var analysis = new TwoSampleTTestPowerAnalysis(test);

            analysis.Power = 0.8;     // With 80% power, how much
            analysis.ComputeEffect(); // effect could we really detect?

            Assert.AreEqual(1.29051411, analysis.Effect, 1e-6);



            // Create an a priori power analysis so we can determine the sample
            // size needed to detect at least a difference of 2 points in the
            // student mean grades with at least 80% power:

            analysis = TwoSampleTTestPowerAnalysis.GetSampleSize(1,
              variance1: varA, variance2: varB, power: 0.8);

            Assert.AreEqual(0.4568219, analysis.Effect, 1e-6);

            // Check how many samples we would need to detect this effect with 80% power

            Assert.AreEqual(77, Math.Ceiling(analysis.Samples1));
            Assert.AreEqual(77, Math.Ceiling(analysis.Samples2));
        }
        public void SampleSizeTest1()
        {
            // Example from http://udel.edu/~mcdonald/statttest.html
            // Computed using R's function power.t.test

            // Both groups share the same variance (sd = 4.3).
            double commonVariance = System.Math.Pow(4.3, 2);

            var test = new TwoSampleTTest(
                mean1: 3.2, var1: commonVariance, samples1: 10,
                mean2: 0.0, var2: commonVariance, samples2: 10,
                assumeEqualVariances: true, alternate: TwoSampleHypothesis.ValuesAreDifferent);

            // Clone the test's analysis and ask for the sample size that
            // achieves 80% power at the 5% significance level.
            var target = (TwoSampleTTestPowerAnalysis)test.Analysis.Clone();
            target.Power = 0.80;
            target.Size = 0.05;
            target.ComputeSamples(1);

            double actual = target.Samples1;
            Assert.AreEqual(29.33682, actual, 1e-5);
        }
        public void TTestConstructorTest2()
        {

            // Example from Larson & Farber, Elementary Statistics: Picturing the world.

            /*
             * A random sample of 17 police officers in Brownsville has a mean annual
             * income of $35,800 and a standard deviation of $7,800.  In Greensville,
             * a random sample of 18 police officers has a mean annual income of $35,100
             * and a standard deviation of $7,375.  Test the claim at a = 0.01 that the
             * mean annual incomes in the two cities are not the same.  Assume the 
             * population variances are equal.
             */

            // Brownsville sample summary statistics.
            double mean1 = 35800;
            double stdDev1 = 7800;
            double var1 = stdDev1 * stdDev1;
            int n1 = 17;

            // Greensville sample summary statistics.
            double mean2 = 35100;
            double stdDev2 = 7375;
            double var2 = stdDev2 * stdDev2;
            int n2 = 18;

            // Two-sided pooled-variance t-test built from summary statistics.
            TwoSampleTTest test = new TwoSampleTTest(mean1, var1, n1, mean2, var2, n2,
                assumeEqualVariances: true,
                alternate: TwoSampleHypothesis.ValuesAreDifferent);

            Assert.AreEqual(33, test.DegreesOfFreedom);
            Assert.AreEqual(2564.92, test.StandardError, 1e-3);
            Assert.AreEqual(0.273, test.Statistic, 1e-3);

            // At the a = 0.01 significance level the difference is not significant.
            test.Size = 0.01;

            Assert.IsFalse(test.Significant);
        }
        public void TTestPowerAnalysisConstructorTest()
        {
            // Let's say we have two samples, and we would like to know whether those
            // samples have the same mean. For this, we can perform a two sample T-Test:
            double[] A = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
            double[] B = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

            // Perform the test, assuming the samples have unequal variances
            var test = new TwoSampleTTest(A, B, assumeEqualVariances: false);

            double df = test.DegreesOfFreedom;   // d.f. = 14.351
            double t = test.Statistic;           // t    = 2.14
            double p = test.PValue;              // p    = 0.04999
            bool significant = test.Significant; // true

            // The test gave us an indication that the samples may
            // indeed have come from different distributions (whose
            // mean value is actually distinct from each other).

            // Now, we would like to perform an _a posteriori_ analysis of the 
            // test. When doing an a posteriori analysis, we can not change some
            // characteristics of the test (because it has been already done), 
            // but we can measure some important features that may indicate 
            // whether the test is trustworthy or not.

            // One of the first things would be to check for the test's power.
            // A test's power is 1 minus the probability of rejecting the null
            // hypothesis when the null hypothesis is actually false. It is
            // the other side of the coin when we consider that the P-value
            // is the probability of rejecting the null hypothesis when the
            // null hypothesis is actually true.

            // Ideally, this should be a high value:
            double power = test.Analysis.Power; // 0.5376260

            // Check how much effect we are trying to detect
            double effect = test.Analysis.Effect; // 0.94566

            // With this power, that is the minimal difference we can spot?
            double sigma = Math.Sqrt(test.Variance);
            double thres = test.Analysis.Effect * sigma; // 2.0700909090909

            // This means that, using our test, the smallest difference that
            // we could detect with some confidence would be something around
            // 2 standard deviations. If we would like to say the samples are
            // different when they are less than 2 std. dev. apart, we would
            // need to do repeat our experiment differently.


            // Another way to create the power analysis is to pass 
            // the test to the t-test power analysis constructor:

            // Create an a posteriori analysis of the experiment
            var analysis = new TwoSampleTTestPowerAnalysis(test);

            // When creating a power analysis, we have three things we can
            // change. We can always freely configure two of those things
            // and then ask the analysis to give us the third.

            // Those are:
            double e = analysis.Effect;       // the test's minimum detectable effect size (0.94566)
            double n = analysis.TotalSamples; // the number of samples in the test (21 or (11 + 10))
            double b = analysis.Power;        // the probability of committing a type-2 error (0.53)

            // Let's say we would like to create a test with 80% power.
            analysis.Power = 0.8;
            analysis.ComputeEffect(); // what effect could we detect?

            double detectableEffect = analysis.Effect; // we would detect a difference of 1.290514


            // However, to achieve this 80%, we would need to redo our experiment
            // more carefully. Assuming we are going to redo our experiment, we will
            // have more freedom about what we can change and what we can not. For 
            // better addressing those points, we will create an a priori analysis 
            // of the experiment:

            // We would like to know how many samples we would need to gather in
            // order to achieve a 80% power test which can detect an effect size
            // of one standard deviation:
            //
            analysis = TwoSampleTTestPowerAnalysis.GetSampleSize
            (
                variance1: A.Variance(),
                variance2: B.Variance(),
                delta: 1.0, // the minimum detectable difference we want
                power: 0.8  // the test power that we want
            );

            // How many samples would we need in order to see the effect we need?
            int n1 = (int)Math.Ceiling(analysis.Samples1); // 77
            int n2 = (int)Math.Ceiling(analysis.Samples2); // 77

            // According to our power analysis, we would need at least 77 
            // observations in each sample in order to see the effect we
            // need with the required 80% power.

            Assert.AreEqual(1.2905141186795861, detectableEffect);
            Assert.AreEqual(0.45682188621283815, analysis.Effect, 1e-6);
            Assert.AreEqual(2.0700909090909088, thres);
            Assert.AreEqual(0.53762605885988846, power);

            Assert.AreEqual(77, n1);
            Assert.AreEqual(77, n1);

            double meanA = A.Mean();
            double meanB = B.Mean();

            double varA = A.Variance();
            double varB = B.Variance();

            double sdA = A.StandardDeviation();
            double sdB = B.StandardDeviation();

            double sigma2 = Math.Sqrt((varA + varB) / 2.0);
            Assert.AreEqual(sigma2, sigma);

            Assert.AreEqual(7.259, meanA, 1e-3);
            Assert.AreEqual(5.189, meanB, 1e-3);

            Assert.AreEqual(2.492289, varA, 1e-6);
            Assert.AreEqual(7.091476, varB, 1e-6);

            Assert.AreEqual(1.5786985, sdA, 1e-6);
            Assert.AreEqual(2.6629826, sdB, 1e-6);

            Assert.AreEqual(14.351, df, 1e-3);
            Assert.AreEqual(2.14, t, 1e-3);
            Assert.AreEqual(0.04999, p, 1e-5);
            Assert.AreEqual(0.00013662, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.14004519, test.Confidence.Max, 1e-6);
            Assert.AreEqual(4.7918828787878791, test.Variance);
            Assert.IsTrue(test.Significant);

            Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);
            Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);
            Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

            test = new TwoSampleTTest(A, B, assumeEqualVariances: true);

            Assert.AreEqual(19, test.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.1921894, test.Statistic, 1e-3);
            Assert.AreEqual(0.0410, test.PValue, 1e-4);
            Assert.AreEqual(0.09364214, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.04653967, test.Confidence.Max, 1e-6);
            Assert.IsTrue(test.Significant);

            // Check the actual power of the test...
            Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);

            // Check how much effect we are trying to detect
            Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);

            // So, what is the minimal difference we can detect?
            Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

            // Create an a posteriori analysis of the experiment
            analysis = new TwoSampleTTestPowerAnalysis(test);

            analysis.Power = 0.8;     // With 80% power, how much
            analysis.ComputeEffect(); // effect could we really detect?

            Assert.AreEqual(1.29051411, analysis.Effect, 1e-6);

            // Create an a priori power analysis so we can determine the sample
            // size needed to detect at least a difference of 2 points in the
            // student mean grades with at least 80% power:

            analysis = TwoSampleTTestPowerAnalysis.GetSampleSize(1,
              variance1: varA, variance2: varB, power: 0.8);

            Assert.AreEqual(0.4568219, analysis.Effect, 1e-6);

            // Check how many samples we would need to detect this effect with 80% power

            Assert.AreEqual(77, Math.Ceiling(analysis.Samples1));
            Assert.AreEqual(77, Math.Ceiling(analysis.Samples2));
        }
Exemple #18
0
        /// <summary>
        /// End-to-end walkthrough of two-sample T-test power analysis: runs the test,
        /// inspects its a posteriori power and detectable effect, then performs an
        /// a priori sample-size computation for a desired power of 80%. Values are
        /// pinned against previously verified results.
        /// </summary>
        public void TTestPowerAnalysisConstructorTest()
        {
            // Let's say we have two samples, and we would like to know whether those
            // samples have the same mean. For this, we can perform a two sample T-Test:
            double[] A = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
            double[] B = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

            // Perform the test, assuming the samples have unequal variances
            var test = new TwoSampleTTest(A, B, assumeEqualVariances: false);

            double df          = test.DegreesOfFreedom; // d.f. = 14.351
            double t           = test.Statistic;        // t    = 2.14
            double p           = test.PValue;           // p    = 0.04999
            bool   significant = test.Significant;      // true

            // The test gave us an indication that the samples may
            // indeed have come from different distributions (whose
            // mean value is actually distinct from each other).

            // Now, we would like to perform an _a posteriori_ analysis of the
            // test. When doing an a posteriori analysis, we can not change some
            // characteristics of the test (because it has been already done),
            // but we can measure some important features that may indicate
            // whether the test is trustworthy or not.

            // One of the first things would be to check for the test's power.
            // A test's power is the probability of correctly rejecting the null
            // hypothesis when it is actually false; it equals 1 minus the
            // probability of committing a type-2 error. It is the other side of
            // the coin when we consider that the significance level is the
            // probability of rejecting the null hypothesis when the null
            // hypothesis is actually true.

            // Ideally, this should be a high value:
            double power = test.Analysis.Power; // 0.5376260

            // Check how much effect we are trying to detect
            double effect = test.Analysis.Effect; // 0.94566

            // With this power, what is the minimal difference we can spot?
            double sigma = Math.Sqrt(test.Variance);
            double thres = test.Analysis.Effect * sigma; // 2.0700909090909

            // This means that, using our test, the smallest difference that
            // we could detect with some confidence would be something around
            // 2 standard deviations. If we would like to say the samples are
            // different when they are less than 2 std. dev. apart, we would
            // need to repeat our experiment differently.


            // Another way to create the power analysis is to pass
            // the test to the t-test power analysis constructor:

            // Create an a posteriori analysis of the experiment
            var analysis = new TwoSampleTTestPowerAnalysis(test);

            // When creating a power analysis, we have three things we can
            // change. We can always freely configure two of those things
            // and then ask the analysis to give us the third.

            // Those are:
            double e = analysis.Effect;       // the test's minimum detectable effect size (0.94566)
            double n = analysis.TotalSamples; // the number of samples in the test (21 or (11 + 10))
            double b = analysis.Power;        // the test's power (0.53); the type-2 error rate is 1 - 0.53

            // Let's say we would like to create a test with 80% power.
            analysis.Power = 0.8;
            analysis.ComputeEffect();                  // what effect could we detect?

            double detectableEffect = analysis.Effect; // we would detect a difference of 1.290514


            // However, to achieve this 80%, we would need to redo our experiment
            // more carefully. Assuming we are going to redo our experiment, we will
            // have more freedom about what we can change and what we can not. For
            // better addressing those points, we will create an a priori analysis
            // of the experiment:

            // We would like to know how many samples we would need to gather in
            // order to achieve a 80% power test which can detect an effect size
            // of one standard deviation:
            //
            analysis = TwoSampleTTestPowerAnalysis.GetSampleSize
            (
                variance1: A.Variance(),
                variance2: B.Variance(),
                delta: 1.0, // the minimum detectable difference we want
                power: 0.8  // the test power that we want
            );

            // How many samples would we need in order to see the effect we need?
            int n1 = (int)Math.Ceiling(analysis.Samples1); // 77
            int n2 = (int)Math.Ceiling(analysis.Samples2); // 77

            // According to our power analysis, we would need at least 77
            // observations in each sample in order to see the effect we
            // need with the required 80% power.

            Assert.AreEqual(1.2905141186795861, detectableEffect);
            Assert.AreEqual(0.45682188621283815, analysis.Effect, 1e-6);
            Assert.AreEqual(2.0700909090909088, thres);
            Assert.AreEqual(0.53762605885988846, power);

            Assert.AreEqual(77, n1);
            Assert.AreEqual(77, n2); // fixed: previously asserted n1 twice, leaving n2 unchecked

            double meanA = A.Mean();
            double meanB = B.Mean();

            double varA = A.Variance();
            double varB = B.Variance();

            double sdA = A.StandardDeviation();
            double sdB = B.StandardDeviation();

            double sigma2 = Math.Sqrt((varA + varB) / 2.0);

            Assert.AreEqual(sigma2, sigma);

            Assert.AreEqual(7.259, meanA, 1e-3);
            Assert.AreEqual(5.189, meanB, 1e-3);

            Assert.AreEqual(2.492289, varA, 1e-6);
            Assert.AreEqual(7.091476, varB, 1e-6);

            Assert.AreEqual(1.5786985, sdA, 1e-6);
            Assert.AreEqual(2.6629826, sdB, 1e-6);

            Assert.AreEqual(14.351, df, 1e-3);
            Assert.AreEqual(2.14, t, 1e-3);
            Assert.AreEqual(0.04999, p, 1e-5);
            Assert.AreEqual(0.00013662, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.14004519, test.Confidence.Max, 1e-6);
            Assert.AreEqual(4.7918828787878791, test.Variance);
            Assert.IsTrue(test.Significant);

            Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);
            Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);
            Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

            test = new TwoSampleTTest(A, B, assumeEqualVariances: true);

            Assert.AreEqual(19, test.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.1921894, test.Statistic, 1e-3);
            Assert.AreEqual(0.0410, test.PValue, 1e-4);
            Assert.AreEqual(0.09364214, test.Confidence.Min, 1e-6);
            Assert.AreEqual(4.04653967, test.Confidence.Max, 1e-6);
            Assert.IsTrue(test.Significant);

            // Check the actual power of the test...
            Assert.AreEqual(0.5376260, test.Analysis.Power, 1e-6);

            // Check how much effect we are trying to detect
            Assert.AreEqual(0.9456628, test.Analysis.Effect, 1e-6);

            // So, what is the minimal difference we can detect?
            Assert.AreEqual(2.070090, test.Analysis.Effect * sigma, 1e-6);

            // Create an a posteriori analysis of the experiment
            analysis = new TwoSampleTTestPowerAnalysis(test);

            analysis.Power = 0.8;     // With 80% power, how much
            analysis.ComputeEffect(); // effect could we really detect?

            Assert.AreEqual(1.29051411, analysis.Effect, 1e-6);

            // Create an a priori power analysis so we can determine the sample
            // size needed to detect at least a difference of 1 point in the
            // student mean grades with at least 80% power:
            analysis = TwoSampleTTestPowerAnalysis.GetSampleSize(1,
                variance1: varA, variance2: varB, power: 0.8);

            Assert.AreEqual(0.4568219, analysis.Effect, 1e-6);

            // Check how many samples we would need to detect this effect with 80% power
            Assert.AreEqual(77, Math.Ceiling(analysis.Samples1));
            Assert.AreEqual(77, Math.Ceiling(analysis.Samples2));
        }
Exemple #19
0
        /// <summary>
        /// Runs Welch's two-sample T-test (unequal variances assumed) on the two samples
        /// at the 0.01 significance level and packages the results.
        /// </summary>
        /// <param name="x">First sample.</param>
        /// <param name="y">Second sample.</param>
        /// <param name="radius">Caller-supplied radius; stored unmodified in the result.</param>
        /// <returns>A <see cref="Welch"/> instance carrying the test's statistics.</returns>
        public static Welch TTest(double[] x, double[] y, int radius)
        {
            // assumeEqualVariances: false selects Welch's approximation for the
            // degrees of freedom, which is what makes this "Welch's" T-test.
            var ttest = new TwoSampleTTest(x, y, assumeEqualVariances: false);

            // Use a 1% significance level instead of the default.
            ttest.Size = 0.01;

            // (A hand-rolled Welch implementation that used to live here as
            // commented-out code has been removed; the library test above
            // computes the same statistic, d.f. and p-value.)
            return new Welch
            {
                N1 = x.Length,
                N2 = y.Length,
                Radius = radius,
                PValue = ttest.PValue,
                Statistic = ttest.Statistic,
                Significant = ttest.Significant,
                Size = ttest.Size,
                DegreesOfFreedom = ttest.DegreesOfFreedom,
                EstimatedValue1 = ttest.EstimatedValue1,
                EstimatedValue2 = ttest.EstimatedValue2,
                CriticalValue = ttest.CriticalValue,
            };
        }
Exemple #20
0
        /// <summary>
        /// Computes T-test statistics comparing one physician's indicator values against
        /// the peers in the same sub-group. Levene's test for homogeneity of variances
        /// decides whether the equal- or unequal-variance T-test p-value is reported.
        /// </summary>
        /// <param name="phys">The physician whose data is compared against sub-group peers.</param>
        /// <returns>A populated <see cref="Statistics"/> record; peer/test fields are only
        /// filled in when at least five peer observations exist.</returns>
        public Statistics CalculateTTest(Physician phys)
        {
            bool   isValid      = false;
            double levenesValue = 0;
            double pValue_E     = 0;
            double pValue_U     = 0;
            double pValue       = 0;

            var statistics = new Statistics
            {
                IsValid = false
            };

            // Deferred query: one value per indicator row belonging to this physician.
            var physData = _context.IndicatorsData
                           .Where(p => p.PayrollID == phys.ID)
                           .Select(p => new { value = p.NumeratorValue });

            // Require at least five observations for the physician before computing anything.
            if (physData.Count() >= 5)
            {
                #region Build Physician Array
                var physArray = Util.BuildArray(physData);
                #endregion

                #region Build Peers Array
                // Peers: all rows from other physicians in the same sub-group.
                var peersData = _context.IndicatorsData
                                .Where(p => (p.PayrollID != phys.ID) && (p.OppePhysicianSubGroupID == phys.SubGroupID))
                                .Select(p => new { value = p.NumeratorValue });
                var peersArray = Util.BuildArray(peersData);

                // NOTE(review): Count() re-executes the deferred query that BuildArray
                // just enumerated; the materialized array's length would avoid the
                // second round-trip — TODO confirm and simplify.
                if (peersData.Count() >= 5)
                {
                    statistics.IsValid = true;
                    isValid            = true;
                }
                else
                {
                    statistics.IsValid = false;
                    isValid            = false;
                }
                #endregion


                if (isValid)
                {
                    #region Levene's Test
                    // Levene's test decides which T-test variant's p-value to report below.
                    double[][] doubleArray = new double[][]
                    {
                        physArray,
                        peersArray
                    };
                    var levenesTest = new LeveneTest(doubleArray);
                    levenesValue = levenesTest.PValue;
                    #endregion

                    #region T-Test Equal Variances assumed
                    var TTestEqualVariance = new TwoSampleTTest(physArray, peersArray, assumeEqualVariances: true);
                    pValue_E = TTestEqualVariance.PValue;
                    #endregion

                    #region T-Test UnEqual Variances assumed
                    var TTestUnEqualVariance = new TwoSampleTTest(physArray, peersArray, assumeEqualVariances: false);
                    pValue_U = TTestUnEqualVariance.PValue;
                    #endregion
                    // Report the equal-variance p-value when Levene's test does not reject
                    // variance homogeneity (p > 0.05); otherwise report the Welch p-value.
                    pValue = levenesValue > 0.05 ? pValue_E : pValue_U;
                }

                #region Populating Statistics
                // NOTE(review): this unconditionally overwrites the IsValid flag that was
                // computed above, making the peers-too-small (false) branch ineffective —
                // confirm whether this should instead be 'statistics.IsValid = isValid;'.
                statistics.IsValid           = true;
                statistics.PayrollID         = phys.ID;
                statistics.NumeratorSum      = physArray.Sum();
                statistics.DenominatorSum    = physArray.Count();
                statistics.Mean              = Measures.Mean(physArray);
                statistics.StandardDeviation = Measures.StandardDeviation(physArray);
                if (isValid)
                {
                    statistics.PeerNumeratorSum        = peersArray.Sum();
                    statistics.PeerDenominatorSum      = peersArray.Count();
                    statistics.PeerMean                = Measures.Mean(peersArray);
                    statistics.PeerStandardDeviation   = Measures.StandardDeviation(peersArray);
                    statistics.LevenesTest             = levenesValue;
                    statistics.PValue_EqualVariances   = pValue_E;
                    statistics.PValue_UnequalVariances = pValue_U;
                    statistics.PValue = pValue;
                }
                #endregion
            }

            return(statistics);
        }
        /// <summary>
        /// Generates permuted ("null distribution") N-values for a protein by swapping pairs
        /// of intensity values between the two conditions and re-running the two-sample
        /// T-test on each swapped arrangement. One N-value per permutation is appended to
        /// <paramref name="permutedNValues"/>.
        /// </summary>
        /// <param name="proteinFirstConditionIntensityValues">Intensity values observed in the first condition.</param>
        /// <param name="proteinSecondConditionIntensityValues">Intensity values observed in the second condition.</param>
        /// <param name="permutedNValues">Output list that receives one N-value per permutation.</param>
        /// <param name="sOValue">The s0 tuning constant used in the N-value formula.</param>
        /// <remarks>
        /// NOTE(review): much of this duplicates GetNValueUsingTTest; consider generating the
        /// permutations here and delegating the per-permutation statistics to that method.
        /// Also note that only the first two index combinations from the first condition are
        /// processed (see the early break at the bottom of the outer loop) — confirm intended.
        /// </remarks>
        public void GetNValueUsingPermutationtests(List <double> proteinFirstConditionIntensityValues, List <double> proteinSecondConditionIntensityValues,
                                                   List <double> permutedNValues, double sOValue)
        {
            // NOTE(review): manual element-by-element copies of the input lists;
            // List<T>.ToArray() would produce the same arrays.
            double[] firstConditionIntensityValues  = new double[proteinFirstConditionIntensityValues.Count];
            double[] secondConditionIntensityValues = new double[proteinSecondConditionIntensityValues.Count];

            for (int i = 0; i < proteinFirstConditionIntensityValues.Count; i++)
            {
                firstConditionIntensityValues[i] = proteinFirstConditionIntensityValues[i];
            }

            for (int i = 0; i < proteinSecondConditionIntensityValues.Count; i++)
            {
                secondConditionIntensityValues[i] = proteinSecondConditionIntensityValues[i];
            }

            // Index arrays [0, 1, ..., n-1] used to enumerate all 2-element combinations.
            int[] indicesOfFirstConditionIntensityValues  = new int[firstConditionIntensityValues.Length];
            int[] indicesOfSecondConditionIntensityValues = new int[secondConditionIntensityValues.Length];
            for (int i = 0; i < proteinFirstConditionIntensityValues.Count; i++)
            {
                indicesOfFirstConditionIntensityValues[i] = i;
            }
            for (int i = 0; i < proteinSecondConditionIntensityValues.Count; i++)
            {
                indicesOfSecondConditionIntensityValues[i] = i;
            }

            List <List <int> > allTwoIndiciesCombinationsFromFirstCondition  = GenerateAllCombinationsOfTwoIndices(indicesOfFirstConditionIntensityValues);
            List <List <int> > allTwoIndiciesCombinationsFromSecondCondition = GenerateAllCombinationsOfTwoIndices(indicesOfSecondConditionIntensityValues);

            int count = 0;

            // Each (pair-from-condition-1, pair-from-condition-2) combination defines one
            // permutation: the two chosen values from each condition trade places.
            foreach (var twoIndiciesCombinationEntryFromFirstCondition in allTwoIndiciesCombinationsFromFirstCondition)
            {
                foreach (var twoIndiciesCombinationEntryFromSecondCondition in allTwoIndiciesCombinationsFromSecondCondition)
                {
                    // these the new arrays which will be made after swapping intensity values between the two conditions
                    double[] swappedFirstConditionIntensityValues  = new double[firstConditionIntensityValues.Length];
                    double[] swappedSecondConditionIntensityValues = new double[secondConditionIntensityValues.Length];
                    int      swappedFirstConditionArrayTracker     = 0;
                    int      swappedSecondConditionArrayTracker    = 0;

                    int[] indiciesToSwapFromFirstCondition     = new int[2];
                    int[] indiciesToSwapFromSecondCondition    = new int[2];
                    int   removeIndiciesFirstConditionTracker  = 0;
                    int   removeIndiciesSecondConditionTracker = 0;

                    // store the indices, corresponding to intensity values, to be swapped from first condition
                    foreach (var index in twoIndiciesCombinationEntryFromFirstCondition)
                    {
                        indiciesToSwapFromFirstCondition[removeIndiciesFirstConditionTracker] = index;
                        removeIndiciesFirstConditionTracker++;
                    }

                    // store the indices, corresponding to intensity values, to be swapped from second condition
                    foreach (var index in twoIndiciesCombinationEntryFromSecondCondition)
                    {
                        indiciesToSwapFromSecondCondition[removeIndiciesSecondConditionTracker] = index;
                        removeIndiciesSecondConditionTracker++;
                    }

                    // add the intensity values to be swapped from first condition the second condition
                    for (int j = 0; j < indiciesToSwapFromFirstCondition.Count(); j++)
                    {
                        swappedSecondConditionIntensityValues[swappedSecondConditionArrayTracker] = firstConditionIntensityValues[indiciesToSwapFromFirstCondition[j]];
                        swappedSecondConditionArrayTracker++;
                    }


                    // add the intensity values to be swapped from second condition the first condition
                    for (int j = 0; j < indiciesToSwapFromSecondCondition.Count(); j++)
                    {
                        swappedFirstConditionIntensityValues[swappedFirstConditionArrayTracker] = secondConditionIntensityValues[indiciesToSwapFromSecondCondition[j]];
                        swappedFirstConditionArrayTracker++;
                    }

                    // now we add the remaining intensity values from the first condition to the swappedFirstCondition Array
                    for (int j = 0; j < firstConditionIntensityValues.Count(); j++)
                    {
                        if (indiciesToSwapFromFirstCondition.Contains(j))
                        {
                            continue;
                        }
                        swappedFirstConditionIntensityValues[swappedFirstConditionArrayTracker] = firstConditionIntensityValues[j];
                        swappedFirstConditionArrayTracker++;
                    }

                    // now we add the remaining intensity values from the second condition to the swappedSecondCondition Array
                    for (int j = 0; j < secondConditionIntensityValues.Count(); j++)
                    {
                        if (indiciesToSwapFromSecondCondition.Contains(j))
                        {
                            continue;
                        }
                        swappedSecondConditionIntensityValues[swappedSecondConditionArrayTracker] = secondConditionIntensityValues[j];
                        swappedSecondConditionArrayTracker++;
                    }


                    // at this stage we have the newly made swapped arrays with mixture of groups.
                    // need to proceed with T tests for these groups to generate fake p values.

                    double firstConditionIntensityMean         = CalculateProteinMeanIntensityValue(swappedFirstConditionIntensityValues);
                    double secondConditionIntensityMean        = CalculateProteinMeanIntensityValue(swappedSecondConditionIntensityValues);
                    double firstConditionIntensityStandardDev  = CalculateProteinIntensityValuesStandardDeviation(swappedFirstConditionIntensityValues, firstConditionIntensityMean);
                    double secondConditionIntensityStandardDev = CalculateProteinIntensityValuesStandardDeviation(swappedSecondConditionIntensityValues, secondConditionIntensityMean);
                    double firstConditionIntensityVariance     = firstConditionIntensityStandardDev * firstConditionIntensityStandardDev;
                    double secondConditionIntensityVariance    = secondConditionIntensityStandardDev * secondConditionIntensityStandardDev;

                    // F-test decides whether the two permuted groups have equal variances,
                    // which selects the T-test variant below (equal variances when the
                    // F-test is NOT significant).
                    var  ftest       = new FTest(firstConditionIntensityVariance, secondConditionIntensityVariance, swappedFirstConditionIntensityValues.Length - 1, swappedSecondConditionIntensityValues.Length - 1);
                    bool significant = ftest.Significant; // gets whether null hypothesis can be rejected

                    // Create two tailed t test to get p values
                    TwoSampleTTest ttest         = new TwoSampleTTest(swappedFirstConditionIntensityValues, swappedSecondConditionIntensityValues, !significant);
                    double         pValue        = ttest.PValue;
                    double         logpValue     = -Math.Log10(pValue);
                    double         logfoldChange = secondConditionIntensityMean - firstConditionIntensityMean;

                    // N-value formula: -log10(p) scaled by (d^2 - s0^2) / d^2 where d is the log fold change.
                    permutedNValues.Add((logpValue * (logfoldChange * logfoldChange - sOValue * sOValue)) / ((logfoldChange) * (logfoldChange)));
                }
                count++;
                // Early exit: only the first two combinations from the first
                // condition are processed (caps the number of permutations).
                if (count == 2)
                {
                    break;
                }
            }
        }
        /// <summary>
        /// Verifies descriptive statistics, both T-test variants (Welch and Student),
        /// and the associated power analysis for a fixed pair of samples.
        /// </summary>
        public void TTestPowerAnalysisConstructorTest()
        {
            // Fixed input samples whose means we want to compare.
            double[] sampleA = { 5.0, 6.0, 7.9, 6.95, 5.3, 10.0, 7.48, 9.4, 7.6, 8.0, 6.22 };
            double[] sampleB = { 5.0, 1.6, 5.75, 5.80, 2.9, 8.88, 4.56, 2.4, 5.0, 10.0 };

            // Descriptive statistics for each sample.
            var varianceA = sampleA.Variance();
            var varianceB = sampleB.Variance();

            Assert.AreEqual(7.259, sampleA.Mean(), 1e-3);
            Assert.AreEqual(5.189, sampleB.Mean(), 1e-3);

            Assert.AreEqual(2.492289, varianceA, 1e-6);
            Assert.AreEqual(7.091476, varianceB, 1e-6);

            Assert.AreEqual(1.5786985, sampleA.StandardDeviation(), 1e-6);
            Assert.AreEqual(2.6629826, sampleB.StandardDeviation(), 1e-6);

            // Square root of the average of the two variances.
            var pooledSigma = Math.Sqrt((varianceA + varianceB) / 2.0);

            // Hypothesis test without assuming equal variances (Welch).
            var welch = new TwoSampleTTest(sampleA, sampleB, assumeEqualVariances: false);

            Assert.AreEqual(14.351, welch.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.14, welch.Statistic, 1e-3);
            Assert.AreEqual(0.04999, welch.PValue, 1e-5);
            Assert.AreEqual(0.00013662, welch.Confidence.Min, 1e-6);
            Assert.AreEqual(4.14004519, welch.Confidence.Max, 1e-6);
            Assert.IsTrue(welch.Significant);

            // Same test, this time assuming equal variances (Student).
            var student = new TwoSampleTTest(sampleA, sampleB, assumeEqualVariances: true);

            Assert.AreEqual(19, student.DegreesOfFreedom, 1e-3);
            Assert.AreEqual(2.1921894, student.Statistic, 1e-3);
            Assert.AreEqual(0.0410, student.PValue, 1e-4);
            Assert.AreEqual(0.09364214, student.Confidence.Min, 1e-6);
            Assert.AreEqual(4.04653967, student.Confidence.Max, 1e-6);
            Assert.IsTrue(student.Significant);

            // A posteriori: actual power, targeted effect size, and the
            // smallest detectable difference in original units.
            Assert.AreEqual(0.5376260, student.Analysis.Power, 1e-6);
            Assert.AreEqual(0.9456628, student.Analysis.Effect, 1e-6);
            Assert.AreEqual(2.070090, student.Analysis.Effect * pooledSigma, 1e-6);

            // With the power raised to 80%, which effect could this experiment detect?
            var posteriori = new TwoSampleTTestPowerAnalysis(student);
            posteriori.Power = 0.8;
            posteriori.ComputeEffect();

            Assert.AreEqual(1.29051411, posteriori.Effect, 1e-6);



            // A priori: how many samples are needed to detect a difference of
            // one point in the mean grades with at least 80% power?
            var priori = TwoSampleTTestPowerAnalysis.GetSampleSize(1,
                variance1: varianceA, variance2: varianceB, power: 0.8);

            Assert.AreEqual(0.4568219, priori.Effect, 1e-6);

            // 77 observations per group are required for that effect at 80% power.
            Assert.AreEqual(77, Math.Ceiling(priori.Samples1));
            Assert.AreEqual(77, Math.Ceiling(priori.Samples2));
        }
Exemple #23
0
        /// <summary>
        /// Builds a <see cref="Statistics"/> record comparing one physician's Indicator_12
        /// numerator values with those of the peers in the same sub-group. Levene's test
        /// decides whether the equal- or unequal-variance T-test p-value is reported.
        /// </summary>
        /// <param name="context">Source of the Indicator_12 rows.</param>
        /// <param name="item">Physician whose values are compared against peers.</param>
        /// <returns>Statistics for the physician; peer test fields remain at their
        /// defaults (zero) when fewer than five peer rows exist.</returns>
        public Statistics CalculateStats(PValueDbContext context, Physician item)
        {
            var phys = context.Indicator_12
                       .Where(p => p.PayrollID == item.ID)
                       .Select(p => new { num = p.NumeratorValue });

            var physCount = phys.Count();

            double[] x1         = new double[physCount];
            double   physNumVal = 0;
            double   physDenVal = 0;
            double   physMean   = 0;
            int      i          = 0;

            // NOTE(review): the query is enumerated again here after Count() above; if the
            // underlying table changed in between, x1 could be mis-sized — TODO confirm.
            foreach (var phy in phys)
            {
                x1[i] = phy.num;
                physNumVal += phy.num;
                physDenVal++;
                i++;
            }
            if (physDenVal != 0)
            {
                physMean = physNumVal / physDenVal;
            }

            // Peers: everyone else in the same sub-group.
            var peers = context.Indicator_12
                        .Where(p => p.OppePhysicianSubGroupID == item.SubGroupID)
                        .Where(p => p.PayrollID != item.ID)
                        .Select(p => new
            {
                num = p.NumeratorValue
            });

            var peersCount = peers.Count();

            double[] x2 = new double[peersCount];
            double peersNumVal = 0;
            double peersDenVal = 0;
            double peersMean   = 0;


            double levenesTest = 0;
            double pValueA     = 0;
            double pValueNA    = 0;

            // Fixed: reuse the cached count instead of re-executing the deferred
            // query with a second peers.Count() call.
            if (peersCount >= 5)
            {
                int j = 0;
                foreach (var peer in peers)
                {
                    x2[j]        = peer.num;
                    peersNumVal += peer.num;
                    peersDenVal++;
                    j++;
                }
                if (peersDenVal != 0)
                {
                    peersMean = peersNumVal / peersDenVal;
                }

                #region Levene's Test
                // Levene's test for variance homogeneity picks which T-test
                // variant's p-value gets reported below.
                double[][] X = new double[][]
                {
                    x1,
                    x2
                };

                var levenes = new LeveneTest(X);
                levenesTest = levenes.PValue;
                #endregion

                // Diagnostic trace; kept to preserve observable behavior.
                Console.WriteLine($"Grp = {item.SubGroupID} - Phys = {item.ID}");

                #region T-Test Equal variances assumed
                var ta = new TwoSampleTTest(x1, x2, assumeEqualVariances: true);
                pValueA = ta.PValue;
                #endregion

                #region T-Test Equal variances not assumed
                var tna = new TwoSampleTTest(x1, x2, assumeEqualVariances: false);
                pValueNA = tna.PValue;
                #endregion
            }

            var stats = new Statistics
            {
                Count                   = physCount,
                PayrollID               = item.ID,
                NumeratorSum            = physNumVal,
                DenominatorSum          = physDenVal,
                Mean                    = physMean,
                StandardDeviation       = Measures.StandardDeviation(x1),
                PeerCount               = peersCount,
                PeerNumeratorSum        = peersNumVal,
                PeerDenominatorSum      = peersDenVal,
                PeerMean                = peersMean,
                PeerStandardDeviation   = Measures.StandardDeviation(x2),
                LevenesTest             = levenesTest,
                PValue_EqualVariances   = pValueA,
                PValue_UnequalVariances = pValueNA,
                // Equal-variance p-value when Levene's test does not reject
                // homogeneity (p > 0.05); otherwise the Welch p-value.
                PValue                  = levenesTest > 0.05 ? pValueA : pValueNA
            };

            return(stats);
        }