public void BivariateSampleManipulations()
        {
            // Exercise every Add overload, then transposition, removal, containment, and clearing.
            BivariateSample sample = new BivariateSample();

            sample.Add(1.0, 9.0);
            sample.Add(new XY(2.0, 8.0));
            sample.Add(new double[] { 3.0, 4.0 }, new double[] { 7.0, 6.0 });
            sample.Add(new XY[] { new XY(5.0, 5.0), new XY(6.0, 4.0) });
            Assert.IsTrue(sample.Count == 6);

            // Transposing swaps X and Y values; doing it twice restores the original.
            Assert.IsFalse(sample.X.Contains(9.0));
            sample.TransposeXY();
            Assert.IsTrue(sample.X.Contains(9.0));
            sample.TransposeXY();

            // Removing a point succeeds once, then fails for the now-absent point.
            Assert.IsTrue(sample.Remove(2.0, 8.0));
            Assert.IsTrue(sample.Count == 5);
            Assert.IsFalse(sample.Remove(2.0, 8.0));
            Assert.IsTrue(sample.Count == 5);
            Assert.IsTrue(sample.Remove(new XY(6.0, 4.0)));
            Assert.IsTrue(sample.Count == 4);

            // Containment is ordered: (1, 9) is present but (9, 1) is not.
            Assert.IsTrue(sample.Contains(1.0, 9.0));
            Assert.IsFalse(sample.Contains(9.0, 1.0));
            Assert.IsTrue(sample.Contains(new XY(4.0, 6.0)));

            sample.Clear();
            Assert.IsTrue(sample.Count == 0);
        }
Beispiel #2
0
        public void TestBivariateRegression()
        {
            // Do a bunch of linear regressions. r^2 should be distributed as expected.

            // True intercept and slope of the underlying model y = a0 + b0 * x + e.
            double a0 = 1.0;
            double b0 = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List <double> r2Sample = new List <double>();

            for (int i = 0; i < 500; i++)
            {
                // Generate a small synthetic data set and fit a line to it.
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                LinearRegressionResult fit = xySample.LinearRegression();

                // Only r^2 is needed; the intercept/slope values were previously read
                // into unused locals, which have been removed.
                r2Sample.Add(fit.RSquared);
            }

            // With m = 2 fit parameters and n = 10 points, r^2 ~ Beta((m-1)/2, (n-m)/2).
            ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
            TestResult             ks             = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
Beispiel #3
0
        // Helper method for PerformMoleculeCovarianceNetworkAnalysis(). Takes paired fold changes for two molecules
        // and calculates a Spearman's rho coefficient and associated two-tailed p-value.
        // These data are returned in a Correlation data object, or null if the result is NaN or infinite.
        public static Correlation GetSpearmanCorrelation(List <double> molecule_a_fold_changes, List <double> molecule_b_fold_changes, int molecule_a_identifier, int molecule_b_identifier)
        {
            Correlation new_correlation = new Correlation();

            new_correlation.mol_id_a    = molecule_a_identifier;
            new_correlation.mol_id_b    = molecule_b_identifier;
            // Use the Count property rather than the Count() extension method.
            new_correlation.data_points = molecule_a_fold_changes.Count;

            // Pair the fold changes point-by-point.
            // NOTE(review): assumes both lists have the same length — confirm against callers.
            var bs = new BivariateSample("dataOne", "dataTwo");
            for (int i = 0; i < molecule_a_fold_changes.Count; i++)
            {
                bs.Add(molecule_a_fold_changes[i], molecule_b_fold_changes[i]);
            }

            var res = bs.SpearmanRhoTest();

            new_correlation.correlation = res.Statistic;
            // Two-tailed p-value: double the tail probability on the side of the observed statistic.
            new_correlation.p_value = (res.Statistic < 0) ? (2 * res.LeftProbability) : (2 * res.RightProbability);

            // Reject numerically invalid results.
            if (!double.IsNaN(new_correlation.correlation) && !double.IsNaN(new_correlation.p_value) && !double.IsInfinity(new_correlation.p_value) && !double.IsInfinity(new_correlation.correlation))
            {
                return(new_correlation);
            }
            return(null);
        }
        public void SpearmanNullDistributionTest()
        {
            // Pick independent distributions for x and y; they need not be normal or related.
            Distribution xDist     = new UniformDistribution();
            Distribution yDist     = new CauchyDistribution();
            Random       generator = new Random(1);

            // Generate bivariate samples of several sizes.
            foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8))
            {
                Sample       statistics       = new Sample();
                Distribution nullDistribution = null;

                for (int trial = 0; trial < 128; trial++)
                {
                    BivariateSample pairs = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        pairs.Add(xDist.GetRandomValue(generator), yDist.GetRandomValue(generator));
                    }

                    TestResult spearman = pairs.SpearmanRhoTest();
                    statistics.Add(spearman.Statistic);
                    nullDistribution = spearman.Distribution;
                }

                // The collected rho statistics should match the claimed null distribution.
                TestResult kuiper = statistics.KuiperTest(nullDistribution);
                Console.WriteLine("n={0} P={1}", n, kuiper.LeftProbability);
                Assert.IsTrue(kuiper.RightProbability > 0.05);

                Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
                Assert.IsTrue(statistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Variance));
            }
        }
Beispiel #5
0
        public void NormalFitCovariances()
        {
            NormalDistribution N = new NormalDistribution(-1.0, 2.0);

            // Collect the fitted (mu, sigma) pairs so we can measure their means,
            // variances, and covariance across many independent fits.
            BivariateSample parameters = new BivariateSample();
            // Columns: claimed var(mu), claimed var(sigma), claimed cov(mu, sigma).
            MultivariateSample covariances = new MultivariateSample(3);

            // A bunch of times, create a normal sample.
            for (int trial = 0; trial < 128; trial++)
            {
                // We use small samples so the variation in mu and sigma will be more substantial.
                Sample data = TestUtilities.CreateSample(N, 8, trial);

                // Fit each sample to a normal distribution.
                NormalFitResult fit = NormalDistribution.FitToSample(data);

                // Record the fitted mu and sigma values.
                parameters.Add(fit.Mean.Value, fit.StandardDeviation.Value);

                // Also record the claimed covariances among these parameters.
                var claimed = fit.Parameters.CovarianceMatrix;
                covariances.Add(claimed[0, 0], claimed[1, 1], claimed[0, 1]);
            }

            // The mean fit values should agree with the population distribution.
            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.StandardDeviation));

            // But also the covariances of those fit values should agree with the claimed covariances.
            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(2).Mean));
        }
Beispiel #6
0
        public void Bug6162()
        {
            // When UncertainMeasurementSample.FitToPolynomial used Cholesky inversion of (A^T A), the inversion
            // would fail when roundoff errors made the matrix non-positive-definite. We have since changed
            // to QR decomposition, which is more robust.

            // Real data.
            double[] xValues = new double[] { 40270.65625, 40270.6569444444, 40270.6576388888, 40270.6583333332, 40270.6590277776,
                                              40270.659722222, 40270.6604166669, 40270.6611111113, 40270.6618055557, 40270.6625000001 };

            double[] yValues = new double[] { 246.824996948242, 246.850006103516, 245.875, 246.225006103516, 246.975006103516,
                                              247.024993896484, 246.949996948242, 246.875, 247.5, 247.100006103516 };

            // Fitting as uncertain measurements (unit uncertainty) should not throw.
            UncertainMeasurementSample measurements = new UncertainMeasurementSample();
            for (int i = 0; i < 10; i++)
            {
                measurements.Add(xValues[i], yValues[i], 1);
            }
            UncertainMeasurementFitResult measurementFit = measurements.FitToPolynomial(3);

            // Polynomial regression on the same points should also succeed.
            BivariateSample pairs = new BivariateSample();
            for (int i = 0; i < 10; i++)
            {
                pairs.Add(xValues[i], yValues[i]);
            }
            PolynomialRegressionResult regression = pairs.PolynomialRegression(3);

            foreach (Parameter parameter in regression.Parameters)
            {
                Console.WriteLine(parameter);
            }
        }
        public void PearsonRDistribution()
        {
            Random generator = new Random(1);

            // The underlying sample variables must be normal, but can have any parameters.
            NormalDistribution xDist = new NormalDistribution(1, 2);
            NormalDistribution yDist = new NormalDistribution(3, 4);

            // Use several small sample sizes, where the difference from the normal distribution is visible:
            // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
            foreach (int n in new int[] { 3, 4, 5, 6, 8 })
            {
                Console.WriteLine("n={0}", n);

                // Collect r values from bivariate samples with no cross-correlation.
                Sample rValues = new Sample();
                for (int trial = 0; trial < 100; trial++)
                {
                    BivariateSample pairs = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        pairs.Add(xDist.GetRandomValue(generator), yDist.GetRandomValue(generator));
                    }
                    rValues.Add(pairs.PearsonRTest().Statistic);
                }

                // Check whether r is distributed as expected.
                TestResult result = rValues.KolmogorovSmirnovTest(new PearsonRDistribution(n));
                Console.WriteLine("P={0}", result.LeftProbability);
                Assert.IsTrue(result.LeftProbability < 0.95);
            }
        }
        public void BivariateLinearRegressionGoodnessOfFitDistribution()
        {
            // Create uncorrelated x and y values; the F-test statistics returned by
            // linear fits should then follow the expected F-distribution.

            Random             generator = new Random(987654321);
            NormalDistribution xDist     = new NormalDistribution(1.0, 2.0);
            NormalDistribution yDist     = new NormalDistribution(-3.0, 4.0);

            Sample fStatistics = new Sample();

            for (int trial = 0; trial < 127; trial++)
            {
                BivariateSample pairs = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    pairs.Add(xDist.GetRandomValue(generator), yDist.GetRandomValue(generator));
                }
                fStatistics.Add(pairs.LinearRegression().GoodnessOfFit.Statistic);
            }

            // With 7 points and 2 fit parameters, F should follow F(1, 5).
            Distribution expected = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fStatistics.PopulationMean, expected.Mean);
            TestResult ks = fStatistics.KolmogorovSmirnovTest(expected);

            Console.WriteLine(ks.LeftProbability);
            Assert.IsTrue(ks.LeftProbability < 0.95);
        }
Beispiel #9
0
        public void WilcoxonNullDistribution()
        {
            // Pick a very non-normal distribution.
            ContinuousDistribution source = new ExponentialDistribution();

            Random generator = new Random(271828);

            foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4))
            {
                Sample statistics = new Sample();
                ContinuousDistribution nullDistribution = null;
                for (int trial = 0; trial < 128; trial++)
                {
                    // Draw n unrelated (x, y) pairs; arguments evaluate left-to-right,
                    // so x is drawn before y, as in the original loop body.
                    BivariateSample pairs = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        pairs.Add(source.GetRandomValue(generator), source.GetRandomValue(generator));
                    }
                    TestResult wilcoxon = pairs.WilcoxonSignedRankTest();
                    statistics.Add(wilcoxon.Statistic);
                    nullDistribution = wilcoxon.Distribution;
                }

                // The collected W statistics should match the claimed null distribution.
                TestResult ks = statistics.KolmogorovSmirnovTest(nullDistribution);
                Assert.IsTrue(ks.Probability > 0.05);

                Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
                Assert.IsTrue(statistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
            }
        }
        public void MultivariateLinearRegressionAgreement()
        {
            // Regressing column 0 on column 1 and column 1 on column 0 (after swapping the
            // columns) should produce the same fit, with parameter indices permuted, and the
            // bivariate LinearRegression should agree with the multivariate one.
            Random rng = new Random(1);

            MultivariateSample SA = new MultivariateSample(2);

            for (int i = 0; i < 10; i++)
            {
                SA.Add(rng.NextDouble(), rng.NextDouble());
            }
            // Regress column 0 on the remaining column.
            RegressionResult RA = SA.LinearRegression(0);
            ColumnVector     PA = RA.Parameters.Best;
            SymmetricMatrix  CA = RA.Parameters.Covariance;

            // Swap the columns and regress column 1 (the original column 0) instead.
            MultivariateSample SB = SA.Columns(1, 0);
            RegressionResult   RB = SB.LinearRegression(1);
            ColumnVector       PB = RB.Parameters.Best;
            SymmetricMatrix    CB = RB.Parameters.Covariance;

            // Parameters and covariances should agree up to the index permutation (0 <-> 1).
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[0], PB[1])); Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[1], PB[0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 0], CB[1, 1])); Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 1], CB[1, 0])); Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[1, 1], CB[0, 0]));

            // The bivariate regression of the same two columns should agree exactly
            // with the multivariate result (same parameter ordering as RA).
            BivariateSample  SC = SA.TwoColumns(1, 0);
            RegressionResult RC = SC.LinearRegression();
            ColumnVector     PC = RC.Parameters.Best;
            SymmetricMatrix  CC = RC.Parameters.Covariance;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA, PC));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA, CC));
        }
Beispiel #11
0
        public void KendallNullDistributionTest()
        {
            // Pick independent distributions for x and y; they need not be normal or related.
            ContinuousDistribution xDist     = new LogisticDistribution();
            ContinuousDistribution yDist     = new ExponentialDistribution();
            Random                 generator = new Random(314159265);

            // Generate bivariate samples of several sizes.
            foreach (int n in TestUtilities.GenerateIntegerValues(8, 64, 4))
            {
                Sample statistics = new Sample();
                ContinuousDistribution nullDistribution = null;

                for (int trial = 0; trial < 128; trial++)
                {
                    BivariateSample pairs = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        pairs.Add(xDist.GetRandomValue(generator), yDist.GetRandomValue(generator));
                    }

                    TestResult kendall = pairs.KendallTauTest();
                    statistics.Add(kendall.Statistic);
                    nullDistribution = kendall.Distribution;
                }

                // The collected tau statistics should match the claimed null distribution.
                TestResult ks = statistics.KolmogorovSmirnovTest(nullDistribution);
                Assert.IsTrue(ks.RightProbability > 0.05);

                Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
                Assert.IsTrue(statistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Variance));
            }
        }
        public void WaldFit()
        {
            WaldDistribution source = new WaldDistribution(3.5, 2.5);

            // Fitted (mean, shape) pairs, plus the claimed variances and covariance of those estimates.
            BivariateSample    parameters = new BivariateSample();
            MultivariateSample variances  = new MultivariateSample(3);

            for (int trial = 0; trial < 128; trial++)
            {
                Sample data = SampleTest.CreateSample(source, 16, trial);

                FitResult fit = WaldDistribution.FitToSample(data);
                parameters.Add(fit.Parameters[0], fit.Parameters[1]);
                variances.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));

                Assert.IsTrue(fit.GoodnessOfFit.Probability > 0.01);
            }

            // The mean fitted parameters should agree with the population values.
            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(source.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(source.Shape));

            // The observed spread of the fitted parameters should agree with the claimed (co)variances.
            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
        }
Beispiel #13
0
        public void ContingencyTableProbabilitiesAndUncertainties()
        {
            // start with an underlying population of cell probabilities (sums to 1)
            double[,] pp = new double[, ]
            {
                { 1.0 / 45.0, 2.0 / 45.0, 3.0 / 45.0 },
                { 4.0 / 45.0, 5.0 / 45.0, 6.0 / 45.0 },
                { 7.0 / 45.0, 8.0 / 45.0, 9.0 / 45.0 }
            };

            // form 50 contingency tables, each with N = 50
            // each BivariateSample pairs an estimated value (X) with its claimed uncertainty (Y)
            Random          rng    = new Random(314159);
            BivariateSample p22s   = new BivariateSample();
            BivariateSample pr0s   = new BivariateSample();
            BivariateSample pc1s   = new BivariateSample();
            BivariateSample pr2c0s = new BivariateSample();
            BivariateSample pc1r2s = new BivariateSample();

            for (int i = 0; i < 50; i++)
            {
                ContingencyTable T = new ContingencyTable(3, 3);
                for (int j = 0; j < 50; j++)
                {
                    // pick a random cell according to the population probabilities
                    int r, c;
                    ChooseRandomCell(pp, rng.NextDouble(), out r, out c);
                    T.Increment(r, c);
                }

                Assert.IsTrue(T.Total == 50);

                // for each contingency table, compute estimates of various population quantities

                UncertainValue p22   = T.ProbabilityOf(2, 2);
                UncertainValue pr0   = T.ProbabilityOfRow(0);
                UncertainValue pc1   = T.ProbabilityOfColumn(1);
                UncertainValue pr2c0 = T.ProbabilityOfRowConditionalOnColumn(2, 0);
                UncertainValue pc1r2 = T.ProbabilityOfColumnConditionalOnRow(1, 2);
                p22s.Add(p22.Value, p22.Uncertainty);
                pr0s.Add(pr0.Value, pr0.Uncertainty);
                pc1s.Add(pc1.Value, pc1.Uncertainty);
                pr2c0s.Add(pr2c0.Value, pr2c0.Uncertainty);
                pc1r2s.Add(pc1r2.Value, pc1r2.Uncertainty);
            }

            // the estimated population mean of each probability should include the correct probability in the underlying distribution
            Assert.IsTrue(p22s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(9.0 / 45.0));
            Assert.IsTrue(pr0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(6.0 / 45.0));
            Assert.IsTrue(pc1s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(15.0 / 45.0));
            Assert.IsTrue(pr2c0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(7.0 / 12.0));
            Assert.IsTrue(pc1r2s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(8.0 / 24.0));

            // the estimated uncertainty for each population parameter should be the standard deviation across independent measurements
            // since the reported uncertainty changes each time, we use the mean value for comparison
            Assert.IsTrue(p22s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(p22s.Y.Mean));
            Assert.IsTrue(pr0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr0s.Y.Mean));
            Assert.IsTrue(pc1s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1s.Y.Mean));
            Assert.IsTrue(pr2c0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr2c0s.Y.Mean));
            Assert.IsTrue(pc1r2s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1r2s.Y.Mean));
        }
Beispiel #14
0
        public void BivariateLinearRegressionNullDistribution()
        {
            // Create uncorrelated x and y values.
            // The distribution of F-test statistics returned by linear fits should follow the expected F-distribution.

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            // The previous version kept a separate Sample "fs" that received exactly the
            // same values as fSample; the duplicate has been removed.
            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                rSample.Add(result.R.Statistic);
                rDistribution = result.R.Distribution;

                fSample.Add(result.F.Statistic);
                fDistribution = result.F.Distribution;

                // The F statistic should agree with the one reported by the ANOVA decomposition.
                Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);

                // For simple linear regression, the R test and the F test are equivalent,
                // so their p-values should agree to high precision.
                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                                  result.R.Probability, result.F.Probability,
                                  new EvaluationSettings()
                {
                    RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16
                }
                                  ));
            }

            // With 7 points and 2 fit parameters, F should follow F(1, 5).
            ContinuousDistribution fd = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fSample.PopulationMean, fd.Mean);
            TestResult t = fSample.KolmogorovSmirnovTest(fd);

            Console.WriteLine(t.LeftProbability);
            Assert.IsTrue(t.LeftProbability < 0.95);

            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
Beispiel #15
0
        // Not fixing this bug; use Polynomial interpolation for this scenario instead
        //[TestMethod]
        public void Bug6392()
        {
            // The bug report requests support for regression with the number of points
            // equal to the number of fit parameters, i.e. an exact polynomial fit.
            BivariateSample pairs = new BivariateSample();
            pairs.Add(0, 1);
            pairs.Add(1, -1);
            var fit = pairs.LinearRegression();
        }
        public void LinearRegressionVariances()
        {
            // Do a set of linear regression fits. (The previous comment said "logistic", which was wrong.)
            // Make sure not only that the fit parameters are what they should be, but that their
            // variances/covariances are as returned.

            Random rng = new Random(314159);

            // Define line parameters.
            double a0 = 2.0; double b0 = -1.0;

            // x's may be drawn from any distribution; noise should be drawn from a normal distribution.
            // These are loop-invariant, so construct them once instead of on every iteration.
            ContinuousDistribution xd = new LogisticDistribution();
            ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

            // Do a lot of fits, recording results of each.
            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int k = 0; k < 128; k++)
            {
                // Generate a synthetic data set.
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 12; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // Do the regression.
                LinearRegressionResult result = sample.LinearRegression();

                // Record the fitted parameters, their claimed (co)variances, and a prediction.
                UncertainValue p = result.Predict(12.0);
                data.AddRow(new Dictionary <string, object>()
                {
                    { "a", result.Intercept.Value },
                    { "va", result.Parameters.VarianceOf("Intercept") },
                    { "b", result.Slope.Value },
                    { "vb", result.Parameters.VarianceOf("Slope") },
                    { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
                    { "p", p.Value },
                    { "dp", p.Uncertainty }
                });
            }

            // Variances of parameters should agree with predictions.
            Assert.IsTrue(data["a"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As <double>().Median()));
            Assert.IsTrue(data["b"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As <double>().Median()));
            Assert.IsTrue(data["a"].As <double>().PopulationCovariance(data["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As <double>().Median()));

            // Variance of prediction should agree with claim.
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Median()));
        }
Beispiel #17
0
        public void WilcoxonNullDistribution()
        {
            // Pick a very non-normal distribution.
            ContinuousDistribution source = new ExponentialDistribution();

            Random generator = new Random(271828);

            foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4))
            {
                // For small n the test reports an exact discrete statistic; for large n a
                // continuous approximation. Collect whichever kind each trial produces.
                Sample continuousStatistics = new Sample();
                ContinuousDistribution continuousNull = null;

                List <int>           discreteStatistics = new List <int>();
                DiscreteDistribution discreteNull       = null;

                for (int trial = 0; trial < 256; trial++)
                {
                    BivariateSample pairs = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        double x = source.GetRandomValue(generator);
                        double y = source.GetRandomValue(generator);
                        pairs.Add(x, y);
                    }
                    TestResult wilcoxon = pairs.WilcoxonSignedRankTest();
                    if (wilcoxon.UnderlyingStatistic != null)
                    {
                        discreteStatistics.Add(wilcoxon.UnderlyingStatistic.Value);
                        discreteNull = wilcoxon.UnderlyingStatistic.Distribution;
                    }
                    else
                    {
                        continuousStatistics.Add(wilcoxon.Statistic.Value);
                        continuousNull = wilcoxon.Statistic.Distribution;
                    }
                }

                if (discreteNull != null)
                {
                    // Compare the discrete statistics against the claimed discrete null distribution.
                    TestResult chi2 = discreteStatistics.ChiSquaredTest(discreteNull);
                    Assert.IsTrue(chi2.Probability > 0.01);
                }
                else
                {
                    // Compare the continuous statistics against the claimed continuous null distribution.
                    TestResult ks = continuousStatistics.KolmogorovSmirnovTest(continuousNull);
                    Assert.IsTrue(ks.Probability > 0.01);
                    Assert.IsTrue(continuousStatistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(continuousNull.Mean));
                    Assert.IsTrue(continuousStatistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(continuousNull.StandardDeviation));
                }
            }
        }
Beispiel #18
0
        // We see a reliable failure of KS or Kuiper tests for Beta distribution with small parameters, e.g. Beta(0.01,0.01).
        // This appears to occur whether we use inverse CDF or x/(x+y) to generate beta deviates.
        // Perhaps it indicates a problem with P computation for beta in this region?

        // Builds the roster of continuous distributions exercised by the distribution tests:
        // a fixed list of parameterized distributions, plus a few distributions that arise
        // as the null distributions of statistical tests.
        private static List <ContinuousDistribution> CreateDistributions()
        {
            List <ContinuousDistribution> distributions = new List <ContinuousDistribution>(new ContinuousDistribution[] {
                new NoncentralChiSquaredDistribution(2, 3.0),
                new CauchyDistribution(1.0, 2.0),
                new UniformDistribution(Interval.FromEndpoints(-2.0, 1.0)), new UniformDistribution(Interval.FromEndpoints(7.0, 9.0)),
                new NormalDistribution(3.0, 2.0),
                new ExponentialDistribution(2.0),
                new ChiSquaredDistribution(3),
                new StudentDistribution(5),
                new LognormalDistribution(0.2, 0.4),
                new WeibullDistribution(2.0, 3.0),
                new LogisticDistribution(-4.0, 5.0),
                new FisherDistribution(4.0, 7.0),
                new KuiperDistribution(),
                new KolmogorovDistribution(),
                new TriangularDistribution(1.0, 2.0, 4.0),
                new BetaDistribution(0.5, 0.5), new BetaDistribution(0.5, 2.0), new BetaDistribution(2.0, 2.0),
                new ParetoDistribution(1.0, 3.0),
                new WaldDistribution(3.0, 1.0),
                new PearsonRDistribution(7),
                new GammaDistribution(0.8), new GammaDistribution(3.0, 5.0), new GammaDistribution(96.2),
                new GumbelDistribution(1.2, 2.3),
                new LaplaceDistribution(4.5, 6.0),
                new ChiDistribution(1), new ChiDistribution(4),
                new RayleighDistribution(3.0),
                new FrechetDistribution(2.9, 4.0),
                new NoncentralChiSquaredDistribution(2, 1.5),
                new TestDistribution()
            });

            // Add some distributions that come from tests.
            Sample small = TestUtilities.CreateSample(distributions[0], 7);
            Sample large = TestUtilities.CreateSample(distributions[1], 127);

            distributions.Add(small.KolmogorovSmirnovTest(distributions[2]).Statistic.Distribution);
            distributions.Add(large.KolmogorovSmirnovTest(distributions[3]).Statistic.Distribution);
            //distributions.Add(small.KuiperTest(distributions[4]).Distribution);
            //distributions.Add(large.KuiperTest(distributions[5]).Distribution);
            //distributions.Add(Sample.MannWhitneyTest(small, large).Distribution);

            BivariateSample two = new BivariateSample();

            two.Add(new double[] { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 }, new double[] { 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 });
            //distributions.Add(two.SpearmanRhoTest().Distribution);
            //distributions.Add(two.KendallTauTest().Distribution);

            return(distributions);
        }
        public void BivariateSampleCopy()
        {
            // A copy must be fully independent of its source: mutating the
            // copy must not affect the original sample.
            BivariateSample original = new BivariateSample();
            original.Add(1.0, 2.0);

            BivariateSample copy = original.Copy();
            copy.Add(3.0, 4.0);

            // The original keeps its single point; only the copy grew.
            Assert.IsTrue(original.Count == 1);
            Assert.IsTrue(copy.Count == 2);
        }
        public void PairedStudentTTest()
        {
            // Smoke test: run a paired Student t-test on a small, hand-entered
            // set of before/after pairs.
            double[] before = { 3, 0, 6, 7, 4, 3, 2, 1, 4 };
            double[] after = { 5, 1, 5, 7, 10, 9, 7, 11, 8 };

            BivariateSample s = new BivariateSample();
            for (int i = 0; i < before.Length; i++)
            {
                s.Add(before[i], after[i]);
            }

            TestResult r = s.PairedStudentTTest();
            // NOTE(review): no assertion is made on the result; consider checking r.
        }
Beispiel #21
0
        public void TestBivariateRegression()
        {
            // Repeatedly fit straight lines to noisy data drawn from a known
            // model and collect the resulting r^2 values, then compare them
            // against the expected Beta distribution with a KS test.
            double interceptTrue = 1.0;
            double slopeTrue = 0.0;

            Random rng = new Random(1001110000);
            Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            Distribution noiseDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int trial = 0; trial < 500; trial++)
            {
                // Generate a small synthetic data set from y = a0 + b0 x + e.
                BivariateSample data = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = interceptTrue + slopeTrue * x + noiseDistribution.GetRandomValue(rng);
                    data.Add(x, y);
                }

                FitResult fit = data.LinearRegression();
                double intercept = fit.Parameters[0];
                double slope = fit.Parameters[1];

                // Compute r^2 = 1 - SS_residual / SS_total by hand.
                double ssResidual = 0.0;
                double ssTotal = 0.0;
                foreach (XY point in data)
                {
                    ssResidual += MoreMath.Sqr(point.Y - (intercept + slope * point.X));
                    ssTotal += MoreMath.Sqr(point.Y - data.Y.Mean);
                }
                r2Sample.Add(1.0 - ssResidual / ssTotal);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            // With 10 points and 2 fit parameters, r^2 ~ Beta((2-1)/2, (10-2)/2).
            Distribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
        public void BivariateNullAssociation()
        {
            // Under the null hypothesis of no association, the Pearson r,
            // Spearman rho, and Kendall tau statistics should each follow
            // their claimed null distributions.
            Random rng = new Random(314159265);

            // Accumulate each test statistic over many independent samples.
            Sample pearsonStats = new Sample();
            Sample spearmanStats = new Sample();
            Sample kendallStats = new Sample();

            // The claimed distribution of each test statistic.
            ContinuousDistribution pearsonDist = null;
            ContinuousDistribution spearmanDist = null;
            ContinuousDistribution kendallDist = null;

            // Generate many bivariate samples and conduct all three tests on each.
            for (int j = 0; j < 100; j++)
            {
                // Sample size is large so that asymptotic assumptions are justified.
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 100; i++)
                {
                    // Arguments evaluate left-to-right, so x is drawn before y.
                    sample.Add(rng.NextDouble(), rng.NextDouble());
                }

                TestResult pearson = sample.PearsonRTest();
                pearsonStats.Add(pearson.Statistic);
                pearsonDist = pearson.Distribution;

                TestResult spearman = sample.SpearmanRhoTest();
                spearmanStats.Add(spearman.Statistic);
                spearmanDist = spearman.Distribution;

                TestResult kendall = sample.KendallTauTest();
                kendallStats.Add(kendall.Statistic);
                kendallDist = kendall.Distribution;
            }

            // KS-test each statistic sample against its claimed distribution.
            Assert.IsTrue(pearsonStats.KolmogorovSmirnovTest(pearsonDist).LeftProbability < 0.95);
            Assert.IsTrue(spearmanStats.KolmogorovSmirnovTest(spearmanDist).LeftProbability < 0.95);
            Assert.IsTrue(kendallStats.KolmogorovSmirnovTest(kendallDist).LeftProbability < 0.95);
        }
        public void BivariateLinearPolynomialRegressionAgreement()
        {
            // A degree-1 polynomial fit and a straight-line fit are the same
            // model, so their results must agree.
            double[] xs = { 0.0, 3.0, 1.0, 4.0, 2.0 };
            double[] ys = { 5.0, 6.0, 7.0, 8.0, 9.0 };

            BivariateSample data = new BivariateSample();
            for (int i = 0; i < xs.Length; i++)
            {
                data.Add(xs[i], ys[i]);
            }

            GeneralLinearRegressionResult polynomialFit = data.PolynomialRegression(1);
            GeneralLinearRegressionResult linearFit = data.LinearRegression();

            // Both the fitted parameters and their covariances should match.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.ValuesVector, linearFit.Parameters.ValuesVector));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.CovarianceMatrix, linearFit.Parameters.CovarianceMatrix));
        }
        public void BivariateSampleEnumerations()
        {
            // Enumerating a bivariate sample should yield exactly the points added.
            List <XY> expected = new List <XY>() { new XY(1.0, 2.0), new XY(2.0, 3.0), new XY(3.0, 4.0) };

            BivariateSample sample = new BivariateSample();
            sample.Add(expected);
            Assert.IsTrue(sample.Count == expected.Count);

            // Remove each enumerated point from the expected list; every one must be found.
            foreach (XY point in sample)
            {
                Assert.IsTrue(expected.Remove(point));
            }

            // Nothing left over means the enumeration matched the input exactly.
            Assert.IsTrue(expected.Count == 0);
        }
        public void BivariateLinearRegressionNullDistribution()
        {
            // Create uncorrelated x and y values and do a linear fit.
            // The r-tests and F-test statistics returned by the linear fits
            // should agree and both test statistics should follow their claimed
            // distributions.

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            // Collect the r and F statistics (and their claimed distributions)
            // across many independent small fits.
            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                // A small (7-point) sample of independent x and y values.
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                rSample.Add(result.R.Statistic.Value);
                rDistribution = result.R.Statistic.Distribution;

                fSample.Add(result.F.Statistic.Value);
                fDistribution = result.F.Statistic.Distribution;

                // The F statistic of the fit should be exactly the ANOVA F statistic.
                Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);

                // The r-test and F-test should assign (nearly) the same probability.
                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                                  result.R.Probability, result.F.Probability,
                                  new EvaluationSettings()
                {
                    RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16
                }
                                  ));
            }

            // Both statistic samples should be consistent with their claimed distributions.
            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
        public void BivariateNonlinearFitVariances()
        {
            // Verify that we can fit a non-linear function,
            // that the estimated parameters do cluster around the true values,
            // and that the estimated parameter covariances do reflect the actually observed covariances

            // True model parameters for y = a * x^b + noise.
            double a = 2.7;
            double b = 3.1;

            ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
            ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

            // One row of fitted (a, b) per trial.
            FrameTable parameters = new FrameTable();

            parameters.AddColumns <double>("a", "b");
            // Columns: estimated var(a), var(b), cov(a,b) from each fit.
            MultivariateSample covariances = new MultivariateSample(3);

            for (int i = 0; i < 64; i++)
            {
                // Each trial uses its own deterministic RNG (seeded by the trial index).
                BivariateSample sample = new BivariateSample();
                Random          rng    = new Random(i);
                for (int j = 0; j < 8; j++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a * Math.Pow(x, b) + eDistribution.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // Fit the power-law model starting from (1, 1).
                NonlinearRegressionResult fit = sample.NonlinearRegression(
                    (IReadOnlyList <double> p, double x) => p[0] * Math.Pow(x, p[1]),
                    new double[] { 1.0, 1.0 }
                    );

                parameters.AddRow(fit.Parameters.ValuesVector);
                covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
            }

            // Estimated parameters should cluster around the true values.
            Assert.IsTrue(parameters["a"].As <double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
            Assert.IsTrue(parameters["b"].As <double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));

            // The mean claimed (co)variances should agree with the observed spread of the estimates.
            Assert.IsTrue(parameters["a"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters["b"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters["a"].As <double>().PopulationCovariance(parameters["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
            Assert.IsTrue(Bivariate.PopulationCovariance(parameters["a"].As <double>(), parameters["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
        }
        public void BivariateLinearPolynomialRegressionAgreement()
        {
            // A degree-1 polynomial fit and a straight-line fit are the same
            // model, so their results must agree.
            double[] xs = { 0.0, 3.0, 1.0, 4.0, 2.0 };
            double[] ys = { 5.0, 6.0, 7.0, 8.0, 9.0 };

            BivariateSample data = new BivariateSample();
            for (int i = 0; i < xs.Length; i++)
            {
                data.Add(xs[i], ys[i]);
            }

            FitResult polynomialFit = data.PolynomialRegression(1);
            FitResult linearFit = data.LinearRegression();

            // Parameters, covariances, and goodness-of-fit must all match.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters, linearFit.Parameters));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.CovarianceMatrix, linearFit.CovarianceMatrix));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.GoodnessOfFit.Statistic, linearFit.GoodnessOfFit.Statistic));
        }
        public void BivariateSampleManipulations()
        {
            // Exercise Add, Remove, Contains, and Clear on a bivariate sample.
            BivariateSample s = new BivariateSample();

            s.Add(1.0, 3.0);
            s.Add(2.0, 2.0);
            s.Add(3.0, 1.0);
            Assert.IsTrue(s.Count == 3);

            // Removing an existing point succeeds once, then fails.
            Assert.IsTrue(s.Remove(2.0, 2.0));
            Assert.IsTrue(s.Count == 2);
            Assert.IsFalse(s.Remove(2.0, 2.0));

            // Containment is by (x, y) pair, not by coordinate alone.
            Assert.IsTrue(s.Contains(1.0, 3.0));
            Assert.IsFalse(s.Contains(3.0, 3.0));

            // Clearing empties the sample completely.
            s.Clear();
            Assert.IsTrue(s.Count == 0);
        }
        public void PairedStudentTTest()
        {
            // Run a paired Student t-test on a small, hand-entered set of
            // before/after pairs and print the resulting statistic and probability.
            double[] before = { 3, 0, 6, 7, 4, 3, 2, 1, 4 };
            double[] after = { 5, 1, 5, 7, 10, 9, 7, 11, 8 };

            BivariateSample s = new BivariateSample();
            for (int i = 0; i < before.Length; i++)
            {
                s.Add(before[i], after[i]);
            }

            Console.WriteLine(s.Count);
            TestResult r = s.PairedStudentTTest();

            Console.WriteLine(r.Statistic);
            Console.WriteLine(r.LeftProbability);
        }
Beispiel #30
0
        public void PearsonRNullDistribution()
        {
            // For bivariate normal data with no cross-correlation, the Pearson r
            // test statistic should follow its claimed small-n distribution.
            Random rng = new Random(1111111);

            // The underlying variables must be normal, but any parameters will do.
            NormalDistribution xDistribution = new NormalDistribution(1, 2);
            NormalDistribution yDistribution = new NormalDistribution(3, 4);

            // Use small sample sizes, where r differs most from normal:
            // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
            foreach (int n in new int[] { 3, 4, 5, 6, 8 })
            {
                Sample rSample = new Sample();
                ContinuousDistribution rDistribution = null;
                for (int i = 0; i < 128; i++)
                {
                    // Each r value comes from an uncorrelated bivariate sample of size n.
                    BivariateSample xySample = new BivariateSample();
                    for (int j = 0; j < n; j++)
                    {
                        // Draw x before y to match the RNG consumption order.
                        double x = xDistribution.GetRandomValue(rng);
                        double y = yDistribution.GetRandomValue(rng);
                        xySample.Add(x, y);
                    }
                    TestResult rTest = xySample.PearsonRTest();
                    rSample.Add(rTest.Statistic);
                    rDistribution = rTest.Distribution;
                }

                // The observed r values should be consistent with the claimed distribution.
                TestResult result = rSample.KuiperTest(new PearsonRDistribution(n));
                Assert.IsTrue(result.Probability > 0.01);

                // Sample mean and standard deviation should also agree with the claim.
                Assert.IsTrue(rSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(rDistribution.Mean));
                Assert.IsTrue(rSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(rDistribution.StandardDeviation));
            }
        }