        public void MultivariateLinearRegressionAgreement()
        {
            Random rng = new Random(1);

            MultivariateSample SA = new MultivariateSample(2);

            for (int i = 0; i < 10; i++)
            {
                SA.Add(rng.NextDouble(), rng.NextDouble());
            }
            RegressionResult RA = SA.LinearRegression(0);
            ColumnVector     PA = RA.Parameters.Best;
            SymmetricMatrix  CA = RA.Parameters.Covariance;

            MultivariateSample SB = SA.Columns(1, 0);
            RegressionResult   RB = SB.LinearRegression(1);
            ColumnVector       PB = RB.Parameters.Best;
            SymmetricMatrix    CB = RB.Parameters.Covariance;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[0], PB[1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[1], PB[0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 0], CB[1, 1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 1], CB[1, 0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[1, 1], CB[0, 0]));

            BivariateSample  SC = SA.TwoColumns(1, 0);
            RegressionResult RC = SC.LinearRegression();
            ColumnVector     PC = RC.Parameters.Best;
            SymmetricMatrix  CC = RC.Parameters.Covariance;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA, PC));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA, CC));
        }
        public void BivariateLinearRegressionGoodnessOfFitDistribution()
        {
            // create uncorrelated x and y values
            // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample fs = new Sample();

            for (int i = 0; i < 127; i++)
            {
                BivariateSample xys = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    xys.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                double f = xys.LinearRegression().GoodnessOfFit.Statistic;
                fs.Add(f);
            }

            Distribution fd = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
            TestResult t = fs.KolmogorovSmirnovTest(fd);

            Console.WriteLine(t.LeftProbability);
            Assert.IsTrue(t.LeftProbability < 0.95);
        }
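
For reference, with k = 2 fitted parameters (intercept and slope) and n = 7 points per synthetic data set, the regression F-statistic under the null hypothesis of no linear relationship follows an F-distribution with (k - 1, n - k) degrees of freedom, which is why the comparison distribution is FisherDistribution(1, 5):

F = \frac{SS_{\text{model}} / (k - 1)}{SS_{\text{residual}} / (n - k)} \sim F(k - 1,\, n - k) = F(1, 5).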
Example #3
        public void TestBivariateRegression()
        {
            // Do a bunch of linear regressions. r^2 should be distributed as expected.

            double a0 = 1.0;
            double b0 = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List <double> r2Sample = new List <double>();

            for (int i = 0; i < 500; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                LinearRegressionResult fit = xySample.LinearRegression();
                double a = fit.Intercept.Value;
                double b = fit.Slope.Value;

                r2Sample.Add(fit.RSquared);
            }

            ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
            TestResult             ks             = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
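
The comparison distribution encodes a standard result: under the null model (b0 = 0), the coefficient of determination of a fit with k = 2 parameters to n = 10 points is Beta-distributed,

R^2 \sim \mathrm{Beta}\!\left(\frac{k - 1}{2},\, \frac{n - k}{2}\right) = \mathrm{Beta}\!\left(\tfrac{1}{2},\, 4\right),

which is exactly the BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0) that the Kolmogorov-Smirnov test is run against.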
Example #4
        public void BivariateLinearRegressionNullDistribution()
        {
            // create uncorrelated x and y values
            // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample fs = new Sample();

            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                double f = result.F.Statistic;
                fs.Add(f);

                rSample.Add(result.R.Statistic);
                rDistribution = result.R.Distribution;

                fSample.Add(result.F.Statistic);
                fDistribution = result.F.Distribution;

                Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);

                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                    result.R.Probability, result.F.Probability,
                    new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
                ));
            }

            ContinuousDistribution fd = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
            TestResult t = fs.KolmogorovSmirnovTest(fd);

            Console.WriteLine(t.LeftProbability);
            Assert.IsTrue(t.LeftProbability < 0.95);

            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
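
The assertion that the R and F probabilities nearly agree reflects the standard identity connecting the two test statistics for a simple one-regressor fit to n points,

F = \frac{(n - 2)\, r^2}{1 - r^2},

so with n = 7 the F-statistic is F(1, 5)-distributed under the null, matching the FisherDistribution(1, 5) used above, and a test on F is equivalent to a two-sided test on r.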
Example #5
        // Not fixing this bug; use Polynomial interpolation for this scenario instead
        //[TestMethod]
        public void Bug6392()
        {
            // The bug report requests that we support regression when the number of points
            // equals the number of fit parameters, i.e. an exact (interpolating) fit.
            var biSample = new BivariateSample();

            biSample.Add(0, 1);
            biSample.Add(1, -1);
            var fitResult = biSample.LinearRegression();
        }
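
With exactly as many points as parameters there are no residual degrees of freedom, so a regression has nothing left from which to estimate the noise; the comment above points to interpolation instead. Below is a minimal sketch of that workaround for the two-point case, using plain arithmetic rather than any particular Meta.Numerics interpolation API; the helper name is purely illustrative.

        // Hypothetical helper: the unique degree-1 polynomial y = a + b x through two points.
        public static (double Intercept, double Slope) InterpolateLine(double x0, double y0, double x1, double y1)
        {
            double slope     = (y1 - y0) / (x1 - x0);
            double intercept = y0 - slope * x0;
            return (intercept, slope);
        }

        // For the Bug6392 data (0, 1) and (1, -1) this gives intercept 1 and slope -2,
        // with no uncertainties, since an exact fit leaves no residuals.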
        public void LinearRegressionVariances()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but also that the scatter of the estimates matches the reported variances/covariances

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // do a lot of fits, recording results of each
            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int k = 0; k < 128; k++)
            {
                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                ContinuousDistribution xd = new LogisticDistribution();
                ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 12; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // do the regression
                LinearRegressionResult result = sample.LinearRegression();

                // record result
                UncertainValue p = result.Predict(12.0);
                data.AddRow(new Dictionary <string, object>()
                {
                    { "a", result.Intercept.Value },
                    { "va", result.Parameters.VarianceOf("Intercept") },
                    { "b", result.Slope.Value },
                    { "vb", result.Parameters.VarianceOf("Slope") },
                    { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
                    { "p", p.Value },
                    { "dp", p.Uncertainty }
                });
            }

            // variances of parameters should agree with predictions
            Assert.IsTrue(data["a"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As <double>().Median()));
            Assert.IsTrue(data["b"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As <double>().Median()));
            Assert.IsTrue(data["a"].As <double>().PopulationCovariance(data["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As <double>().Median()));

            // variance of prediction should agree with claim
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Median()));
        }
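
The last assertion compares the observed scatter of the predictions at x0 = 12 against the uncertainty the fit reports for them. That scatter is governed by the textbook variance of the fitted line at a new abscissa (with \bar{x} the mean of the x's and S_{xx} = \sum_i (x_i - \bar{x})^2):

\mathrm{Var}\big(\hat{a} + \hat{b} x_0\big) = \sigma^2 \left( \frac{1}{n} + \frac{(x_0 - \bar{x})^2}{S_{xx}} \right).

Whether a library's prediction uncertainty additionally folds in the noise variance for a single new observation is a convention choice; the check here simply asks that the reported value track the observed scatter.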
Example #7
        public void TestBivariateRegression()
        {
            double a0 = 1.0;
            double b0 = 0.0;

            Random       rng           = new Random(1001110000);
            Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            Distribution eDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int i = 0; i < 500; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                FitResult fit = xySample.LinearRegression();
                double    a   = fit.Parameters[0];
                double    b   = fit.Parameters[1];

                double ss2 = 0.0;
                double ss1 = 0.0;
                foreach (XY xy in xySample)
                {
                    ss2 += MoreMath.Sqr(xy.Y - (a + b * xy.X));
                    ss1 += MoreMath.Sqr(xy.Y - xySample.Y.Mean);
                }
                double r2 = 1.0 - ss2 / ss1;
                r2Sample.Add(r2);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            Distribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);

            //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
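
The inner loop reconstructs the coefficient of determination by hand: ss2 accumulates the residual sum of squares around the fitted line and ss1 the total sum of squares around the mean of y, so

r^2 = 1 - \frac{\sum_i \big(y_i - (a + b x_i)\big)^2}{\sum_i (y_i - \bar{y})^2},

which is then compared against the Beta((k - 1)/2, (n - k)/2) null distribution, as in the earlier example.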
        public void BivariateLinearPolynomialRegressionAgreement()
        {
            // A degree-1 polynomial fit should give the same answer as a linear fit

            BivariateSample B = new BivariateSample();

            B.Add(0.0, 5.0);
            B.Add(3.0, 6.0);
            B.Add(1.0, 7.0);
            B.Add(4.0, 8.0);
            B.Add(2.0, 9.0);
            GeneralLinearRegressionResult PR = B.PolynomialRegression(1);
            GeneralLinearRegressionResult LR = B.LinearRegression();

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PR.Parameters.ValuesVector, LR.Parameters.ValuesVector));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PR.Parameters.CovarianceMatrix, LR.Parameters.CovarianceMatrix));
        }
        public void BivariateLinearRegressionNullDistribution()
        {
            // Create uncorrelated x and y values and do a linear fit.
            // The r-tests and F-test statistics returned by the linear fits
            // should agree and both test statistics should follow their claimed
            // distributions.

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                rSample.Add(result.R.Statistic.Value);
                rDistribution = result.R.Statistic.Distribution;

                fSample.Add(result.F.Statistic.Value);
                fDistribution = result.F.Statistic.Distribution;

                Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);

                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                    result.R.Probability, result.F.Probability,
                    new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
                ));
            }

            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
        public void BivariateLinearPolynomialRegressionAgreement()
        {
            // A degree-1 polynomial fit should give the same answer as a linear fit

            BivariateSample B = new BivariateSample();

            B.Add(0.0, 5.0);
            B.Add(3.0, 6.0);
            B.Add(1.0, 7.0);
            B.Add(4.0, 8.0);
            B.Add(2.0, 9.0);
            FitResult PR = B.PolynomialRegression(1);
            FitResult LR = B.LinearRegression();

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PR.Parameters, LR.Parameters));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PR.CovarianceMatrix, LR.CovarianceMatrix));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PR.GoodnessOfFit.Statistic, LR.GoodnessOfFit.Statistic));
        }
        public void BivariateLinearRegression()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but also that the scatter of the estimates matches the reported variances/covariances

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // keep track of sample of returned a and b fit parameters
            BivariateSample ps = new BivariateSample();

            // also keep track of returned covariance estimates
            // since these vary slightly from fit to fit, we will average them
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // also keep track of test statistics
            Sample fs = new Sample();

            // do 100 fits
            for (int k = 0; k < 100; k++)
            {
                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                Distribution xd = new LogisticDistribution();
                Distribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample s = new BivariateSample();
                for (int i = 0; i < 25; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    s.Add(x, y);
                }

                // do the regression
                FitResult r = s.LinearRegression();

                // record best fit parameters
                double a = r.Parameter(0).Value;
                double b = r.Parameter(1).Value;
                ps.Add(a, b);

                // record estimated covariances
                caa += r.Covariance(0, 0);
                cbb += r.Covariance(1, 1);
                cab += r.Covariance(0, 1);

                // record the fit statistic
                fs.Add(r.GoodnessOfFit.Statistic);
                Console.WriteLine("F={0}", r.GoodnessOfFit.Statistic);
            }

            caa /= ps.Count;
            cbb /= ps.Count;
            cab /= ps.Count;

            // check that mean parameter estimates are what they should be: the underlying population parameters
            Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
            Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);

            // check that parameter covariances are what they should be: the reported covariance estimates
            Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

            // check that F is distributed as it should be
            Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
        }
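
The variance and covariance checks correspond to the standard least-squares sampling results for the intercept and slope (with \bar{x} the mean of the x's and S_{xx} = \sum_i (x_i - \bar{x})^2):

\mathrm{Var}(\hat{b}) = \frac{\sigma^2}{S_{xx}}, \qquad \mathrm{Var}(\hat{a}) = \sigma^2 \left( \frac{1}{n} + \frac{\bar{x}^2}{S_{xx}} \right), \qquad \mathrm{Cov}(\hat{a}, \hat{b}) = -\frac{\sigma^2 \bar{x}}{S_{xx}}.

Averaging the reported covariances over many fits and comparing them to the population variance and covariance of the fitted parameters is exactly a check of these relations.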
Example #12
        public void BivariateLinearRegression()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but also that the scatter of the estimates matches the reported variances/covariances

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // keep track of sample of returned a and b fit parameters
            BivariateSample pSample = new BivariateSample();

            // also keep track of returned covariance estimates
            // since these vary slightly from fit to fit, we will average them
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // Record predictions for a new point
            double x0      = 12.0;
            Sample ySample = new Sample();
            double ySigma  = 0.0;

            // do 128 fits
            for (int k = 0; k < 128; k++)
            {
                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                ContinuousDistribution xd = new LogisticDistribution();
                ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 16; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // do the regression
                LinearRegressionResult result = sample.LinearRegression();

                // test consistency
                Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
                Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
                Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
                Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));

                // record best fit parameters
                double a = result.Parameters.Best[0];
                double b = result.Parameters.Best[1];
                pSample.Add(a, b);

                // record estimated covariances
                caa += result.Parameters.Covariance[0, 0];
                cbb += result.Parameters.Covariance[1, 1];
                cab += result.Parameters.Covariance[0, 1];

                UncertainValue yPredict = result.Predict(x0);
                ySample.Add(yPredict.Value);
                ySigma += yPredict.Uncertainty;

                double SST = 0.0;
                foreach (double y in sample.Y)
                {
                    SST += MoreMath.Sqr(y - sample.Y.Mean);
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));

                double SSR = 0.0;
                foreach (double z in result.Residuals)
                {
                    SSR += z * z;
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
            }

            caa    /= pSample.Count;
            cbb    /= pSample.Count;
            cab    /= pSample.Count;
            ySigma /= pSample.Count;

            // check that mean parameter estimates are what they should be: the underlying population parameters
            Assert.IsTrue(pSample.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(pSample.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            Console.WriteLine("{0} {1}", caa, pSample.X.PopulationVariance);
            Console.WriteLine("{0} {1}", cbb, pSample.Y.PopulationVariance);

            // check that parameter covariances are what they should be: the reported covariance estimates
            Assert.IsTrue(pSample.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(pSample.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(pSample.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

            // Check that the predicted ys conform to the model and the asserted uncertainty.
            Assert.IsTrue(ySample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0 + x0 * b0));
            //Assert.IsTrue(ySample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(ySigma));
        }
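
The sum-of-squares assertions inside the loop rest on the ANOVA decomposition of the total variation of y into the part explained by the fitted line and the residual part,

\sum_i (y_i - \bar{y})^2 = \sum_i (\hat{y}_i - \bar{y})^2 + \sum_i (y_i - \hat{y}_i)^2, \qquad \text{i.e.} \quad SS_{\text{total}} = SS_{\text{model}} + SS_{\text{residual}},

and for a simple linear fit the ANOVA table's F-statistic is F = (SS_{\text{model}} / 1) / (SS_{\text{residual}} / (n - 2)).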