public void LinearRegressionVariances()
        {
            // Fit a straight line to many independent synthetic data sets and verify that
            // the scatter of the fitted quantities across fits is consistent with the
            // variances/covariance that the individual fits report for themselves.

            const int fitCount = 128;
            const int pointsPerFit = 12;
            const double predictionPoint = 12.0;

            // Fixed seed keeps the sequence of random draws reproducible.
            Random rng = new Random(314159);

            // Parameters of the underlying line y = intercept + slope * x.
            double trueIntercept = 2.0;
            double trueSlope = -1.0;

            // One row per fit: fitted values alongside their reported (co)variances.
            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int fit = 0; fit < fitCount; fit++)
            {
                // The x values may come from any distribution; the noise is drawn from a normal one.
                ContinuousDistribution abscissas = new LogisticDistribution();
                ContinuousDistribution noise = new NormalDistribution(0.0, 2.0);

                // Build one synthetic data set scattered about the true line.
                BivariateSample points = new BivariateSample();
                for (int j = 0; j < pointsPerFit; j++)
                {
                    // Draw x first, then the noise, so the random stream is consumed in a fixed order.
                    double xValue = abscissas.GetRandomValue(rng);
                    double yValue = trueIntercept + trueSlope * xValue + noise.GetRandomValue(rng);
                    points.Add(xValue, yValue);
                }

                // Fit the line and evaluate the fitted model at the prediction point.
                LinearRegressionResult regression = points.LinearRegression();
                UncertainValue prediction = regression.Predict(predictionPoint);

                // Store what this fit found and what it claims about its own uncertainty.
                fits.AddRow(new Dictionary<string, object>
                {
                    ["a"] = regression.Intercept.Value,
                    ["va"] = regression.Parameters.VarianceOf("Intercept"),
                    ["b"] = regression.Slope.Value,
                    ["vb"] = regression.Parameters.VarianceOf("Slope"),
                    ["abCov"] = regression.Parameters.CovarianceOf("Slope", "Intercept"),
                    ["p"] = prediction.Value,
                    ["dp"] = prediction.Uncertainty
                });
            }

            var fittedIntercepts = fits["a"].As<double>();
            var fittedSlopes = fits["b"].As<double>();

            // Observed variance of the fitted intercepts vs. the median reported intercept variance.
            var interceptVarianceRange = fittedIntercepts.PopulationVariance().ConfidenceInterval(0.99);
            double reportedInterceptVariance = fits["va"].As<double>().Median();
            Assert.IsTrue(interceptVarianceRange.ClosedContains(reportedInterceptVariance));

            // Observed variance of the fitted slopes vs. the median reported slope variance.
            var slopeVarianceRange = fittedSlopes.PopulationVariance().ConfidenceInterval(0.99);
            double reportedSlopeVariance = fits["vb"].As<double>().Median();
            Assert.IsTrue(slopeVarianceRange.ClosedContains(reportedSlopeVariance));

            // Observed intercept/slope covariance vs. the median reported covariance.
            var covarianceRange = fittedIntercepts.PopulationCovariance(fittedSlopes).ConfidenceInterval(0.99);
            double reportedCovariance = fits["abCov"].As<double>().Median();
            Assert.IsTrue(covarianceRange.ClosedContains(reportedCovariance));

            // Observed scatter of the predictions vs. the median reported prediction uncertainty.
            var predictionSigmaRange = fits["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99);
            double reportedPredictionSigma = fits["dp"].As<double>().Median();
            Assert.IsTrue(predictionSigmaRange.ClosedContains(reportedPredictionSigma));
        }
Exemple #2
0
        public static void LinearRegression()
        {
            // Walk-through of the linear regression API on a small hand-entered data set:
            // fit y against x, print the fitted line and goodness-of-fit figures, show the
            // ANOVA breakdown, and finish with a prediction at a new x value.

            var xs = new List<double> { -1.1, 2.2, 1.4, 0.5, 3.7, 2.8 };
            var ys = new List<double> { -2.9, 3.4, 0.9, 0.1, 6.8, 5.7 };

            // The dependent variable is the receiver; the independent variable is the argument.
            LinearRegressionResult result = ys.LinearRegression(xs);

            // Fitted line and overall quality of the fit.
            Console.WriteLine($"y = ({result.Intercept}) + ({result.Slope}) x");
            Console.WriteLine($"Fit explains {result.RSquared * 100.0}% of the variance");
            Console.WriteLine($"Probability of no dependence {result.R.Probability}.");

            // ANOVA table: one line each for the fit, the residual, and the total.
            OneWayAnovaResult anova = result.Anova;
            var fitRow = anova.Factor;
            var residualRow = anova.Residual;
            var totalRow = anova.Total;
            Console.WriteLine("Fit        dof = {0} SS = {1}", fitRow.DegreesOfFreedom, fitRow.SumOfSquares);
            Console.WriteLine("Residual   dof = {0} SS = {1}", residualRow.DegreesOfFreedom, residualRow.SumOfSquares);
            Console.WriteLine("Total      dof = {0} SS = {1}", totalRow.DegreesOfFreedom, totalRow.SumOfSquares);
            Console.WriteLine($"Probability of no dependence {anova.Result.Probability}.");

            // 95% confidence interval on the slope.
            var slopeInterval = result.Slope.ConfidenceInterval(0.95);
            Console.WriteLine($"slope is in {slopeInterval} with 95% confidence");

            // The following values are fetched only to show how they are accessed;
            // nothing below prints or otherwise uses them.
            IReadOnlyList<double> residuals = result.Residuals;
            ColumnVector parameters = result.Parameters.ValuesVector;
            SymmetricMatrix covariance = result.Parameters.CovarianceMatrix;
            result.Parameters.CovarianceOf("Intercept", "Slope");

            // Predict y, with its uncertainty, at a new x value.
            double queryX = 3.0;
            UncertainValue predicted = result.Predict(queryX);
            Console.WriteLine($"Predicted y({queryX}) = {predicted}.");
        }
        public void LinearRegressionSimple()
        {
            // Fit a line to one synthetic data set and check that the result object is
            // self-consistent: the true parameters fall inside the reported confidence
            // intervals, the different accessors for the same quantity agree, and the
            // residuals, R/R-squared, F-test and ANOVA sums of squares match their definitions.

            // True parameters of the underlying line y = a + b * x.
            double a = -1.0;
            double b = 2.0;

            // x values are drawn from a Cauchy distribution, the noise from a normal distribution.
            ContinuousDistribution xDistribution = new CauchyDistribution();
            ContinuousDistribution eDistribution = new NormalDistribution();

            int    n   = 16;
            Random rng = new Random(1);

            // Generate the data. The loop is bounded by n (not a repeated literal) so the
            // number of generated points always matches the size of the arrays.
            double[] x = new double[n];
            double[] y = new double[n];
            for (int i = 0; i < n; i++)
            {
                x[i] = xDistribution.GetRandomValue(rng);
                y[i] = a + b * x[i] + eDistribution.GetRandomValue(rng);
            }

            LinearRegressionResult result = y.LinearRegression(x);

            // Parameters should be right
            Assert.IsTrue(result.Intercept.ConfidenceInterval(0.95).ClosedContains(a));
            Assert.IsTrue(result.Slope.ConfidenceInterval(0.95).ClosedContains(b));

            // Reported values should be consistent: the convenience properties, the named
            // parameter collection, and the values vector / variances must all agree.
            Assert.IsTrue(result.Intercept == result.Parameters["Intercept"].Estimate);
            Assert.IsTrue(result.Intercept.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Intercept")]);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Intercept"))));
            Assert.IsTrue(result.Slope == result.Parameters["Slope"].Estimate);
            Assert.IsTrue(result.Slope.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Slope")]);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Slope"))));

            // Residuals should agree with definition: observed y minus predicted y
            for (int i = 0; i < x.Length; i++)
            {
                double yp = result.Predict(x[i]).Value;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp));
            }

            // R and R-squared agree
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.RSquared, MoreMath.Sqr(result.R.Statistic.Value)));

            // F-test and R-test agree
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.F.Probability, result.R.Probability));

            // ANOVA's sums of squares are correct.
            // NOTE(review): this presumes Variance() is the 1/n (population-style) variance,
            // so that multiplying by the count yields the total sum of squares - confirm.
            double SST = y.Variance() * y.Length;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));

            // The residual sum of squares is accumulated directly from the residuals.
            double SSR = 0.0;

            foreach (double z in result.Residuals)
            {
                SSR += z * z;
            }
            Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.SumOfSquaredResiduals));

            // R is same as correlation coefficient
            Assert.IsTrue(TestUtilities.IsNearlyEqual(x.CorrelationCoefficient(y), result.R.Statistic.Value));
        }
Exemple #4
0
        public void BivariateLinearRegression()
        {
            // Run a series of linear regression fits on synthetic data. Each fit is checked
            // for internal consistency, and afterwards the collection of fitted parameters is
            // compared with the true line and with the covariances the fits reported.

            const int fitCount = 128;
            const int pointsPerFit = 16;

            // Fixed seed keeps the sequence of random draws reproducible.
            Random rng = new Random(314159);

            // Parameters of the underlying line y = intercept + slope * x.
            double trueIntercept = 2.0;
            double trueSlope = -1.0;

            // Fitted (intercept, slope) pair from every fit.
            BivariateSample fittedParameters = new BivariateSample();

            // Reported covariance entries vary slightly from fit to fit, so they are
            // summed inside the loop and turned into averages after it.
            double interceptVariance = 0.0;
            double slopeVariance = 0.0;
            double interceptSlopeCovariance = 0.0;

            // Predictions at a new point, plus the running sum of their reported uncertainties.
            double predictionX = 12.0;
            Sample predictions = new Sample();
            double predictionSigma = 0.0;

            for (int fit = 0; fit < fitCount; fit++)
            {
                // The x values may come from any distribution; the noise is drawn from a normal one.
                ContinuousDistribution abscissas = new LogisticDistribution();
                ContinuousDistribution noise = new NormalDistribution(0.0, 2.0);

                // Build one synthetic data set scattered about the true line.
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < pointsPerFit; j++)
                {
                    // Draw x first, then the noise, so the random stream is consumed in a fixed order.
                    double xValue = abscissas.GetRandomValue(rng);
                    double yValue = trueIntercept + trueSlope * xValue + noise.GetRandomValue(rng);
                    sample.Add(xValue, yValue);
                }

                LinearRegressionResult result = sample.LinearRegression();

                // Consistency: every accessor for the same quantity must report the same thing.
                Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
                Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
                Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
                Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));

                // Remember the best-fit parameters of this fit.
                double fittedIntercept = result.Parameters.Best[0];
                double fittedSlope = result.Parameters.Best[1];
                fittedParameters.Add(fittedIntercept, fittedSlope);

                // Accumulate the covariance entries this fit reports.
                interceptVariance += result.Parameters.Covariance[0, 0];
                slopeVariance += result.Parameters.Covariance[1, 1];
                interceptSlopeCovariance += result.Parameters.Covariance[0, 1];

                // Record the prediction at the new point and its claimed uncertainty.
                UncertainValue prediction = result.Predict(predictionX);
                predictions.Add(prediction.Value);
                predictionSigma += prediction.Uncertainty;

                // Total sum of squares: squared deviations of y from its mean.
                double yMean = sample.Y.Mean;
                double totalSumOfSquares = 0.0;
                foreach (double observedY in sample.Y)
                {
                    totalSumOfSquares += MoreMath.Sqr(observedY - yMean);
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(totalSumOfSquares, result.Anova.Total.SumOfSquares));

                // Residual sum of squares, accumulated directly from the residuals.
                double residualSumOfSquares = 0.0;
                foreach (double residual in result.Residuals)
                {
                    residualSumOfSquares += residual * residual;
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(residualSumOfSquares, result.Anova.Residual.SumOfSquares));
            }

            // Convert the running sums into per-fit averages.
            int recordedFits = fittedParameters.Count;
            interceptVariance /= recordedFits;
            slopeVariance /= recordedFits;
            interceptSlopeCovariance /= recordedFits;
            predictionSigma /= recordedFits;

            // The mean fitted parameters should match the underlying population parameters.
            Assert.IsTrue(fittedParameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(trueIntercept));
            Assert.IsTrue(fittedParameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(trueSlope));

            // Diagnostic output: averaged reported variance next to the observed one.
            Console.WriteLine("{0} {1}", interceptVariance, fittedParameters.X.PopulationVariance);
            Console.WriteLine("{0} {1}", slopeVariance, fittedParameters.Y.PopulationVariance);

            // The observed parameter covariances should match the averaged reported estimates.
            Assert.IsTrue(fittedParameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(interceptVariance));
            Assert.IsTrue(fittedParameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(slopeVariance));
            Assert.IsTrue(fittedParameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(interceptSlopeCovariance));

            // The predicted ys should conform to the model; the check of the asserted
            // uncertainty is currently disabled.
            Assert.IsTrue(predictions.PopulationMean.ConfidenceInterval(0.95).ClosedContains(trueIntercept + predictionX * trueSlope));
            //Assert.IsTrue(predictions.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(predictionSigma));
        }