public void NormalFitCovariances()
        {
            // The population distribution our samples are drawn from.
            NormalDistribution N = new NormalDistribution(-1.0, 2.0);

            // Collect the fitted (mu, sigma) pairs so we can measure their
            // observed means, variances, and covariance afterwards.
            BivariateSample    parameters  = new BivariateSample();
            // Collect the claimed var(mu), var(sigma), and cov(mu, sigma) from each fit.
            MultivariateSample covariances = new MultivariateSample(3);

            // Repeatedly generate and fit normal samples. Small samples are used
            // deliberately so the scatter of mu and sigma is more substantial.
            for (int trial = 0; trial < 128; trial++)
            {
                Sample sample = TestUtilities.CreateSample(N, 8, trial);

                // Fit this sample to a normal distribution.
                FitResult fit = NormalDistribution.FitToSample(sample);

                // Record the fitted mu and sigma.
                double mu    = fit.Parameter(0).Value;
                double sigma = fit.Parameter(1).Value;
                parameters.Add(mu, sigma);

                // Record the covariances the fit claims for those parameters.
                covariances.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));
            }

            // The mean fitted values should agree with the population parameters.
            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.StandardDeviation));

            // The observed scatter of the fitted values should agree with the
            // variances and covariance the fits claimed.
            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(2).Mean));
        }
        public void FitDataToLinearFunctionTest()
        {
            // Create a data set whose true values follow 2*cos(x) + 1*sin(x),
            // with an x-dependent measurement uncertainty.
            Interval r = Interval.FromEndpoints(-4.0, 6.0);

            // Lambdas replace the older anonymous-delegate syntax, matching the
            // style used elsewhere in this file; the unused coefficient array
            // that previously sat here has been removed.
            Func<double, double> fv = (double x) => 2.0 * Math.Cos(x) + 1.0 * Math.Sin(x);
            Func<double, double> fu = (double x) => 0.1 + 0.1 * Math.Abs(x);
            UncertainMeasurementSample set = CreateDataSet(r, fv, fu, 20, 2);

            // Fit the data set to a linear combination of sine and cosine.
            Func<double, double>[] fs = new Func<double, double>[] {
                (double x) => Math.Cos(x),
                (double x) => Math.Sin(x)
            };
            FitResult result = set.FitToLinearFunction(fs);

            // The fit should have one coefficient per basis function.
            Assert.IsTrue(result.Dimension == 2);

            // The fitted coefficients should match the true ones.
            Console.WriteLine(result.Parameter(0));
            Console.WriteLine(result.Parameter(1));
            Assert.IsTrue(result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(2.0));
            Assert.IsTrue(result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(1.0));

            // Diagonal covariances should match the quoted parameter uncertainties.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(Math.Sqrt(result.Covariance(0, 0)), result.Parameter(0).Uncertainty));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(Math.Sqrt(result.Covariance(1, 1)), result.Parameter(1).Uncertainty));
        }
        public void WaldFit()
        {
            // The population distribution the samples are drawn from.
            WaldDistribution wald = new WaldDistribution(3.5, 2.5);

            // Fitted (mean, shape) pairs, plus the claimed variances/covariance of each fit.
            BivariateSample    parameters = new BivariateSample();
            MultivariateSample variances  = new MultivariateSample(3);

            for (int seed = 0; seed < 128; seed++)
            {
                Sample sample = SampleTest.CreateSample(wald, 16, seed);

                FitResult result = WaldDistribution.FitToSample(sample);
                parameters.Add(result.Parameters[0], result.Parameters[1]);
                variances.Add(result.Covariance(0, 0), result.Covariance(1, 1), result.Covariance(0, 1));

                // Each individual fit should be statistically acceptable.
                Assert.IsTrue(result.GoodnessOfFit.Probability > 0.01);
            }

            // The fitted parameters should cluster around the population values.
            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Shape));

            // The observed parameter scatter should agree with the claimed
            // (median) variances and covariance.
            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
        }
        public void GumbelFit()
        {
            // The population distribution the samples are drawn from.
            GumbelDistribution d = new GumbelDistribution(-1.0, 2.0);

            // Fitted (location, scale) pairs, plus each fit's claimed variances/covariance.
            MultivariateSample parameters = new MultivariateSample(2);
            MultivariateSample variances  = new MultivariateSample(3);

            // Do a bunch of fits, recording the reported parameters and variances.
            for (int seed = 0; seed < 32; seed++)
            {
                Sample sample = SampleTest.CreateSample(d, 64, seed);

                FitResult result = GumbelDistribution.FitToSample(sample);
                parameters.Add(result.Parameters);
                variances.Add(result.Covariance(0, 0), result.Covariance(1, 1), result.Covariance(0, 1));

                // Each individual fit should be statistically acceptable.
                Assert.IsTrue(result.GoodnessOfFit.Probability > 0.01);
            }

            // The reported parameters should agree with the underlying parameters.
            Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Location));
            Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Scale));

            // The reported covariances should agree with the observed covariances.
            Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Mean));
            Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Mean));
        }
        // Exemple #5 / 0 -- stray snippet-index text from the source this file
        // was assembled from; commented out so the file compiles.
        public void BivariateNonlinearFit()
        {
            // Fit the non-linear model y = a * x^b (plus normal noise) many times
            // and verify that (1) the estimated parameters cluster around the true
            // values and (2) the estimated parameter covariances reflect the
            // actually observed covariances.

            double a = 2.7;
            double b = 3.1;

            ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
            ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

            MultivariateSample parameters  = new MultivariateSample("a", "b");
            MultivariateSample covariances = new MultivariateSample(3);

            for (int seed = 0; seed < 64; seed++)
            {
                // Generate a small synthetic (x, y) data set with a fixed seed.
                Random          rng  = new Random(seed);
                BivariateSample data = new BivariateSample();
                for (int j = 0; j < 8; j++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a * Math.Pow(x, b) + eDistribution.GetRandomValue(rng);
                    data.Add(x, y);
                }

                // Fit to the power-law model, starting from (1, 1).
                FitResult fit = data.NonlinearRegression(
                    (IList <double> p, double x) => p[0] * Math.Pow(x, p[1]),
                    new double[] { 1.0, 1.0 }
                    );

                parameters.Add(fit.Parameters);
                covariances.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));
            }

            // Estimated parameters cluster around the true values.
            Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(a));
            Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(b));

            // Claimed covariances agree with the observed parameter scatter.
            Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
        }
        public void RayleighFit()
        {
            // The population distribution the samples are drawn from.
            RayleighDistribution rayleigh = new RayleighDistribution(3.2);

            // Fitted scale parameters and their claimed variances.
            Sample parameter = new Sample();
            Sample variance  = new Sample();

            for (int seed = 0; seed < 128; seed++)
            {
                // Quite-small samples are fine here because the estimator is
                // unbiased at finite n.
                Sample sample = SampleTest.CreateSample(rayleigh, 8, seed);

                FitResult result = RayleighDistribution.FitToSample(sample);
                parameter.Add(result.Parameters[0]);
                variance.Add(result.Covariance(0, 0));

                // Each individual fit should be statistically acceptable.
                Assert.IsTrue(result.GoodnessOfFit.Probability > 0.01);
            }

            // The fitted scale clusters around the true scale, and the observed
            // scatter agrees with the claimed (median) variance.
            Assert.IsTrue(parameter.PopulationMean.ConfidenceInterval(0.99).ClosedContains(rayleigh.Scale));
            Assert.IsTrue(parameter.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variance.Median));
        }
        public void FitDataToLineTest()
        {
            // Build a data set from the line y = 2x - 1 with x-dependent uncertainties.
            Interval r = Interval.FromEndpoints(0.0, 10.0);
            Func <double, double> fv = delegate(double x) {
                return(2.0 * x - 1.0);
            };
            Func <double, double> fu = delegate(double x) {
                return(1.0 + x);
            };
            UncertainMeasurementSample data = CreateDataSet(r, fv, fu, 20);

            // Sanity check the data set.
            Assert.IsTrue(data.Count == 20);

            // Fit to a line.
            FitResult line = data.FitToLine();

            // A line has two parameters; both should match the generating values,
            // and the fit should not be rejected.
            Assert.IsTrue(line.Dimension == 2);
            Assert.IsTrue(line.Parameter(0).ConfidenceInterval(0.95).ClosedContains(-1.0));
            Assert.IsTrue(line.Parameter(1).ConfidenceInterval(0.95).ClosedContains(2.0));
            Assert.IsTrue(line.GoodnessOfFit.LeftProbability < 0.95);

            // The correlation coefficient should be related to the covariance as expected:
            // rho = cov(0,1) / (sigma_0 * sigma_1).
            double expectedCorrelation = line.Covariance(0, 1) / line.Parameter(0).Uncertainty / line.Parameter(1).Uncertainty;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(line.CorrelationCoefficient(0, 1), expectedCorrelation));

            // A 1st-order polynomial fit should agree with the line fit in every respect.
            FitResult poly = data.FitToPolynomial(1);

            Assert.IsTrue(poly.Dimension == 2);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.Parameters, line.Parameters));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.CovarianceMatrix, line.CovarianceMatrix));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.Statistic, line.GoodnessOfFit.Statistic));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.LeftProbability, line.GoodnessOfFit.LeftProbability));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.RightProbability, line.GoodnessOfFit.RightProbability));

            // Fitting sloped data to a constant should produce a poor fit.
            FitResult constant = data.FitToConstant();

            Assert.IsTrue(constant.GoodnessOfFit.LeftProbability > 0.95);
        }
        public void BivariateLinearRegression()
        {
            // Do a set of linear regression fits. Verify not only that the fitted
            // parameters are what they should be, but also that their observed
            // variances/covariances match the estimates the fits return.

            Random rng = new Random(314159);

            // True intercept and slope of the generating line.
            double a0 = 2.0; double b0 = -1.0;

            // Sample of the returned (a, b) fit parameters.
            BivariateSample ps = new BivariateSample();

            // Running totals of the returned covariance estimates; since these
            // vary slightly from fit to fit, we average them at the end.
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // Also keep track of the fit test statistics.
            Sample fs = new Sample();

            // Do 100 fits.
            for (int k = 0; k < 100; k++)
            {
                // The x's may be drawn from any distribution; the noise is normal.
                Distribution xd = new LogisticDistribution();
                Distribution nd = new NormalDistribution(0.0, 2.0);

                // Generate a synthetic data set on the line plus noise.
                BivariateSample s = new BivariateSample();
                for (int i = 0; i < 25; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    s.Add(x, y);
                }

                // Do the regression.
                FitResult r = s.LinearRegression();

                // Record the best-fit intercept and slope.
                double a = r.Parameter(0).Value;
                double b = r.Parameter(1).Value;
                ps.Add(a, b);

                // Accumulate the estimated covariances.
                caa += r.Covariance(0, 0);
                cbb += r.Covariance(1, 1);
                cab += r.Covariance(0, 1);

                // Record the fit statistic.
                fs.Add(r.GoodnessOfFit.Statistic);
                Console.WriteLine("F={0}", r.GoodnessOfFit.Statistic);
            }

            // Average the accumulated covariance estimates.
            caa /= ps.Count;
            cbb /= ps.Count;
            cab /= ps.Count;

            // The mean parameter estimates should be the underlying population parameters.
            Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
            Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);

            // The observed parameter covariances should match the reported estimates.
            Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

            // Check how F is distributed against the expected Fisher distribution.
            Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
        }
        public void LinearLogisticRegression()
        {
            // Do a set of logistic regression fits. Verify not only that the fitted
            // parameters are what they should be, but also that their observed
            // variances/covariances match the estimates the fits return.

            Random rng = new Random(314159);

            // True logistic parameters: P(y=1 | x) = e^(a0 + b0 x) / (1 + e^(a0 + b0 x)).
            double a0 = 1.0; double b0 = -1.0 / 2.0;

            // Sample of the returned (a, b) fit parameters.
            BivariateSample ps = new BivariateSample();

            // Running totals of the returned covariance estimates; since these
            // vary slightly from fit to fit, we average them at the end.
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // Do 50 fits.
            for (int k = 0; k < 50; k++)
            {
                Console.WriteLine("k={0}", k);

                // Generate a synthetic data set: uniform x in [-1, 1], with a
                // Bernoulli y whose success probability follows the logistic curve.
                BivariateSample s = new BivariateSample();
                for (int i = 0; i < 50; i++)
                {
                    double x  = 2.0 * rng.NextDouble() - 1.0;
                    double ez = Math.Exp(a0 + b0 * x);
                    double P  = ez / (1.0 + ez);
                    s.Add(x, (rng.NextDouble() < P) ? 1.0 : 0.0);
                }

                // Do the regression.
                FitResult r = s.LinearLogisticRegression();

                // Record the best-fit parameters.
                double a = r.Parameter(0).Value;
                double b = r.Parameter(1).Value;
                ps.Add(a, b);

                Console.WriteLine("{0}, {1}", a, b);

                // Accumulate the estimated covariances.
                caa += r.Covariance(0, 0);
                cbb += r.Covariance(1, 1);
                cab += r.Covariance(0, 1);
            }

            // Average the accumulated covariance estimates.
            caa /= ps.Count;
            cbb /= ps.Count;
            cab /= ps.Count;

            // The mean parameter estimates should be the underlying population parameters.
            Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            // The observed parameter covariances should match the reported estimates.
            Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
        }