public void KendallNullDistributionTest()
        {
            // pick independent distributions for x and y, which needn't be normal and needn't be related
            Distribution xDistrubtion = new LogisticDistribution();
            Distribution yDistribution = new ExponentialDistribution();
            Random rng = new Random(314159265);

            // generate bivariate samples of various sizes
            //int n = 64; {
            foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {

                Sample testStatistics = new Sample();
                Distribution testDistribution = null;

                for (int i = 0; i < 128; i++) {

                    BivariateSample sample = new BivariateSample();
                    for (int j = 0; j < n; j++) {
                        sample.Add(xDistrubtion.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
                    }

                    TestResult result = sample.KendallTauTest();
                    testStatistics.Add(result.Statistic);
                    testDistribution = result.Distribution;
                }

                //TestResult r2 = testStatistics.KolmogorovSmirnovTest(testDistribution);
                //Console.WriteLine("n={0} P={1}", n, r2.LeftProbability);
                //Assert.IsTrue(r2.RightProbability > 0.05);

                Console.WriteLine("{0} {1}", testStatistics.PopulationVariance, testDistribution.Variance);
                Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.95).ClosedContains(testDistribution.Mean));
                Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(testDistribution.Variance));
            }
        }
        public void BivariateLinearRegression()
        {
            // do a set of logistic regression fits
            // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as returned

            Random rng = new Random(314159);

            // define logistic parameters
            double a0 = 2.0; double b0 = -1.0;

            // keep track of sample of returned a and b fit parameters
            BivariateSample ps = new BivariateSample();

            // also keep track of returned covariance estimates
            // since these vary slightly from fit to fit, we will average them
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // also keep track of test statistics
            Sample fs = new Sample();

            // do 100 fits
            for (int k = 0; k < 100; k++) {

                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                Distribution xd = new LogisticDistribution();
                Distribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample s = new BivariateSample();
                for (int i = 0; i < 25; i++) {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    s.Add(x, y);
                }

                // do the regression
                FitResult r = s.LinearRegression();

                // record best fit parameters
                double a = r.Parameter(0).Value;
                double b = r.Parameter(1).Value;
                ps.Add(a, b);

                // record estimated covariances
                caa += r.Covariance(0, 0);
                cbb += r.Covariance(1, 1);
                cab += r.Covariance(0, 1);

                // record the fit statistic
                fs.Add(r.GoodnessOfFit.Statistic);
                Console.WriteLine("F={0}", r.GoodnessOfFit.Statistic);

            }

            caa /= ps.Count;
            cbb /= ps.Count;
            cab /= ps.Count;

            // check that mean parameter estimates are what they should be: the underlying population parameters
            Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
            Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);

            // check that parameter covarainces are what they should be: the reported covariance estimates
            Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

            // check that F is distributed as it should be
            Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
        }
Beispiel #3
0
        public void SampleMaximumLikelihoodFit()
        {
            // normal distriubtion
            Console.WriteLine("normal");

            double mu = -1.0;
            double sigma = 2.0;
            Distribution nd = new NormalDistribution(mu, sigma);
            Sample ns = CreateSample(nd, 500);
            //FitResult nr = ns.MaximumLikelihoodFit(new NormalDistribution(mu + 1.0, sigma + 1.0));
            FitResult nr = ns.MaximumLikelihoodFit((IList<double> p) => new NormalDistribution(p[0], p[1]), new double[] { mu + 1.0, sigma + 1.0 });

            Console.WriteLine(nr.Parameter(0));
            Console.WriteLine(nr.Parameter(1));

            Assert.IsTrue(nr.Dimension == 2);
            Assert.IsTrue(nr.Parameter(0).ConfidenceInterval(0.95).ClosedContains(mu));
            Assert.IsTrue(nr.Parameter(1).ConfidenceInterval(0.95).ClosedContains(sigma));

            FitResult nr2 = NormalDistribution.FitToSample(ns);

            Console.WriteLine(nr.Covariance(0,1));

            // test analytic expression
            Assert.IsTrue(TestUtilities.IsNearlyEqual(nr.Parameter(0).Value, ns.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
            // we don't expect to be able to test sigma against analytic expression because ML value has known bias for finite sample size

            // exponential distribution

            Console.WriteLine("exponential");
            double em = 3.0;
            Distribution ed = new ExponentialDistribution(em);
            Sample es = CreateSample(ed, 100);
            //FitResult er = es.MaximumLikelihoodFit(new ExponentialDistribution(em + 1.0));
            FitResult er = es.MaximumLikelihoodFit((IList<double> p) => new ExponentialDistribution(p[0]), new double[] { em + 1.0 });

            Console.WriteLine(er.Parameter(0));

            Assert.IsTrue(er.Dimension == 1);
            Assert.IsTrue(er.Parameter(0).ConfidenceInterval(0.95).ClosedContains(em));

            // test against analytic expression
            Assert.IsTrue(TestUtilities.IsNearlyEqual(er.Parameter(0).Value, es.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(er.Parameter(0).Uncertainty, es.Mean / Math.Sqrt(es.Count), Math.Sqrt(Math.Sqrt(TestUtilities.TargetPrecision))));

            // lognormal distribution
            Console.WriteLine("lognormal");

            double l1 = -4.0;
            double l2 = 5.0;

            Distribution ld = new LognormalDistribution(l1, l2);
            Sample ls = CreateSample(ld, 100);
            //FitResult lr = ls.MaximumLikelihoodFit(new LognormalDistribution(l1 + 1.0, l2 + 1.0));
            FitResult lr = ls.MaximumLikelihoodFit((IList<double> p) => new LognormalDistribution(p[0], p[1]), new double[] { l1 + 1.0, l2 + 1.0 });

            Console.WriteLine(lr.Parameter(0));
            Console.WriteLine(lr.Parameter(1));
            Console.WriteLine(lr.Covariance(0, 1));

            Assert.IsTrue(lr.Dimension == 2);
            Assert.IsTrue(lr.Parameter(0).ConfidenceInterval(0.99).ClosedContains(l1));
            Assert.IsTrue(lr.Parameter(1).ConfidenceInterval(0.99).ClosedContains(l2));

            // weibull distribution
            Console.WriteLine("weibull");

            double w_scale = 4.0;
            double w_shape = 2.0;
            WeibullDistribution w_d = new WeibullDistribution(w_scale, w_shape);
            Sample w_s = CreateSample(w_d, 20);
            //FitResult w_r = w_s.MaximumLikelihoodFit(new WeibullDistribution(1.0, 0.5));
            FitResult w_r = w_s.MaximumLikelihoodFit((IList<double> p) => new WeibullDistribution(p[0], p[1]), new double[] { 2.0, 2.0 });

            Console.WriteLine(w_r.Parameter(0));
            Console.WriteLine(w_r.Parameter(1));
            Console.WriteLine(w_r.Covariance(0, 1));

            Assert.IsTrue(w_r.Parameter(0).ConfidenceInterval(0.95).ClosedContains(w_d.ScaleParameter));
            Assert.IsTrue(w_r.Parameter(1).ConfidenceInterval(0.95).ClosedContains(w_d.ShapeParameter));

            // logistic distribution
            Console.WriteLine("logistic");

            double logistic_m = -3.0;
            double logistic_s = 2.0;
            Distribution logistic_distribution = new LogisticDistribution(logistic_m, logistic_s);
            Sample logistic_sample = CreateSample(logistic_distribution, 100);
            //FitResult logistic_result = logistic_sample.MaximumLikelihoodFit(new LogisticDistribution());
            FitResult logistic_result = logistic_sample.MaximumLikelihoodFit((IList<double> p) => new LogisticDistribution(p[0], p[1]), new double[] { 2.0, 3.0 });

            Console.WriteLine(logistic_result.Parameter(0));
            Console.WriteLine(logistic_result.Parameter(1));

            Assert.IsTrue(logistic_result.Dimension == 2);
            Assert.IsTrue(logistic_result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(logistic_m));
            Assert.IsTrue(logistic_result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(logistic_s));

            // beta distribution
            // not yet!
            /*
            double beta_alpha = 0.5;
            double beta_beta = 2.0;
            Distribution beta_distribution = new BetaDistribution(beta_alpha, beta_beta);
            Sample beta_sample = CreateSample(beta_distribution, 100);
            FitResult beta_result = beta_sample.MaximumLikelihoodFit(new BetaDistribution(1.0, 1.0));

            Console.WriteLine("Beta:");
            Console.WriteLine(beta_result.Parameter(0));
            Console.WriteLine(beta_result.Parameter(1));

            Assert.IsTrue(beta_result.Dimension == 2);
            Assert.IsTrue(beta_result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(beta_alpha));
            Assert.IsTrue(beta_result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(beta_beta));
            */
        }