Example #1
        /// <summary>
        /// Computes the exponential distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameter.</returns>
        /// <remarks>
        /// <para>The returned fit parameter is &#x3BC; (the <see cref="Mean"/>).
        /// This is the same parameter that is required by the <see cref="ExponentialDistribution(double)"/> constructor to
        /// specify a new exponential distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException("sample");
            }
            if (sample.Count < 2)
            {
                throw new InsufficientDataException();
            }

            // none of the data is allowed to be negative
            foreach (double value in sample)
            {
                if (value < 0.0)
                {
                    throw new InvalidOperationException();
                }
            }

            // the best-fit exponential's mean is the sample mean, with corresponding uncertainty

            double lambda  = sample.Mean;
            double dLambda = lambda / Math.Sqrt(sample.Count);

            Distribution distribution = new ExponentialDistribution(lambda);
            TestResult   test         = sample.KolmogorovSmirnovTest(distribution);

            return(new FitResult(lambda, dLambda, test));
        }
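        // A minimal usage sketch of FitToSample. The method name and the population parameters below are
        // illustrative assumptions; the Sample, FitResult, and UncertainValue calls follow the tests
        // elsewhere in this section.
        public void FitToSampleUsageSketch()
        {
            // draw an illustrative sample from a known exponential population
            ExponentialDistribution population = new ExponentialDistribution(4.0);
            Random rng = new Random(1);
            Sample sample = new Sample();
            for (int i = 0; i < 100; i++) { sample.Add(population.GetRandomValue(rng)); }

            // fit it; the single fitted parameter is the mean, with uncertainty ~ mean / sqrt(n)
            FitResult fit = ExponentialDistribution.FitToSample(sample);
            UncertainValue mu = fit.Parameter(0);
            Console.WriteLine("mu = {0}, P = {1}", mu, fit.GoodnessOfFit.LeftProbability);

            // as the remarks note, the fitted parameter can be fed straight back into the constructor
            ExponentialDistribution refit = new ExponentialDistribution(mu.Value);
            Console.WriteLine(refit.Mean);
        }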
        public void KendallNullDistributionTest()
        {
            // pick independent distributions for x and y, which needn't be normal and needn't be related
            Distribution xDistribution = new LogisticDistribution();
            Distribution yDistribution = new ExponentialDistribution();
            Random rng = new Random(314159265);

            // generate bivariate samples of various sizes
            //int n = 64; {
            foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {

                Sample testStatistics = new Sample();
                Distribution testDistribution = null;

                for (int i = 0; i < 128; i++) {

                    BivariateSample sample = new BivariateSample();
                    for (int j = 0; j < n; j++) {
                        sample.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
                    }

                    TestResult result = sample.KendallTauTest();
                    testStatistics.Add(result.Statistic);
                    testDistribution = result.Distribution;
                }

                //TestResult r2 = testStatistics.KolmogorovSmirnovTest(testDistribution);
                //Console.WriteLine("n={0} P={1}", n, r2.LeftProbability);
                //Assert.IsTrue(r2.RightProbability > 0.05);

                Console.WriteLine("{0} {1}", testStatistics.PopulationVariance, testDistribution.Variance);
                Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.95).ClosedContains(testDistribution.Mean));
                Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(testDistribution.Variance));
            }
        }
        /// <summary>
        /// Computes the exponential distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameter.</returns>
        /// <remarks>
        /// <para>The returned fit parameter is &#x3BC; (the <see cref="Mean"/>).
        /// This is the same parameter that is required by the <see cref="ExponentialDistribution(double)"/> constructor to
        /// specify a new exponential distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException(nameof(sample));
            }
            if (sample.Count < 2)
            {
                throw new InsufficientDataException();
            }

            // None of the data is allowed to be negative.
            foreach (double value in sample)
            {
                if (value < 0.0)
                {
                    throw new InvalidOperationException();
                }
            }

            // It's easy to show that the MLE estimator of \mu is the sample mean and that its variance
            // is \mu^2 / n, which is just the variance of the mean, since the variance of the individual
            // values is \mu^2.

            // We can do better than an asymptotic result, though. Since we know that the sum
            // of exponential-distributed values is Gamma-distributed, we know the exact
            // distribution of the mean is Gamma(n, \mu / n). This has mean \mu and variance
            // \mu^2 / n, so the asymptotic results are actually exact.
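            // A quick sketch of that claim, assuming the standard exponential log-likelihood:
            //   ln L(\mu) = -n ln(\mu) - (1/\mu) \sum_i x_i
            //   d(ln L)/d\mu = 0  =>  \hat{\mu} = \bar{x} (the sample mean)
            //   Fisher information I(\mu) = n / \mu^2  =>  var(\hat{\mu}) \approx \mu^2 / n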

            double lambda  = sample.Mean;
            double dLambda = lambda / Math.Sqrt(sample.Count);

            ContinuousDistribution distribution = new ExponentialDistribution(lambda);
            TestResult             test         = sample.KolmogorovSmirnovTest(distribution);

            return(new FitResult(lambda, dLambda, test));
        }
        public void GammaFromExponential()
        {
            // test that x_1 + x_2 + ... + x_n ~ Gamma(n) when x_i ~ Exponential()
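            // (one quick way to see this: the MGF of a standard exponential is 1/(1 - t), so the MGF of
            // the sum of n independent copies is (1 - t)^{-n}, which is the MGF of a Gamma(n) distribution)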

            Random rng = new Random(1);
            ExponentialDistribution eDistribution = new ExponentialDistribution();

            // pick some low values of n so the distribution is not simply normal
            foreach (int n in new int[] { 2, 3, 4, 5 }) {
                Sample gSample = new Sample();
                for (int i = 0; i < 100; i++) {

                    double sum = 0.0;
                    for (int j = 0; j < n; j++) {
                        sum += eDistribution.GetRandomValue(rng);
                    }
                    gSample.Add(sum);

                }

                GammaDistribution gDistribution = new GammaDistribution(n);
                TestResult result = gSample.KolmogorovSmirnovTest(gDistribution);
                Assert.IsTrue(result.LeftProbability < 0.95);

            }
        }
Example #5
        public void SampleMaximumLikelihoodFit()
        {
            // normal distribution
            Console.WriteLine("normal");

            double mu = -1.0;
            double sigma = 2.0;
            Distribution nd = new NormalDistribution(mu, sigma);
            Sample ns = CreateSample(nd, 500);
            //FitResult nr = ns.MaximumLikelihoodFit(new NormalDistribution(mu + 1.0, sigma + 1.0));
            FitResult nr = ns.MaximumLikelihoodFit((IList<double> p) => new NormalDistribution(p[0], p[1]), new double[] { mu + 1.0, sigma + 1.0 });

            Console.WriteLine(nr.Parameter(0));
            Console.WriteLine(nr.Parameter(1));

            Assert.IsTrue(nr.Dimension == 2);
            Assert.IsTrue(nr.Parameter(0).ConfidenceInterval(0.95).ClosedContains(mu));
            Assert.IsTrue(nr.Parameter(1).ConfidenceInterval(0.95).ClosedContains(sigma));

            FitResult nr2 = NormalDistribution.FitToSample(ns);

            Console.WriteLine(nr.Covariance(0,1));

            // test analytic expression
            Assert.IsTrue(TestUtilities.IsNearlyEqual(nr.Parameter(0).Value, ns.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
            // we don't expect to be able to test sigma against the analytic expression, because the ML estimate of sigma has a known bias at finite sample size

            // exponential distribution

            Console.WriteLine("exponential");
            double em = 3.0;
            Distribution ed = new ExponentialDistribution(em);
            Sample es = CreateSample(ed, 100);
            //FitResult er = es.MaximumLikelihoodFit(new ExponentialDistribution(em + 1.0));
            FitResult er = es.MaximumLikelihoodFit((IList<double> p) => new ExponentialDistribution(p[0]), new double[] { em + 1.0 });

            Console.WriteLine(er.Parameter(0));

            Assert.IsTrue(er.Dimension == 1);
            Assert.IsTrue(er.Parameter(0).ConfidenceInterval(0.95).ClosedContains(em));

            // test against analytic expression
            Assert.IsTrue(TestUtilities.IsNearlyEqual(er.Parameter(0).Value, es.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(er.Parameter(0).Uncertainty, es.Mean / Math.Sqrt(es.Count), Math.Sqrt(Math.Sqrt(TestUtilities.TargetPrecision))));

            // lognormal distribution
            Console.WriteLine("lognormal");

            double l1 = -4.0;
            double l2 = 5.0;

            Distribution ld = new LognormalDistribution(l1, l2);
            Sample ls = CreateSample(ld, 100);
            //FitResult lr = ls.MaximumLikelihoodFit(new LognormalDistribution(l1 + 1.0, l2 + 1.0));
            FitResult lr = ls.MaximumLikelihoodFit((IList<double> p) => new LognormalDistribution(p[0], p[1]), new double[] { l1 + 1.0, l2 + 1.0 });

            Console.WriteLine(lr.Parameter(0));
            Console.WriteLine(lr.Parameter(1));
            Console.WriteLine(lr.Covariance(0, 1));

            Assert.IsTrue(lr.Dimension == 2);
            Assert.IsTrue(lr.Parameter(0).ConfidenceInterval(0.99).ClosedContains(l1));
            Assert.IsTrue(lr.Parameter(1).ConfidenceInterval(0.99).ClosedContains(l2));

            // weibull distribution
            Console.WriteLine("weibull");

            double w_scale = 4.0;
            double w_shape = 2.0;
            WeibullDistribution w_d = new WeibullDistribution(w_scale, w_shape);
            Sample w_s = CreateSample(w_d, 20);
            //FitResult w_r = w_s.MaximumLikelihoodFit(new WeibullDistribution(1.0, 0.5));
            FitResult w_r = w_s.MaximumLikelihoodFit((IList<double> p) => new WeibullDistribution(p[0], p[1]), new double[] { 2.0, 2.0 });

            Console.WriteLine(w_r.Parameter(0));
            Console.WriteLine(w_r.Parameter(1));
            Console.WriteLine(w_r.Covariance(0, 1));

            Assert.IsTrue(w_r.Parameter(0).ConfidenceInterval(0.95).ClosedContains(w_d.ScaleParameter));
            Assert.IsTrue(w_r.Parameter(1).ConfidenceInterval(0.95).ClosedContains(w_d.ShapeParameter));

            // logistic distribution
            Console.WriteLine("logistic");

            double logistic_m = -3.0;
            double logistic_s = 2.0;
            Distribution logistic_distribution = new LogisticDistribution(logistic_m, logistic_s);
            Sample logistic_sample = CreateSample(logistic_distribution, 100);
            //FitResult logistic_result = logistic_sample.MaximumLikelihoodFit(new LogisticDistribution());
            FitResult logistic_result = logistic_sample.MaximumLikelihoodFit((IList<double> p) => new LogisticDistribution(p[0], p[1]), new double[] { 2.0, 3.0 });

            Console.WriteLine(logistic_result.Parameter(0));
            Console.WriteLine(logistic_result.Parameter(1));

            Assert.IsTrue(logistic_result.Dimension == 2);
            Assert.IsTrue(logistic_result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(logistic_m));
            Assert.IsTrue(logistic_result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(logistic_s));

            // beta distribution
            // not yet!
            /*
            double beta_alpha = 0.5;
            double beta_beta = 2.0;
            Distribution beta_distribution = new BetaDistribution(beta_alpha, beta_beta);
            Sample beta_sample = CreateSample(beta_distribution, 100);
            FitResult beta_result = beta_sample.MaximumLikelihoodFit(new BetaDistribution(1.0, 1.0));

            Console.WriteLine("Beta:");
            Console.WriteLine(beta_result.Parameter(0));
            Console.WriteLine(beta_result.Parameter(1));

            Assert.IsTrue(beta_result.Dimension == 2);
            Assert.IsTrue(beta_result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(beta_alpha));
            Assert.IsTrue(beta_result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(beta_beta));
            */
        }
Example #6
        public void SampleMannWhitneyTest()
        {
            // define two non-normal distributions
            Distribution d1 = new ExponentialDistribution(2.0);
            Distribution d2 = new ExponentialDistribution(3.0);

            // create three samples from them
            Sample s1a = CreateSample(d1, 20, 1);
            Sample s1b = CreateSample(d1, 30, 2);
            Sample s2 = CreateSample(d2, 40, 3);

            // Mann-Whitney test 1a vs. 1b; they should not be distinguished
            TestResult rab = Sample.MannWhitneyTest(s1a, s1b);
            Console.WriteLine("{0} {1}", rab.Statistic, rab.LeftProbability);
            Assert.IsTrue((rab.LeftProbability < 0.95) && (rab.RightProbability < 0.95));

            // Mann-Whitney test 1 vs. 2; they should be distinguished,
            // with 1 consistently less than 2, so U is abnormally small
            TestResult r12 = Sample.MannWhitneyTest(s1b, s2);
            Console.WriteLine("{0} {1}", r12.Statistic, r12.LeftProbability);
            Assert.IsTrue(r12.RightProbability > 0.95);
        }
Example #7
        public void SampleKuiperTest()
        {
            // this test has a whiff of meta-statistics about it
            // we want to make sure that the Kuiper test statistic V is distributed according to the Kuiper
            // distribution; to do this, we create a sample of V statistics and do KS/Kuiper tests
            // comparing it to the claimed Kuiper distribution

            // start with any old underlying distribution
            Distribution distribution = new ExponentialDistribution(2.0);

            // generate some samples from it, and for each one get a V statistic from a KS test
            Sample VSample = new Sample();
            Distribution VDistribution = null;
            for (int i = 0; i < 25; i++) {
                // the sample size must be large enough that the asymptotic assumptions are satisfied;
                // at the moment this test fails if we make the sample size much smaller; we should
                // be able to shrink this number when we expose the finite-sample distributions
                Sample sample = CreateSample(distribution, 250, i);
                TestResult kuiper = sample.KuiperTest(distribution);
                double V = kuiper.Statistic;
                Console.WriteLine("V = {0}", V);
                VSample.Add(V);
                VDistribution = kuiper.Distribution;
            }

            // check on the mean
            Console.WriteLine("m = {0} vs. {1}", VSample.PopulationMean, VDistribution.Mean);
            Assert.IsTrue(VSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(VDistribution.Mean));

            // check on the standard deviation
            Console.WriteLine("s = {0} vs. {1}", VSample.PopulationStandardDeviation, VDistribution.StandardDeviation);
            Assert.IsTrue(VSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(VDistribution.StandardDeviation));

            // do a KS test comparing the sample to the expected distribution
            TestResult kst = VSample.KolmogorovSmirnovTest(VDistribution);
            Console.WriteLine("D = {0}, P = {1}", kst.Statistic, kst.LeftProbability);
            Assert.IsTrue(kst.LeftProbability < 0.95);

            // do a Kuiper test comparing the sample to the expected distribution
            TestResult kut = VSample.KuiperTest(VDistribution);
            Console.WriteLine("V = {0}, P = {1}", kut.Statistic, kut.LeftProbability);
            Assert.IsTrue(kut.LeftProbability < 0.95);
        }
Example #8
        public void ExponentialFitUncertainty()
        {
            // check that the uncertainty in reported fit parameters is actually meaningful
            // it should be the standard deviation of fit parameter values in a sample of many fits

            // define a population distribution
            Distribution distribution = new ExponentialDistribution(4.0);

            // draw a lot of samples from it; fit each sample and
            // record the reported parameter value and error of each
            Sample values = new Sample();
            Sample uncertainties = new Sample();
            for (int i = 0; i < 50; i++) {
                Sample sample = CreateSample(distribution, 10, i);
                FitResult fit = ExponentialDistribution.FitToSample(sample);
                UncertainValue lambda = fit.Parameter(0);
                values.Add(lambda.Value);
                uncertainties.Add(lambda.Uncertainty);
            }

            Console.WriteLine(uncertainties.Mean);
            Console.WriteLine(values.PopulationStandardDeviation);

            // the reported errors should agree with the standard deviation of the reported parameters
            Assert.IsTrue(values.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Mean));
        }
Example #9
        public void ExponentialFit()
        {
            ExponentialDistribution distribution = new ExponentialDistribution(5.0);
            Sample sample = CreateSample(distribution, 100);

            // fit to normal should be bad
            FitResult nfit = NormalDistribution.FitToSample(sample);
            Console.WriteLine("P_n = {0}", nfit.GoodnessOfFit.LeftProbability);
            Assert.IsTrue(nfit.GoodnessOfFit.LeftProbability > 0.95);

            // fit to exponential should be good
            FitResult efit = ExponentialDistribution.FitToSample(sample);
            Console.WriteLine("P_e = {0}", efit.GoodnessOfFit.LeftProbability);
            Assert.IsTrue(efit.GoodnessOfFit.LeftProbability < 0.95);
            Assert.IsTrue(efit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(distribution.Mean));
        }
Example #10
        public void SignTestDistribution()
        {
            // start with a non-normally distributed population
            Distribution xDistribution = new ExponentialDistribution();
            Random rng = new Random(1);

            // draw 100 samples from it and compute the sign test statistic W for each
            Sample wSample = new Sample();
            for (int i = 0; i < 100; i++) {

                // each sample has 8 observations
                Sample xSample = new Sample();
                for (int j = 0; j < 8; j++) { xSample.Add(xDistribution.GetRandomValue(rng)); }
                //Sample xSample = CreateSample(xDistribution, 8, i);
                TestResult wResult = xSample.SignTest(xDistribution.Median);
                double W = wResult.Statistic;
                //Console.WriteLine("W = {0}", W);
                wSample.Add(W);
            }

            // sanity check our sample of W's
            Assert.IsTrue(wSample.Count == 100);

            // check that the W statistics are distributed as expected; under the null hypothesis each observation
            // falls on a given side of the true median with probability 1/2, so W ~ Binomial(n = 8, p = 1/2)
            DiscreteDistribution wDistribution = new BinomialDistribution(0.5, 8);

            // check on the mean
            Console.WriteLine("m = {0} vs. {1}", wSample.PopulationMean, wDistribution.Mean);
            Assert.IsTrue(wSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(wDistribution.Mean));

            // check on the standard deviation
            Console.WriteLine("s = {0} vs. {1}", wSample.PopulationStandardDeviation, wDistribution.StandardDeviation);
            Assert.IsTrue(wSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(wDistribution.StandardDeviation));

            // check on the skew
            Console.WriteLine("t = {0} vs. {1}", wSample.PopulationMomentAboutMean(3), wDistribution.MomentAboutMean(3));
            Assert.IsTrue(wSample.PopulationMomentAboutMean(3).ConfidenceInterval(0.95).ClosedContains(wDistribution.MomentAboutMean(3)));

            // check on the kurtosis
            Console.WriteLine("u = {0} vs. {1}", wSample.PopulationMomentAboutMean(4), wDistribution.MomentAboutMean(4));
            Assert.IsTrue(wSample.PopulationMomentAboutMean(4).ConfidenceInterval(0.95).ClosedContains(wDistribution.MomentAboutMean(4)));

            // KS tests are only for continuous distributions
        }
        /// <summary>
        /// Computes the exponential distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameter.</returns>
        /// <remarks>
        /// <para>The returned fit parameter is &#x3BC; (the <see cref="Mean"/>).
        /// This is the same parameter that is required by the <see cref="ExponentialDistribution(double)"/> constructor to
        /// specify a new exponential distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null) throw new ArgumentNullException("sample");
            if (sample.Count < 2) throw new InsufficientDataException();

            // none of the data is allowed to be negative
            foreach (double value in sample) {
                if (value < 0.0) throw new InvalidOperationException();
            }

            // the best-fit exponential's mean is the sample mean, with corresponding uncertainty

            double lambda = sample.Mean;
            double dLambda = lambda / Math.Sqrt(sample.Count);

            Distribution distribution = new ExponentialDistribution(lambda);
            TestResult test = sample.KolmogorovSmirnovTest(distribution);

            return (new FitResult(lambda, dLambda, test));
        }
        public void TwoSampleKolmogorovNullDistributionTest()
        {
            Distribution population = new ExponentialDistribution();

            int[] sizes = new int[] { 23, 30, 175 };

            foreach (int na in sizes) {
                foreach (int nb in sizes) {
                    Console.WriteLine("{0} {1}", na, nb);

                    Sample d = new Sample();
                    Distribution nullDistribution = null;
                    for (int i = 0; i < 128; i++) {

                        Sample a = TestUtilities.CreateSample(population, na, 31415 + na + i);
                        Sample b = TestUtilities.CreateSample(population, nb, 27182 + nb + i);

                        TestResult r = Sample.KolmogorovSmirnovTest(a, b);
                        d.Add(r.Statistic);
                        nullDistribution = r.Distribution;

                    }
                    // Only do the full KS test when the number of distinct values the two-sample statistic can take
                    // (LCM(na, nb)) is larger than the number of collected statistics; otherwise the KS test detects
                    // the granularity of the null distribution and fails.
                    TestResult mr = d.KolmogorovSmirnovTest(nullDistribution);
                    Console.WriteLine(mr.LeftProbability);
                    if (AdvancedIntegerMath.LCM(na, nb) > d.Count) Assert.IsTrue(mr.LeftProbability < 0.99);
                    // But always test that mean and standard deviation are as expected
                    Console.WriteLine("{0} {1}", nullDistribution.Mean, d.PopulationMean.ConfidenceInterval(0.99));
                    Assert.IsTrue(d.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
                    Console.WriteLine("{0} {1}", nullDistribution.StandardDeviation, d.PopulationStandardDeviation.ConfidenceInterval(0.99));
                    Assert.IsTrue(d.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
                    Console.WriteLine("{0} {1}", nullDistribution.MomentAboutMean(3), d.PopulationMomentAboutMean(3).ConfidenceInterval(0.99));
                    //Assert.IsTrue(d.PopulationMomentAboutMean(3).ConfidenceInterval(0.99).ClosedContains(nullDistribution.MomentAboutMean(3)));

                    //Console.WriteLine("m {0} {1}", nullDistribution.Mean, d.PopulationMean);
                }
            }
        }
        public void MultivariateMoments()
        {
            // create a random sample
            MultivariateSample M = new MultivariateSample(3);
            Distribution d0 = new NormalDistribution();
            Distribution d1 = new ExponentialDistribution();
            Distribution d2 = new UniformDistribution();
            Random rng = new Random(1);
            int n = 10;
            for (int i = 0; i < n; i++) {
                M.Add(d0.GetRandomValue(rng), d1.GetRandomValue(rng), d2.GetRandomValue(rng));
            }

            // test that moments agree
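            // (the int[] p below gives the power of each coordinate: p[i] = 1 with Moment gives the raw
            // moment of column i, i.e. its mean; p[i] = 2 with MomentAboutMean gives its variance; and
            // q with two 1's gives a mixed central moment, i.e. the covariance of the two columns)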
            for (int i = 0; i < 3; i++) {
                int[] p = new int[3];
                p[i] = 1;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Mean, M.Moment(p)));
                p[i] = 2;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Variance, M.MomentAboutMean(p)));
                for (int j = 0; j < i; j++) {
                    int[] q = new int[3];
                    q[i] = 1;
                    q[j] = 1;
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(M.TwoColumns(i, j).Covariance, M.MomentAboutMean(q)));
                }
            }
        }