/// <summary>
/// Computes the exponential distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>A fit result holding the best-fit parameter, its uncertainty, and a
/// Kolmogorov-Smirnov goodness-of-fit test of the sample against the fitted distribution.</returns>
/// <remarks>
/// <para>The returned fit parameter is μ (the <see cref="Mean"/>).
/// This is the same parameter that is required by the <see cref="ExponentialDistribution(double)"/> constructor to
/// specify a new exponential distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains negative values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) {
        throw new ArgumentNullException(nameof(sample));
    }
    if (sample.Count < 2) {
        throw new InsufficientDataException();
    }

    // Negative values are rejected; zero is accepted.
    foreach (double value in sample) {
        if (value < 0.0) {
            throw new InvalidOperationException();
        }
    }

    // The best-fit mean is the sample mean; its uncertainty is the mean divided by sqrt(n).
    double mu = sample.Mean;
    double muUncertainty = mu / Math.Sqrt(sample.Count);

    // Report how well the fitted distribution describes the data.
    Distribution distribution = new ExponentialDistribution(mu);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    return new FitResult(mu, muUncertainty, test);
}
public void KendallNullDistributionTest() {

    // x and y are drawn independently; neither population needs to be normal
    // and the two need not be related to each other.
    Distribution xPopulation = new LogisticDistribution();
    Distribution yPopulation = new ExponentialDistribution();

    Random rng = new Random(314159265);

    // Repeat the check for bivariate samples of several sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {

        Sample tauStatistics = new Sample();
        Distribution nullDistribution = null;

        // Collect the Kendall statistic from 128 independent bivariate samples of size n.
        for (int trial = 0; trial < 128; trial++) {

            BivariateSample pairs = new BivariateSample();
            for (int k = 0; k < n; k++) {
                // Draw x before y so the random stream is consumed in a fixed order.
                double x = xPopulation.GetRandomValue(rng);
                double y = yPopulation.GetRandomValue(rng);
                pairs.Add(x, y);
            }

            TestResult tauTest = pairs.KendallTauTest();
            tauStatistics.Add(tauTest.Statistic);
            nullDistribution = tauTest.Distribution;
        }

        // A full KS comparison of the statistics against the claimed distribution is currently disabled:
        //TestResult r2 = testStatistics.KolmogorovSmirnovTest(testDistribution);
        //Assert.IsTrue(r2.RightProbability > 0.05);

        Console.WriteLine("{0} {1}", tauStatistics.PopulationVariance, nullDistribution.Variance);

        // The observed mean and variance of the statistic must be compatible with the claimed null distribution.
        Assert.IsTrue(tauStatistics.PopulationMean.ConfidenceInterval(0.95).ClosedContains(nullDistribution.Mean));
        Assert.IsTrue(tauStatistics.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(nullDistribution.Variance));
    }
}
/// <summary>
/// Finds the exponential distribution that best describes a sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>A fit result holding the best-fit parameter, its uncertainty, and a
/// Kolmogorov-Smirnov goodness-of-fit test of the sample against the fitted distribution.</returns>
/// <remarks>
/// <para>The fit parameter is μ (the <see cref="Mean"/>), i.e. the same parameter that the
/// <see cref="ExponentialDistribution(double)"/> constructor takes to specify a new exponential distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains negative values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) {
        throw new ArgumentNullException(nameof(sample));
    }

    if (sample.Count < 2) {
        throw new InsufficientDataException();
    }

    // Negative values are rejected; zero is accepted.
    foreach (double x in sample) {
        if (x < 0.0) {
            throw new InvalidOperationException();
        }
    }

    // The maximum-likelihood estimate of \mu is the sample mean, with variance \mu^2 / n.
    // That is simply the variance of the mean, because each individual value has variance \mu^2.
    // The result is not merely asymptotic: a sum of exponentially distributed values is
    // Gamma-distributed, so the mean is distributed exactly as Gamma(n, \mu / n), which has
    // mean \mu and variance \mu^2 / n.
    double mu = sample.Mean;
    double muUncertainty = mu / Math.Sqrt(sample.Count);

    // Report how well the fitted distribution describes the data.
    ContinuousDistribution fitted = new ExponentialDistribution(mu);
    TestResult goodnessOfFit = sample.KolmogorovSmirnovTest(fitted);

    return new FitResult(mu, muUncertainty, goodnessOfFit);
}
public void GammaFromExponential() {

    // The sum x_1 + x_2 + ... + x_n of values drawn from a default-constructed
    // exponential distribution should be distributed as Gamma(n).
    Random rng = new Random(1);
    ExponentialDistribution exponential = new ExponentialDistribution();

    // Use only small n, so the sum's distribution is not simply normal.
    for (int n = 2; n <= 5; n++) {

        // Build a sample of 100 sums, each of n exponential deviates.
        Sample sums = new Sample();
        for (int trial = 0; trial < 100; trial++) {
            double total = 0.0;
            for (int term = 0; term < n; term++) {
                total += exponential.GetRandomValue(rng);
            }
            sums.Add(total);
        }

        // The sums must not be distinguishable from Gamma(n) by a KS test.
        GammaDistribution expected = new GammaDistribution(n);
        TestResult ks = sums.KolmogorovSmirnovTest(expected);
        Assert.IsTrue(ks.LeftProbability < 0.95);
    }
}
public void SampleMaximumLikelihoodFit() {

    // For each distribution family: draw a sample from known parameters, run a generic
    // maximum-likelihood fit from a deliberately offset starting point, and check that the
    // fitted parameters' confidence intervals contain the true values.

    // ---- normal distribution ----
    Console.WriteLine("normal");
    double normalMean = -1.0;
    double normalSigma = 2.0;
    Distribution normalPopulation = new NormalDistribution(normalMean, normalSigma);
    Sample normalSample = CreateSample(normalPopulation, 500);
    FitResult normalFit = normalSample.MaximumLikelihoodFit(
        (IList<double> q) => new NormalDistribution(q[0], q[1]),
        new double[] { normalMean + 1.0, normalSigma + 1.0 }
    );
    Console.WriteLine(normalFit.Parameter(0));
    Console.WriteLine(normalFit.Parameter(1));
    Assert.IsTrue(normalFit.Dimension == 2);
    Assert.IsTrue(normalFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(normalMean));
    Assert.IsTrue(normalFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(normalSigma));
    // The dedicated normal fitter is run as well; its result is not inspected.
    NormalDistribution.FitToSample(normalSample);
    Console.WriteLine(normalFit.Covariance(0,1));
    // The fitted mean should match the analytic answer (the sample mean).
    // Sigma is not compared: the ML value has a known bias for finite sample size.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(normalFit.Parameter(0).Value, normalSample.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));

    // ---- exponential distribution ----
    Console.WriteLine("exponential");
    double exponentialMean = 3.0;
    Distribution exponentialPopulation = new ExponentialDistribution(exponentialMean);
    Sample exponentialSample = CreateSample(exponentialPopulation, 100);
    FitResult exponentialFit = exponentialSample.MaximumLikelihoodFit(
        (IList<double> q) => new ExponentialDistribution(q[0]),
        new double[] { exponentialMean + 1.0 }
    );
    Console.WriteLine(exponentialFit.Parameter(0));
    Assert.IsTrue(exponentialFit.Dimension == 1);
    Assert.IsTrue(exponentialFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(exponentialMean));
    // Compare value and uncertainty against the analytic expressions (mean, and mean / sqrt(n)).
    Assert.IsTrue(TestUtilities.IsNearlyEqual(exponentialFit.Parameter(0).Value, exponentialSample.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(exponentialFit.Parameter(0).Uncertainty, exponentialSample.Mean / Math.Sqrt(exponentialSample.Count), Math.Sqrt(Math.Sqrt(TestUtilities.TargetPrecision))));

    // ---- lognormal distribution ----
    Console.WriteLine("lognormal");
    double lognormalMu = -4.0;
    double lognormalSigma = 5.0;
    Distribution lognormalPopulation = new LognormalDistribution(lognormalMu, lognormalSigma);
    Sample lognormalSample = CreateSample(lognormalPopulation, 100);
    FitResult lognormalFit = lognormalSample.MaximumLikelihoodFit(
        (IList<double> q) => new LognormalDistribution(q[0], q[1]),
        new double[] { lognormalMu + 1.0, lognormalSigma + 1.0 }
    );
    Console.WriteLine(lognormalFit.Parameter(0));
    Console.WriteLine(lognormalFit.Parameter(1));
    Console.WriteLine(lognormalFit.Covariance(0, 1));
    Assert.IsTrue(lognormalFit.Dimension == 2);
    Assert.IsTrue(lognormalFit.Parameter(0).ConfidenceInterval(0.99).ClosedContains(lognormalMu));
    Assert.IsTrue(lognormalFit.Parameter(1).ConfidenceInterval(0.99).ClosedContains(lognormalSigma));

    // ---- Weibull distribution ----
    Console.WriteLine("weibull");
    double weibullScale = 4.0;
    double weibullShape = 2.0;
    WeibullDistribution weibullPopulation = new WeibullDistribution(weibullScale, weibullShape);
    Sample weibullSample = CreateSample(weibullPopulation, 20);
    FitResult weibullFit = weibullSample.MaximumLikelihoodFit(
        (IList<double> q) => new WeibullDistribution(q[0], q[1]),
        new double[] { 2.0, 2.0 }
    );
    Console.WriteLine(weibullFit.Parameter(0));
    Console.WriteLine(weibullFit.Parameter(1));
    Console.WriteLine(weibullFit.Covariance(0, 1));
    Assert.IsTrue(weibullFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(weibullPopulation.ScaleParameter));
    Assert.IsTrue(weibullFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(weibullPopulation.ShapeParameter));

    // ---- logistic distribution ----
    Console.WriteLine("logistic");
    double logisticLocation = -3.0;
    double logisticScale = 2.0;
    Distribution logisticPopulation = new LogisticDistribution(logisticLocation, logisticScale);
    Sample logisticSample = CreateSample(logisticPopulation, 100);
    FitResult logisticFit = logisticSample.MaximumLikelihoodFit(
        (IList<double> q) => new LogisticDistribution(q[0], q[1]),
        new double[] { 2.0, 3.0 }
    );
    Console.WriteLine(logisticFit.Parameter(0));
    Console.WriteLine(logisticFit.Parameter(1));
    Assert.IsTrue(logisticFit.Dimension == 2);
    Assert.IsTrue(logisticFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(logisticLocation));
    Assert.IsTrue(logisticFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(logisticScale));

    // ---- beta distribution ----
    // Not yet covered; the intended test is kept here for when it is enabled.
    /*
    double beta_alpha = 0.5;
    double beta_beta = 2.0;
    Distribution beta_distribution = new BetaDistribution(beta_alpha, beta_beta);
    Sample beta_sample = CreateSample(beta_distribution, 100);
    FitResult beta_result = beta_sample.MaximumLikelihoodFit(new BetaDistribution(1.0, 1.0));
    Console.WriteLine("Beta:");
    Console.WriteLine(beta_result.Parameter(0));
    Console.WriteLine(beta_result.Parameter(1));
    Assert.IsTrue(beta_result.Dimension == 2);
    Assert.IsTrue(beta_result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(beta_alpha));
    Assert.IsTrue(beta_result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(beta_beta));
    */
}
public void SampleMannWhitneyTest() {

    // Two different non-normal populations.
    Distribution populationA = new ExponentialDistribution(2.0);
    Distribution populationB = new ExponentialDistribution(3.0);

    // Two samples from the first population and one from the second.
    Sample firstFromA = CreateSample(populationA, 20, 1);
    Sample secondFromA = CreateSample(populationA, 30, 2);
    Sample fromB = CreateSample(populationB, 40, 3);

    // Samples drawn from the same population should not be distinguished by the test.
    TestResult samePopulation = Sample.MannWhitneyTest(firstFromA, secondFromA);
    Console.WriteLine("{0} {1}", samePopulation.Statistic, samePopulation.LeftProbability);
    bool leftTailOk = samePopulation.LeftProbability < 0.95;
    bool rightTailOk = samePopulation.RightProbability < 0.95;
    Assert.IsTrue(leftTailOk && rightTailOk);

    // Samples from different populations should be distinguished: values from the first are
    // consistently smaller than values from the second, so U is abnormally small.
    TestResult differentPopulations = Sample.MannWhitneyTest(secondFromA, fromB);
    Console.WriteLine("{0} {1}", differentPopulations.Statistic, differentPopulations.LeftProbability);
    Assert.IsTrue(differentPopulations.RightProbability > 0.95);
}
public void SampleKuiperTest() {

    // This test has a whiff of meta-statistics about it: we want to make sure that the Kuiper
    // test statistic V is distributed according to the Kuiper distribution. To do this, we
    // create a sample of V statistics and do KS and Kuiper tests comparing that sample to the
    // claimed Kuiper distribution.

    // Start with any old underlying distribution.
    Distribution distribution = new ExponentialDistribution(2.0);

    // Generate some samples from it, and for each one get a V statistic from a Kuiper test.
    Sample VSample = new Sample();
    Distribution VDistribution = null;
    for (int i = 0; i < 25; i++) {
        // The sample size must be large enough that the asymptotic assumptions are satisfied.
        // At the moment this test fails if we make the sample size much smaller; we should
        // be able to shrink this number when we expose the finite-sample distributions.
        Sample sample = CreateSample(distribution, 250, i);
        TestResult kuiper = sample.KuiperTest(distribution);
        double V = kuiper.Statistic;
        Console.WriteLine("V = {0}", V);
        VSample.Add(V);
        VDistribution = kuiper.Distribution;
    }

    // Check on the mean.
    Console.WriteLine("m = {0} vs. {1}", VSample.PopulationMean, VDistribution.Mean);
    Assert.IsTrue(VSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(VDistribution.Mean));

    // Check on the standard deviation.
    // The format string is kept on one line; a regular string literal cannot span a line break.
    Console.WriteLine("s = {0} vs. {1}", VSample.PopulationStandardDeviation, VDistribution.StandardDeviation);
    Assert.IsTrue(VSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(VDistribution.StandardDeviation));

    // Do a KS test comparing the sample of V values to the expected distribution.
    TestResult kst = VSample.KolmogorovSmirnovTest(VDistribution);
    Console.WriteLine("D = {0}, P = {1}", kst.Statistic, kst.LeftProbability);
    Assert.IsTrue(kst.LeftProbability < 0.95);

    // Do a Kuiper test comparing the sample of V values to the expected distribution.
    TestResult kut = VSample.KuiperTest(VDistribution);
    Console.WriteLine("V = {0}, P = {1}", kut.Statistic, kut.LeftProbability);
    Assert.IsTrue(kut.LeftProbability < 0.95);
}
public void ExponentialFitUncertainty() {

    // The uncertainty reported for a fit parameter should be meaningful: it should agree with
    // the standard deviation of the fitted parameter values over many independent fits.

    // The population every sample is drawn from.
    Distribution population = new ExponentialDistribution(4.0);

    // Fit 50 small samples, recording each fit's parameter value and reported uncertainty.
    Sample fittedValues = new Sample();
    Sample reportedUncertainties = new Sample();
    for (int seed = 0; seed < 50; seed++) {
        Sample draw = CreateSample(population, 10, seed);
        UncertainValue parameter = ExponentialDistribution.FitToSample(draw).Parameter(0);
        fittedValues.Add(parameter.Value);
        reportedUncertainties.Add(parameter.Uncertainty);
    }

    Console.WriteLine(reportedUncertainties.Mean);
    Console.WriteLine(fittedValues.PopulationStandardDeviation);

    // The average reported uncertainty must be compatible with the observed scatter of the fitted values.
    Assert.IsTrue(fittedValues.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(reportedUncertainties.Mean));
}
public void ExponentialFit() {

    // Draw a sample from a known exponential distribution.
    ExponentialDistribution population = new ExponentialDistribution(5.0);
    Sample data = CreateSample(population, 100);

    // A normal fit to exponential data should be rejected by its goodness-of-fit test.
    FitResult normalFit = NormalDistribution.FitToSample(data);
    Console.WriteLine("P_n = {0}", normalFit.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(normalFit.GoodnessOfFit.LeftProbability > 0.95);

    // An exponential fit should be accepted, and should recover the population mean.
    FitResult exponentialFit = ExponentialDistribution.FitToSample(data);
    Console.WriteLine("P_e = {0}", exponentialFit.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(exponentialFit.GoodnessOfFit.LeftProbability < 0.95);
    Assert.IsTrue(exponentialFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(population.Mean));
}
public void SignTestDistribution() {

    // Start with a non-normally distributed population.
    Distribution population = new ExponentialDistribution();
    Random rng = new Random(1);

    // Draw 100 samples of 8 observations each and record the sign-test statistic W of each,
    // testing against the population's own median.
    Sample signStatistics = new Sample();
    for (int trial = 0; trial < 100; trial++) {

        Sample observations = new Sample();
        for (int k = 0; k < 8; k++) {
            observations.Add(population.GetRandomValue(rng));
        }

        TestResult signTest = observations.SignTest(population.Median);
        signStatistics.Add(signTest.Statistic);
    }

    // Sanity check: one statistic per trial.
    Assert.IsTrue(signStatistics.Count == 100);

    // The W statistics are compared against a binomial distribution.
    DiscreteDistribution expected = new BinomialDistribution(0.5, 8);

    // Mean.
    Console.WriteLine("m = {0} vs. {1}", signStatistics.PopulationMean, expected.Mean);
    Assert.IsTrue(signStatistics.PopulationMean.ConfidenceInterval(0.95).ClosedContains(expected.Mean));

    // Standard deviation.
    Console.WriteLine("s = {0} vs. {1}", signStatistics.PopulationStandardDeviation, expected.StandardDeviation);
    Assert.IsTrue(signStatistics.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(expected.StandardDeviation));

    // Third moment about the mean (skew).
    Console.WriteLine("t = {0} vs. {1}", signStatistics.PopulationMomentAboutMean(3), expected.MomentAboutMean(3));
    Assert.IsTrue(signStatistics.PopulationMomentAboutMean(3).ConfidenceInterval(0.95).ClosedContains(expected.MomentAboutMean(3)));

    // Fourth moment about the mean (kurtosis).
    Console.WriteLine("u = {0} vs. {1}", signStatistics.PopulationMomentAboutMean(4), expected.MomentAboutMean(4));
    Assert.IsTrue(signStatistics.PopulationMomentAboutMean(4).ConfidenceInterval(0.95).ClosedContains(expected.MomentAboutMean(4)));

    // No KS test here: KS tests are only for continuous distributions.
}
/// <summary>
/// Computes the exponential distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>A fit result holding the best-fit parameter, its uncertainty, and a
/// Kolmogorov-Smirnov goodness-of-fit test of the sample against the fitted distribution.</returns>
/// <remarks>
/// <para>The returned fit parameter is μ (the <see cref="Mean"/>).
/// This is the same parameter that is required by the <see cref="ExponentialDistribution(double)"/> constructor to
/// specify a new exponential distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains negative values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) {
        throw new ArgumentNullException(nameof(sample));
    }
    if (sample.Count < 2) {
        throw new InsufficientDataException();
    }

    // Negative values are rejected; zero is accepted.
    foreach (double value in sample) {
        if (value < 0.0) {
            throw new InvalidOperationException();
        }
    }

    // The best-fit mean is the sample mean; its uncertainty is the mean divided by sqrt(n).
    double mu = sample.Mean;
    double muUncertainty = mu / Math.Sqrt(sample.Count);

    // Report how well the fitted distribution describes the data.
    Distribution distribution = new ExponentialDistribution(mu);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    return new FitResult(mu, muUncertainty, test);
}
public void TwoSampleKolmogorovNullDistributionTest() {

    // Both samples of every pair are drawn from the same population, so the null hypothesis holds.
    Distribution population = new ExponentialDistribution();

    int[] sizes = new int[] { 23, 30, 175 };

    // Try every ordered pair of sample sizes.
    foreach (int sizeA in sizes) {
        foreach (int sizeB in sizes) {

            Console.WriteLine("{0} {1}", sizeA, sizeB);

            // Collect the two-sample KS statistic from 128 independent pairs of samples.
            Sample statistics = new Sample();
            Distribution nullDistribution = null;
            for (int trial = 0; trial < 128; trial++) {
                Sample first = TestUtilities.CreateSample(population, sizeA, 31415 + sizeA + trial);
                Sample second = TestUtilities.CreateSample(population, sizeB, 27182 + sizeB + trial);

                TestResult twoSample = Sample.KolmogorovSmirnovTest(first, second);
                statistics.Add(twoSample.Statistic);
                nullDistribution = twoSample.Distribution;
            }

            // The full KS comparison is only asserted when LCM(sizeA, sizeB) exceeds the number of
            // collected statistics; otherwise the KS test detects the granularity of the
            // distribution and fails.
            TestResult metaTest = statistics.KolmogorovSmirnovTest(nullDistribution);
            Console.WriteLine(metaTest.LeftProbability);
            if (AdvancedIntegerMath.LCM(sizeA, sizeB) > statistics.Count) {
                Assert.IsTrue(metaTest.LeftProbability < 0.99);
            }

            // The mean and standard deviation are always checked against the null distribution.
            Console.WriteLine("{0} {1}", nullDistribution.Mean, statistics.PopulationMean.ConfidenceInterval(0.99));
            Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));

            Console.WriteLine("{0} {1}", nullDistribution.StandardDeviation, statistics.PopulationStandardDeviation.ConfidenceInterval(0.99));
            Assert.IsTrue(statistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));

            // The third moment is printed only; its assertion is currently disabled.
            Console.WriteLine("{0} {1}", nullDistribution.MomentAboutMean(3), statistics.PopulationMomentAboutMean(3).ConfidenceInterval(0.99));
            //Assert.IsTrue(d.PopulationMomentAboutMean(3).ConfidenceInterval(0.99).ClosedContains(nullDistribution.MomentAboutMean(3)));
        }
    }
}
public void MultivariateMoments() {

    // Build a small random three-column sample, one distribution per column.
    MultivariateSample data = new MultivariateSample(3);
    Distribution column0 = new NormalDistribution();
    Distribution column1 = new ExponentialDistribution();
    Distribution column2 = new UniformDistribution();
    Random rng = new Random(1);
    int rowCount = 10;
    for (int row = 0; row < rowCount; row++) {
        // Draw the columns in order so the random stream is consumed in a fixed order.
        double v0 = column0.GetRandomValue(rng);
        double v1 = column1.GetRandomValue(rng);
        double v2 = column2.GetRandomValue(rng);
        data.Add(v0, v1, v2);
    }

    // Moments computed from the multivariate sample must agree with those of its column views.
    for (int i = 0; i < 3; i++) {

        // First raw moment of column i versus that column's mean.
        int[] powers = new int[3];
        powers[i] = 1;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Column(i).Mean, data.Moment(powers)));

        // Second moment about the mean of column i versus that column's variance.
        powers[i] = 2;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Column(i).Variance, data.MomentAboutMean(powers)));

        // Mixed moment about the mean of columns i and j versus their covariance.
        for (int j = 0; j < i; j++) {
            int[] mixed = new int[3];
            mixed[i] = 1;
            mixed[j] = 1;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(data.TwoColumns(i, j).Covariance, data.MomentAboutMean(mixed)));
        }
    }
}