public void WaldFit () {

    WaldDistribution wald = new WaldDistribution(3.5, 2.5);

    FrameTable results = new FrameTable();
    results.AddColumns<double>("Mean", "Shape", "MeanVariance", "ShapeVariance", "MeanShapeCovariance");

    for (int i = 0; i < 128; i++) {
        Sample sample = SampleTest.CreateSample(wald, 16, i);
        WaldFitResult result = WaldDistribution.FitToSample(sample);
        Assert.IsTrue(result.Mean.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Mean")]);
        Assert.IsTrue(result.Shape.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Shape")]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Parameters.VarianceOf("Mean"), MoreMath.Sqr(result.Mean.Uncertainty)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Parameters.VarianceOf("Shape"), MoreMath.Sqr(result.Shape.Uncertainty)));
        results.AddRow(
            result.Mean.Value, result.Shape.Value,
            result.Parameters.VarianceOf("Mean"), result.Parameters.VarianceOf("Shape"),
            result.Parameters.CovarianceOf("Mean", "Shape")
        );
    }

    Assert.IsTrue(results["Mean"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(wald.Mean));
    Assert.IsTrue(results["Shape"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(wald.Shape));
    Assert.IsTrue(results["Mean"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(results["MeanVariance"].As<double>().Median()));
    Assert.IsTrue(results["Shape"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(results["ShapeVariance"].As<double>().Median()));
    Assert.IsTrue(results["Mean"].As<double>().PopulationCovariance(results["Shape"].As<double>()).ConfidenceInterval(0.99).ClosedContains(results["MeanShapeCovariance"].As<double>().Median()));
}
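For reference, the ensemble assertions in this test check the exact sampling moments derived in the comments of FitToWald below: the estimator of the mean is itself inverse Gaussian distributed, and the bias-corrected shape estimator inherits its variance from the inverse chi-squared law,

\[
\hat{\mu} \sim \mathrm{IG}\!\left(\mu, n\lambda\right), \qquad
V(\hat{\mu}) = \frac{\mu^3}{n\lambda}, \qquad
\frac{n\lambda}{\hat{\lambda}_{\mathrm{MLE}}} \sim \chi^2_{n-1}, \qquad
V(\hat{\lambda}) = \frac{2\lambda^2}{n-5}.
\]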
public void WaldFit () {

    WaldDistribution wald = new WaldDistribution(3.5, 2.5);

    BivariateSample parameters = new BivariateSample();
    MultivariateSample variances = new MultivariateSample(3);
    for (int i = 0; i < 128; i++) {
        Sample s = SampleTest.CreateSample(wald, 16, i);
        FitResult r = WaldDistribution.FitToSample(s);
        parameters.Add(r.Parameters[0], r.Parameters[1]);
        variances.Add(r.Covariance(0, 0), r.Covariance(1, 1), r.Covariance(0, 1));
        Assert.IsTrue(r.GoodnessOfFit.Probability > 0.01);
    }

    Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Mean));
    Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Shape));
    Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
    Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
    Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
}
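Both versions of the test follow the same Monte Carlo pattern: generate many small samples from a known Wald distribution, fit each one, and check that the ensemble spread of the fitted parameters matches the variances the fit claims for them. Below is a minimal, self-contained sketch of that pattern for the mean parameter, using only the base class library; SampleWald and the class name are illustrative helpers, not Meta.Numerics API, and the sampler is the standard Michael-Schucany-Haas transformation.

using System;
using System.Linq;

static class WaldMonteCarloSketch {

    // Draw one inverse Gaussian (Wald) variate via the Michael-Schucany-Haas method.
    static double SampleWald (Random rng, double mu, double lambda) {
        // Box-Muller standard normal; 1 - NextDouble() keeps the log argument in (0, 1].
        double z = Math.Sqrt(-2.0 * Math.Log(1.0 - rng.NextDouble())) * Math.Cos(2.0 * Math.PI * rng.NextDouble());
        double y = z * z;
        double x = mu + mu * mu * y / (2.0 * lambda) - mu / (2.0 * lambda) * Math.Sqrt(4.0 * mu * lambda * y + mu * mu * y * y);
        // Accept the smaller root with probability mu / (mu + x); otherwise take the conjugate root.
        return (rng.NextDouble() <= mu / (mu + x)) ? x : mu * mu / x;
    }

    static void Main () {
        double mu = 3.5, lambda = 2.5;
        int n = 16, trials = 10000;
        Random rng = new Random(1);

        // The fitted mean of each synthetic sample is just the sample mean (the MLE derived below).
        double[] means = new double[trials];
        for (int t = 0; t < trials; t++) {
            means[t] = Enumerable.Range(0, n).Select(_ => SampleWald(rng, mu, lambda)).Average();
        }

        // Compare the observed spread of the estimates to the claimed variance \mu^3 / (n \lambda).
        double center = means.Average();
        double variance = means.Select(m => (m - center) * (m - center)).Average();
        Console.WriteLine($"observed {variance:F4} vs predicted {mu * mu * mu / (n * lambda):F4}");
    }
}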
public void InverseGaussianSummation () {
    // X_i ~ IG(\mu, \lambda) \rightarrow \sum_{i=1}^{n} X_i ~ IG(n \mu, n^2 \lambda)
    Random rng = new Random(1);
    WaldDistribution d0 = new WaldDistribution(1.0, 2.0);
    List<double> s = new List<double>();
    for (int i = 0; i < 64; i++) {
        s.Add(d0.GetRandomValue(rng) + d0.GetRandomValue(rng) + d0.GetRandomValue(rng));
    }
    WaldDistribution d1 = new WaldDistribution(3.0 * 1.0, 9.0 * 2.0);
    TestResult r = s.KolmogorovSmirnovTest(d1);
    Assert.IsTrue(r.Probability > 0.05);
}
public void InverseGaussianSummation () {
    // X_i ~ IG(\mu, \lambda) \rightarrow \sum_{i=1}^{n} X_i ~ IG(n \mu, n^2 \lambda)
    Random rng = new Random(0);
    WaldDistribution d0 = new WaldDistribution(1.0, 2.0);
    Sample s = new Sample();
    for (int i = 0; i < 64; i++) {
        s.Add(d0.GetRandomValue(rng) + d0.GetRandomValue(rng) + d0.GetRandomValue(rng));
    }
    WaldDistribution d1 = new WaldDistribution(3.0 * 1.0, 9.0 * 2.0);
    TestResult r = s.KolmogorovSmirnovTest(d1);
    Console.WriteLine(r.LeftProbability);
}
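The additivity property asserted in the comment follows in one line from the inverse Gaussian moment generating function:

\[
M_X(t) = \exp\!\left[\frac{\lambda}{\mu}\left(1 - \sqrt{1 - \frac{2\mu^2 t}{\lambda}}\right)\right]
\quad \Rightarrow \quad
M_{\sum X_i}(t) = \left[M_X(t)\right]^n = \exp\!\left[\frac{n\lambda}{\mu}\left(1 - \sqrt{1 - \frac{2\mu^2 t}{\lambda}}\right)\right],
\]

which is the MGF of \(\mathrm{IG}(n\mu, n^2\lambda)\), since \(n^2\lambda/(n\mu) = n\lambda/\mu\) and \(2(n\mu)^2 t/(n^2\lambda) = 2\mu^2 t/\lambda\). The tests above exercise the case \(n = 3\), \(\mu = 1\), \(\lambda = 2\).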
/// <summary>
/// Finds the Wald distribution that best fits a sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The fit.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static WaldFitResult FitToWald (this IReadOnlyList<double> sample) {

    if (sample == null) { throw new ArgumentNullException(nameof(sample)); }
    if (sample.Count < 3) { throw new InsufficientDataException(); }

    // For maximum likelihood estimation, take logs of pdfs and sum:
    //   \ln p = \frac{1}{2} \ln \lambda - \frac{1}{2} \ln (2\pi) - \frac{3}{2} \ln x
    //           - \frac{\lambda x}{2\mu^2} + \frac{\lambda}{\mu} - \frac{\lambda}{2x}
    //   \ln L = \sum_i \ln p_i

    // Take the derivative wrt \mu
    //   \frac{\partial \ln L}{\partial \mu} = \sum_i \left[ \frac{\lambda x_i}{\mu^3} - \frac{\lambda}{\mu^2} \right]
    // and set it equal to zero to obtain
    //   \mu = \frac{1}{n} \sum_i x_i = <x>
    // which agrees with the method of moments.

    // Take the derivative wrt \lambda
    //   \frac{\partial \ln L}{\partial \lambda} = \sum_i \left[ \frac{1}{2 \lambda} - \frac{x_i}{2\mu^2} + \frac{1}{\mu} - \frac{1}{2 x_i} \right]
    // Set it equal to zero, plug in our expression for \mu, and solve for \lambda to get
    //   \frac{n}{\lambda} = \sum_i \left( \frac{1}{x_i} - \frac{1}{\mu} \right)
    // i.e. \lambda^{-1} = <(x^{-1} - \mu^{-1})>

    int n;
    double mu;
    ComputeMomentsUpToFirst(sample, out n, out mu);
    double mui = 1.0 / mu;
    double lambda = 0.0;
    foreach (double value in sample) {
        if (value <= 0.0) { throw new InvalidOperationException(); }
        lambda += (1.0 / value - mui);
    }
    lambda = (n - 3) / lambda;

    // If x ~ IG(\mu, \lambda), then \sum_i x_i ~ IG(n \mu, n^2 \lambda), so \hat{\mu} ~ IG(\mu, n \lambda). This gives us
    // not just the exact mean and variance of \hat{\mu}, but its entire distribution. Since its mean is \mu, \hat{\mu} is an
    // unbiased estimator. And by the variance formula for IG, the variance of \hat{\mu} is \frac{\mu^3}{n \lambda}.

    // Tweedie, "Statistical Properties of Inverse Gaussian Distributions" (http://projecteuclid.org/download/pdf_1/euclid.aoms/1177706964)
    // showed that \frac{n \lambda}{\hat{\lambda}} ~ \chi^2_{n-1}. Since the mean of \chi^2_{k} is k, the MLE estimator of
    // \frac{1}{\lambda} can be made unbiased by replacing n by (n-1). However, we are estimating \lambda, not \frac{1}{\lambda}.
    // By the relation between chi squared and inverse chi squared distributions, \frac{\hat{\lambda}}{n \lambda} ~ I\chi^2_{n-1}.
    // The mean of I\chi^2_{k} is \frac{1}{k-2}, so to get an unbiased estimator of \lambda, we need to replace n by (n-3). This is
    // what we have done above. Furthermore, the variance of I\chi^2_{k} is \frac{2}{(k-2)^2 (k-4)}, so the variance of \hat{\lambda}
    // is \frac{2 \lambda^2}{(n-5)}.

    // We can also get covariances from the MLE approach. To get a curvature matrix, take additional derivatives
    //   \frac{\partial^2 \ln L}{\partial \mu^2} = \sum_i \left[ -\frac{3 \lambda x_i}{\mu^4} + \frac{2 \lambda}{\mu^3} \right]
    //   \frac{\partial^2 \ln L}{\partial \mu \partial \lambda} = \sum_i \left[ \frac{x_i}{\mu^3} - \frac{1}{\mu^2} \right]
    //   \frac{\partial^2 \ln L}{\partial \lambda^2} = \sum_i \left[ -\frac{1}{2 \lambda^2} \right]
    // and substitute in the best-fit values of \mu and \lambda
    //   \frac{\partial^2 \ln L}{\partial \mu^2} = -\frac{n \lambda}{\mu^3}
    //   \frac{\partial^2 \ln L}{\partial \mu \partial \lambda} = 0
    //   \frac{\partial^2 \ln L}{\partial \lambda^2} = -\frac{n}{2 \lambda^2}
    // The mixed derivative vanishes, so the matrix is trivially invertible to obtain covariances. These results agree with the
    // results from the exact distributions in the asymptotic regime.

    double v_mu_mu = mu * mu * mu / lambda / n;
    double v_lambda_lambda = 2.0 * lambda * lambda / (n - 5);
    //double v_mu_lambda = 0.0;

    WaldDistribution dist = new WaldDistribution(mu, lambda);
    TestResult test = sample.KolmogorovSmirnovTest(dist);

    return (new WaldFitResult(mu, lambda, v_mu_mu, v_lambda_lambda, dist, test));
}
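A usage sketch for FitToWald, assuming the Meta.Numerics namespaces below and the WaldFitResult surface exercised in the tests above (Mean and Shape as uncertain values); the GoodnessOfFit property is an assumption inferred from the TestResult passed to the constructor:

using System;
using System.Collections.Generic;
using Meta.Numerics.Statistics;
using Meta.Numerics.Statistics.Distributions;

static class FitToWaldExample {
    static void Main () {
        // Draw a synthetic sample from a known Wald distribution, then fit it back.
        Random rng = new Random(1);
        WaldDistribution wald = new WaldDistribution(3.5, 2.5);
        List<double> sample = new List<double>();
        for (int i = 0; i < 100; i++) { sample.Add(wald.GetRandomValue(rng)); }

        WaldFitResult fit = sample.FitToWald();
        Console.WriteLine($"mu = {fit.Mean}");        // estimate with uncertainty \sqrt{\mu^3 / (n \lambda)}
        Console.WriteLine($"lambda = {fit.Shape}");   // estimate with uncertainty \sqrt{2 \lambda^2 / (n - 5)}
        Console.WriteLine($"KS P = {fit.GoodnessOfFit.Probability}");  // assumed property; see the constructor call above
    }
}

Because the estimator distributions are known exactly (inverse Gaussian for \(\hat{\mu}\), inverse chi-squared for \(\hat{\lambda}\)), the reported variances are exact at any sample size, not merely asymptotic approximations.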