/// <summary>
/// Determines the parameters of the Wald distribution that best fits a sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The fit.</returns>
/// <remarks>
/// <para>The returned fit parameters are the <see cref="Mean"/> and <see cref="ShapeParameter"/>, in that order.
/// These are the same parameters, in the same order, that are required by the <see cref="WaldDistribution(double,double)"/> constructor to
/// specify a new Wald distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) { throw new ArgumentNullException("sample"); }
    if (sample.Count < 3) { throw new InsufficientDataException(); }

    // best fit mu = <x>
    double mu = sample.Mean;

    // best fit lambda is 1/lambda = <1/x - 1/mu>
    double lambda = 0.0;
    foreach (double value in sample) {
        if (value <= 0.0) { throw new InvalidOperationException(); }
        lambda += (1.0 / value - 1.0 / mu);
    }
    lambda = sample.Count / lambda;

    // correct lambda estimate for its bias in the non-asymptotic regime
    lambda = lambda * (1.0 - 3.0 / sample.Count);

    // variances are expressible in closed form, covariance is zero
    double v_mu_mu = MoreMath.Pow(mu, 3) / lambda / sample.Count;
    double v_lambda_lambda = 2.0 * lambda * lambda / sample.Count;
    double v_mu_lambda = 0.0;

    Distribution dist = new WaldDistribution(mu, lambda);
    TestResult test = sample.KolmogorovSmirnovTest(dist);

    return (new FitResult(mu, Math.Sqrt(v_mu_mu), lambda, Math.Sqrt(v_lambda_lambda), v_mu_lambda, test));
}
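// A minimal usage sketch, not part of the library: simulate a sample, fit it, and read back the
// estimated parameters. The method name, seed, and sample sizes here are hypothetical; FitToSample,
// GetRandomValue, and Parameter(i).Value are used exactly as in the tests below.
public void WaldFitUsageSketch() {
    Random rng = new Random(1);
    WaldDistribution wald = new WaldDistribution(2.0, 4.0);
    Sample s = new Sample();
    for (int i = 0; i < 100; i++) {
        s.Add(wald.GetRandomValue(rng));
    }
    FitResult r = WaldDistribution.FitToSample(s);
    // Parameter 0 is the mean estimate, parameter 1 the shape estimate, per the remarks above.
    Console.WriteLine("mu = {0}, lambda = {1}", r.Parameter(0).Value, r.Parameter(1).Value);
}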
public void InverseGaussianSummation() {
    // X_i ~ IG(\mu, \lambda) \rightarrow \sum_{i=1}^{n} X_i ~ IG(n \mu, n^2 \lambda)
    Random rng = new Random(0);
    WaldDistribution d0 = new WaldDistribution(1.0, 2.0);
    Sample s = new Sample();
    for (int i = 0; i < 64; i++) {
        s.Add(d0.GetRandomValue(rng) + d0.GetRandomValue(rng) + d0.GetRandomValue(rng));
    }
    WaldDistribution d1 = new WaldDistribution(3.0 * 1.0, 9.0 * 2.0);
    TestResult r = s.KolmogorovSmirnovTest(d1);
    Console.WriteLine(r.LeftProbability);
}
public void WaldFitUncertainties() {
    WaldDistribution wald = new WaldDistribution(3.5, 2.5);
    Random rng = new Random(314159);
    BivariateSample P = new BivariateSample();
    double cmm = 0.0;
    double css = 0.0;
    double cms = 0.0;
    for (int i = 0; i < 50; i++) {
        Sample s = new Sample();
        for (int j = 0; j < 50; j++) {
            s.Add(wald.GetRandomValue(rng));
        }
        FitResult r = WaldDistribution.FitToSample(s);
        P.Add(r.Parameter(0).Value, r.Parameter(1).Value);
        cmm += r.Covariance(0, 0);
        css += r.Covariance(1, 1);
        cms += r.Covariance(0, 1);
    }
    cmm /= P.Count;
    css /= P.Count;
    cms /= P.Count;

    Console.WriteLine("{0} {1}", P.X.PopulationMean, P.Y.PopulationMean);
    Assert.IsTrue(P.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(wald.Mean));
    Assert.IsTrue(P.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(wald.ShapeParameter));
    // The ML shape parameter estimate is asymptotically unbiased, as it must be according to ML fit theory,
    // but detectably upward biased for small n. We now correct for this.

    Console.WriteLine("{0} {1} {2}", P.X.PopulationVariance, P.Y.PopulationVariance, P.PopulationCovariance);
    Console.WriteLine("{0} {1} {2}", cmm, css, cms);
    Assert.IsTrue(P.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cmm));
    Assert.IsTrue(P.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(css));
    Assert.IsTrue(P.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cms));
}
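// A sketch (not an existing test) illustrating the small-n bias noted above. For samples of size
// n = 7, the uncorrected ML estimate n / \sum_i (1/x_i - 1/\hat{\mu}) averages near
// n \lambda / (n - 3) = 3.5, noticeably above \lambda = 2, while the (n - 3) correction recovers
// \lambda. The method name, seed, and trial counts are hypothetical.
public void WaldShapeBiasSketch() {
    WaldDistribution wald = new WaldDistribution(1.0, 2.0);
    Random rng = new Random(271828);
    int n = 7;
    int trials = 10000;
    double rawSum = 0.0;
    double correctedSum = 0.0;
    for (int t = 0; t < trials; t++) {
        Sample s = new Sample();
        for (int j = 0; j < n; j++) { s.Add(wald.GetRandomValue(rng)); }
        double mu = s.Mean;
        double inverseSum = 0.0;
        foreach (double x in s) { inverseSum += (1.0 / x - 1.0 / mu); }
        rawSum += n / inverseSum;
        correctedSum += (n - 3) / inverseSum;
    }
    // Expect rawSum / trials near n \lambda / (n - 3) = 3.5 and correctedSum / trials near \lambda = 2.0.
    Console.WriteLine("{0} {1}", rawSum / trials, correctedSum / trials);
}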
/// <summary>
/// Determines the parameters of the Wald distribution that best fits a sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The fit.</returns>
/// <remarks>
/// <para>The returned fit parameters are the <see cref="Mean"/> and <see cref="Shape"/>, in that order.
/// These are the same parameters, in the same order, that are required by the <see cref="WaldDistribution(double,double)"/> constructor to
/// specify a new Wald distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) throw new ArgumentNullException(nameof(sample));
    if (sample.Count < 3) throw new InsufficientDataException();

    // For maximum likelihood estimation, take logs of pdfs and sum:
    //   \ln p = \frac{1}{2} \ln \lambda - \frac{1}{2} \ln (2\pi) - \frac{3}{2} \ln x
    //           - \frac{\lambda x}{2\mu^2} + \frac{\lambda}{\mu} - \frac{\lambda}{2x}
    //   \ln L = \sum_i \ln p_i

    // Take the derivative wrt \mu
    //   \frac{\partial \ln L}{\partial \mu} = \sum_i \left[ \frac{\lambda x_i}{\mu^3} - \frac{\lambda}{\mu^2} \right]
    // and set it equal to zero to obtain
    //   \mu = \frac{1}{n} \sum_i x_i = <x>
    // which agrees with the method of moments.

    // Take the derivative wrt \lambda
    //   \frac{\partial \ln L}{\partial \lambda} = \sum_i \left[ \frac{1}{2 \lambda} - \frac{x_i}{2\mu^2} + \frac{1}{\mu} - \frac{1}{2 x_i} \right]
    // Set it equal to zero, plug in our expression for \mu, and solve for \lambda to get
    //   \frac{n}{\lambda} = \sum_i \left( \frac{1}{x_i} - \frac{1}{\mu} \right)
    // i.e. \lambda^{-1} = <x^{-1} - \mu^{-1}>

    double mu = sample.Mean;
    double mui = 1.0 / mu;
    double lambda = 0.0;
    foreach (double value in sample) {
        if (value <= 0.0) throw new InvalidOperationException();
        lambda += (1.0 / value - mui);
    }
    lambda = (sample.Count - 3) / lambda;

    // If x ~ IG(\mu, \lambda), then \sum_i x_i ~ IG(n \mu, n^2 \lambda), so \hat{\mu} ~ IG(\mu, n \lambda). This gives us
    // not just the exact mean and variance of \hat{\mu}, but its entire distribution. Since its mean is \mu, \hat{\mu} is an
    // unbiased estimator. And by the variance formula for IG, the variance of \hat{\mu} is \frac{\mu^3}{n \lambda}.

    // Tweedie, "Statistical Properties of Inverse Gaussian Distributions" (http://projecteuclid.org/download/pdf_1/euclid.aoms/1177706964)
    // showed that \frac{n \lambda}{\hat{\lambda}} ~ \chi^2_{n-1}. Since the mean of \chi^2_{k} is k, the MLE estimator of
    // \frac{1}{\lambda} can be made unbiased by replacing n by (n-1). However, we are estimating \lambda, not \frac{1}{\lambda}.
    // By the relation between chi squared and inverse chi squared distributions, \frac{\hat{\lambda}}{n \lambda} ~ I\chi^2_{n-1}.
    // The mean of I\chi^2_{k} is \frac{1}{k-2}, so to get an unbiased estimator of \lambda, we need to replace n by (n-3). This is
    // what we have done above. Furthermore, the variance of I\chi^2_{k} is \frac{2}{(k-2)^2 (k-4)}, so the variance of \hat{\lambda}
    // is \frac{2 \lambda^2}{n-5}.

    // We can also get covariances from the MLE approach. To get a curvature matrix, take additional derivatives
    //   \frac{\partial^2 \ln L}{\partial \mu^2} = \sum_i \left[ -\frac{3 \lambda x_i}{\mu^4} + \frac{2 \lambda}{\mu^3} \right]
    //   \frac{\partial^2 \ln L}{\partial \mu \partial \lambda} = \sum_i \left[ \frac{x_i}{\mu^3} - \frac{1}{\mu^2} \right]
    //   \frac{\partial^2 \ln L}{\partial \lambda^2} = \sum_i \left[ -\frac{1}{2 \lambda^2} \right]
    // and substitute in the best-fit values of \mu and \lambda:
    //   \frac{\partial^2 \ln L}{\partial \mu^2} = -\frac{n \lambda}{\mu^3}
    //   \frac{\partial^2 \ln L}{\partial \mu \partial \lambda} = 0
    //   \frac{\partial^2 \ln L}{\partial \lambda^2} = -\frac{n}{2 \lambda^2}
    // The mixed derivative vanishes, so the matrix is trivially invertible to obtain covariances. These results agree with the
    // results from the exact distributions in the asymptotic regime.

    double v_mu_mu = mu * mu * mu / lambda / sample.Count;
    double v_lambda_lambda = 2.0 * lambda * lambda / (sample.Count - 5);
    double v_mu_lambda = 0.0;

    ContinuousDistribution dist = new WaldDistribution(mu, lambda);
    TestResult test = sample.KolmogorovSmirnovTest(dist);

    return (new FitResult(mu, Math.Sqrt(v_mu_mu), lambda, Math.Sqrt(v_lambda_lambda), v_mu_lambda, test));
}
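// A sketch (not an existing test) of the exact-distribution argument in the comments above:
// since \hat{\mu} ~ IG(\mu, n \lambda), the means of many size-n samples should pass a KS test
// against WaldDistribution(\mu, n \lambda). The method name, seed, and counts are hypothetical.
public void WaldMeanEstimatorDistributionSketch() {
    Random rng = new Random(42);
    double mu = 1.0;
    double lambda = 2.0;
    int n = 16;
    WaldDistribution wald = new WaldDistribution(mu, lambda);
    Sample means = new Sample();
    for (int t = 0; t < 200; t++) {
        Sample s = new Sample();
        for (int j = 0; j < n; j++) { s.Add(wald.GetRandomValue(rng)); }
        means.Add(s.Mean);
    }
    // Compare the empirical distribution of \hat{\mu} to its exact distribution IG(\mu, n \lambda).
    TestResult r = means.KolmogorovSmirnovTest(new WaldDistribution(mu, n * lambda));
    Console.WriteLine(r.LeftProbability);
}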