/// <summary>
/// Generates a sample of n vectors drawn from a multivariate normal distribution
/// with mean vector M and covariance matrix C.
/// </summary>
public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n) {

    int dimension = M.Dimension;
    MultivariateSample sample = new MultivariateSample(dimension);

    // The "square root" A of the covariance (C = A A^T) maps independent standard
    // normal deviates into correlated deviates with covariance C.
    SquareMatrix sqrtC = C.CholeskyDecomposition().SquareRootMatrix();

    // Fixed seed so the generated sample is reproducible across runs.
    Random rng = new Random(1);
    Distribution normal = new NormalDistribution();

    for (int i = 0; i < n; i++) {
        // Fill a vector with independent standard normal deviates
        // via inverse-CDF sampling of uniform deviates.
        ColumnVector deviates = new ColumnVector(dimension);
        for (int j = 0; j < dimension; j++) {
            deviates[j] = normal.InverseLeftProbability(rng.NextDouble());
        }
        // Shift and correlate: X = M + A z, then record the vector.
        sample.Add(M + sqrtC * deviates);
    }

    return (sample);
}
/// <summary>
/// Verifies that the Hankel matrix of Catalan numbers has unit determinant,
/// computing the determinant via Cholesky decomposition.
/// </summary>
public void CatalanHankelMatrixDeterminant() {

    for (int size = 1; size <= 8; size++) {

        // Fill the lower triangle with Catalan numbers C_{r+c} = C(2n, n) / (n + 1).
        SymmetricMatrix H = new SymmetricMatrix(size);
        for (int r = 0; r < size; r++) {
            for (int c = 0; c <= r; c++) {
                int n = r + c;
                double catalan = AdvancedIntegerMath.BinomialCoefficient(2 * n, n) / (n + 1);
                H[r, c] = catalan;
            }
        }

        // A Catalan Hankel matrix is positive definite with determinant exactly 1.
        CholeskyDecomposition decomposition = H.CholeskyDecomposition();
        Assert.IsTrue(TestUtilities.IsNearlyEqual(decomposition.Determinant(), 1.0));
    }
}
// routines for maximum likelihood fitting

/// <summary>
/// Computes the Gamma distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <remarks>
/// <para>The returned fit parameters are the <see cref="ShapeParameter"/> and <see cref="ScaleParameter"/>, in that order.
/// These are the same parameters, in the same order, that are required by the <see cref="GammaDistribution(double,double)"/> constructor to
/// specify a new Gamma distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) throw new ArgumentNullException("sample");
    if (sample.Count < 3) throw new InsufficientDataException();

    // The log likelihood of a sample given k and s is
    //   \log L = (k-1) \sum_i \log x_i - \frac{1}{s} \sum_i x_i - N \log \Gamma(k) - N k \log s
    // Differentiating,
    //   \frac{\partial \log L}{\partial s} = \frac{1}{s^2} \sum_i x_i - \frac{Nk}{s}
    //   \frac{\partial \log L}{\partial k} = \sum_i \log x_i - N \psi(k) - N \log s
    // Setting the first equal to zero gives
    //   k s = N^{-1} \sum_i x_i = <x>
    //   \psi(k) + \log s = N^{-1} \sum_i \log x_i = <log x>
    // Inserting the first into the second gives a single equation for k
    //   \log k - \psi(k) = \log <x> - <\log x>
    // Note the RHS need only be computed once.
    // \log k > \psi(k) for all k, so the RHS had better be positive. They get
    // closer for large k, so smaller RHS will produce a larger k.

    // Accumulate \sum_i \log x_i, rejecting non-positive values (outside the
    // Gamma support), then form the RHS: s = \log <x> - <\log x>.
    double s = 0.0;
    foreach (double x in sample) {
        if (x <= 0.0) throw new InvalidOperationException();
        s += Math.Log(x);
    }
    s = Math.Log(sample.Mean) - s / sample.Count;

    // We can get an initial guess for k from the method of moments
    //   \frac{\mu^2}{\sigma^2} = k
    double k0 = MoreMath.Sqr(sample.Mean) / sample.Variance;

    // Since 1/(2k) < \log(k) - \psi(k) < 1/k, we could get a bound; that
    // might be better to avoid the solver running into k < 0 territory.
    double k1 = FunctionMath.FindZero(k => (Math.Log(k) - AdvancedMath.Psi(k) - s), k0);

    // Given k, the zero-derivative condition fixes the scale: s = <x> / k.
    double s1 = sample.Mean / k1;

    // Curvature of the log likelihood is straightforward
    //   \frac{\partial^2 \log L}{\partial s^2} = -\frac{2}{s^3} \sum_i x_i + \frac{Nk}{s^2} = - \frac{Nk}{s^2}
    //   \frac{\partial^2 \log L}{\partial k \partial s} = - \frac{N}{s}
    //   \frac{\partial^2 \log L}{\partial k^2} = - N \psi'(k)
    // This gives the curvature matrix and thus via inversion the covariance matrix.
    // B holds the negative Hessian evaluated at the maximum; index order is (k, s).
    SymmetricMatrix B = new SymmetricMatrix(2);
    B[0, 0] = sample.Count * AdvancedMath.Psi(1, k1);
    B[0, 1] = sample.Count / s1;
    B[1, 1] = sample.Count * k1 / MoreMath.Sqr(s1);
    SymmetricMatrix C = B.CholeskyDecomposition().Inverse();

    // Do a KS test for goodness-of-fit against the best-fit distribution.
    TestResult test = sample.KolmogorovSmirnovTest(new GammaDistribution(k1, s1));

    return (new FitResult(new double[] { k1, s1 }, C, test));
}
/// <summary>
/// Checks multidimensional numerical integration against the analytic Gaussian
/// integral: \int e^{-x^T A x} d^d x = \sqrt{\pi^d / \det A} for positive definite A.
/// </summary>
public void GaussianIntegrals() {

    Random rng = new Random(1);

    // NOTE(review): a dead guard "if (d == 4 || d == 5 || d == 6) continue;" was
    // removed; it could never fire because the loop runs only over d = 2, 3.
    for (int d = 2; d < 4; d++) {

        Console.WriteLine(d);

        // Create a random symmetric matrix, made positive definite by a
        // growing diagonal that dominates the sub-unit off-diagonal entries.
        SymmetricMatrix A = new SymmetricMatrix(d);
        for (int r = 0; r < d; r++) {
            for (int c = 0; c < r; c++) {
                A[r, c] = rng.NextDouble();
            }
            A[r, r] = r + 1.0;
        }

        // Compute its determinant, which appears in the analytic value of the integral.
        CholeskyDecomposition CD = A.CholeskyDecomposition();
        double detA = CD.Determinant();

        // Integrate e^{-x^T A x} over all of R^d.
        Func<IList<double>, double> f = (IList<double> x) => {
            ColumnVector v = new ColumnVector(x);
            double s = v.Transpose() * (A * v);
            return (Math.Exp(-s));
        };
        Interval[] volume = new Interval[d];
        for (int i = 0; i < d; i++) volume[i] = Interval.FromEndpoints(Double.NegativeInfinity, Double.PositiveInfinity);
        IntegrationResult I = MultiFunctionMath.Integrate(f, volume);

        // Compare to the analytic result, allowing twice the estimated numerical precision.
        double analytic = Math.Sqrt(MoreMath.Pow(Math.PI, d) / detA);
        Console.WriteLine("{0} ({1}) {2}", I.Value, I.Precision, analytic);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            I.Value, analytic,
            new EvaluationSettings() { AbsolutePrecision = 2.0 * I.Precision }
        ));
    }
}
/// <summary>
/// Computes the Weibull distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <remarks>
/// <para>The returned fit parameters are the <see cref="ScaleParameter"/> and <see cref="ShapeParameter"/>, in that order.
/// (The code below returns the scale λ first and the shape k second; the previous doc text listed them in the
/// opposite order, which contradicted the returned array and the constructor call.)
/// These are the same parameters, in the same order, that are required by the <see cref="WeibullDistribution(double,double)"/> constructor to
/// specify a new Weibull distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) throw new ArgumentNullException("sample");
    if (sample.Count < 3) throw new InsufficientDataException();
    if (sample.Minimum <= 0.0) throw new InvalidOperationException();

    // The log likelihood function is
    //   \log L = N \log k + (k-1) \sum_i \log x_i - N K \log \lambda - \sum_i \left(\frac{x_i}{\lambda}\right)^k
    // Taking derivatives, we get
    //   \frac{\partial \log L}{\partial \lambda} = - \frac{N k}{\lambda} + \sum_i \frac{k}{\lambda} \left(\frac{x_i}{\lambda}\right)^k
    //   \frac{\partial \log L}{\partial k} = \frac{N}{k} + \sum_i \left[ 1 - \left(\frac{x_i}{\lambda}\right)^k \right] \log \left(\frac{x_i}{\lambda}\right)
    // Setting the first expression to zero and solving for \lambda gives
    //   \lambda = \left( N^{-1} \sum_i x_i^k \right)^{1/k} = ( < x^k > )^{1/k}
    // which allows us to reduce the problem from 2D to 1D.
    // By the way, using the expression for the moment < x^k > of the Weibull distribution, you can show there is
    // no bias to this result even for finite samples.
    // Setting the second expression to zero gives
    //   \frac{1}{k} = \frac{1}{N} \sum_i \left[ \left( \frac{x_i}{\lambda} \right)^k - 1 \right] \log \left(\frac{x_i}{\lambda}\right)
    // which, given the equation for \lambda as a function of k derived from the first expression, is an implicit equation for k.
    // It cannot be solved in closed form, but we have now reduced our problem to finding a root in one-dimension.

    // We need a starting guess for k.
    // The method of moments equations are not solvable for the parameters in closed form
    // but the scale parameter drops out of the ratio of the 1/3 and 2/3 quantile points
    // and the result is easily solved for the shape parameter
    //   k = \frac{\log 2}{\log\left(\frac{x_{2/3}}{x_{1/3}}\right)}
    double x1 = sample.InverseLeftProbability(1.0 / 3.0);
    double x2 = sample.InverseLeftProbability(2.0 / 3.0);
    double k0 = Global.LogTwo / Math.Log(x2 / x1);
    // Given the shape parameter, we could invert the expression for the mean to get
    // the scale parameter, but since we have an expression for \lambda from k, we
    // don't need it.
    //double s0 = sample.Mean / AdvancedMath.Gamma(1.0 + 1.0 / k0);

    // Simply handing our 1D function to a root-finder works fine until we start to encounter large k. For large k,
    // even just computing \lambda goes wrong because we are taking x_i^k which overflows. Horst Rinne, "The Weibull
    // Distribution: A Handbook" describes a way out. Basically, we first move to variables z_i = \log(x_i) and
    // then w_i = z_i - \bar{z}. Then lots of factors of e^{k \bar{z}} cancel out and, even though we still do
    // have some e^{k w_i}, the w_i are small and centered around 0 instead of large and centered around \lambda.
    Sample transformedSample = sample.Copy();
    transformedSample.Transform(x => Math.Log(x));
    double zbar = transformedSample.Mean;
    transformedSample.Transform(z => z - zbar);

    // After this change of variable the 1D function to zero becomes
    //   g(k) = \sum_i ( 1 - k w_i ) e^{k w_i}
    // It's easy to show that g(0) = n and g(\infinity) = -\infinity, so it must cross zero. It's also easy to take
    // a derivative
    //   g'(k) = - k \sum_i w_i^2 e^{k w_i}
    // so we can apply Newton's method.
    int i = 0;
    double k1 = k0;
    while (true) {
        i++;
        double g = 0.0;
        double gp = 0.0;
        foreach (double w in transformedSample) {
            double e = Math.Exp(k1 * w);
            g += (1.0 - k1 * w) * e;
            gp -= k1 * w * w * e;
        }
        // Newton step: k_{n+1} = k_n - g(k_n) / g'(k_n); stop on relative convergence.
        double dk = -g / gp;
        k1 += dk;
        if (Math.Abs(dk) <= Global.Accuracy * Math.Abs(k1)) break;
        if (i >= Global.SeriesMax) throw new NonconvergenceException();
    }

    // The corresponding lambda can also be expressed in terms of zbar and w's:
    // \lambda = e^{\bar{z}} ( N^{-1} \sum_i e^{k w_i} )^{1/k}.
    double t = 0.0;
    foreach (double w in transformedSample) {
        t += Math.Exp(k1 * w);
    }
    t /= transformedSample.Count;
    double lambda1 = Math.Exp(zbar) * Math.Pow(t, 1.0 / k1);

    // We need the curvature matrix at the minimum of our log likelihood function
    // to determine the covariance matrix. Taking more derivatives...
    //   \frac{\partial^2 \log L}{\partial \lambda^2} = \frac{N k}{\lambda^2} - \sum_i \frac{k(k+1) x_i^k}{\lambda^{k+2}}
    //                                                = - \frac{N k^2}{\lambda^2}
    // The second expression follows by inserting the first-derivative-equal-zero relation into the first.
    // For k=1, this agrees with the variance formula for the mean of the best-fit exponential.
    // Derivatives involving k are less simple.
    // We end up needing the means < (x/lambda)^k log(x/lambda) > and < (x/lambda)^k log^2(x/lambda) >.
    double mpl = 0.0;
    double mpl2 = 0.0;
    foreach (double x in sample) {
        double r = x / lambda1;
        double p = Math.Pow(r, k1);
        double l = Math.Log(r);
        double pl = p * l;
        double pl2 = pl * l;
        mpl += pl;
        mpl2 += pl2;
    }
    mpl = mpl / sample.Count;
    mpl2 = mpl2 / sample.Count;

    // See if we can't do any better here. Transforming to zbar and w's looked ugly, but perhaps it
    // can be simplified? One interesting observation: if we take expectation values (which gives
    // the Fisher information matrix) the entries become simple:
    //   B_{\lambda \lambda} = \frac{N k^2}{\lambda^2}
    //   B_{\lambda k} = -\Gamma'(2) \frac{N}{\lambda}
    //   B_{k k} = [1 + \Gamma''(2)] \frac{N}{k^2}
    // Would it be bad to just use these directly?

    // Construct the curvature matrix (negative Hessian, index order (\lambda, k)) and invert it.
    SymmetricMatrix B = new SymmetricMatrix(2);
    B[0, 0] = sample.Count * MoreMath.Sqr(k1 / lambda1);
    B[0, 1] = -sample.Count * k1 / lambda1 * mpl;
    B[1, 1] = sample.Count * (1.0 / MoreMath.Pow2(k1) + mpl2);
    SymmetricMatrix C = B.CholeskyDecomposition().Inverse();

    // Do a KS test to compare sample to best-fit distribution.
    Distribution distribution = new WeibullDistribution(lambda1, k1);
    TestResult test = sample.KolmogorovSmirnovTest(distribution);

    // Return the result (scale first, shape second, matching the constructor call above).
    return (new FitResult(new double[] {lambda1, k1}, C, test));
}
/// <summary>
/// Times the Cholesky decomposition of a 100x100 random symmetric matrix
/// constructed as a Gram matrix, so it is positive (semi-)definite by construction.
/// </summary>
public void SymmetricRandomMatrixCholeskyDecomposition() {

    int d = 100;
    Random rng = new Random(d);

    // Generate d random vectors. (Fill order matches the original so the
    // seeded RNG produces the identical matrix.)
    ColumnVector[] vectors = new ColumnVector[d];
    for (int i = 0; i < d; i++) {
        vectors[i] = new ColumnVector(d);
        for (int j = 0; j < d; j++) {
            vectors[i][j] = rng.NextDouble();
        }
    }

    // The Gram matrix of pairwise inner products is symmetric positive (semi-)definite.
    SymmetricMatrix gram = new SymmetricMatrix(d);
    for (int i = 0; i < d; i++) {
        for (int j = 0; j <= i; j++) {
            gram[i, j] = vectors[i].Transpose() * vectors[j];
        }
    }

    // Time the decomposition and report the elapsed milliseconds.
    Stopwatch timer = Stopwatch.StartNew();
    CholeskyDecomposition decomposition = gram.CholeskyDecomposition();
    timer.Stop();
    Console.WriteLine("{0} {1}", d, timer.ElapsedMilliseconds);

    // The decomposition should succeed, returning a non-null result.
    Assert.IsTrue(decomposition != null);
}
/// <summary>
/// Computes the Beta distribution that best fits the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <remarks>
/// <para>The returned fit parameters are the α (<see cref="Alpha"/>) and β (<see cref="Beta"/>) parameters, in that order.
/// These are the same parameters, in the same order, that are required by the <see cref="BetaDistribution(double,double)"/> constructor to
/// specify a new Beta distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
/// <exception cref="InvalidOperationException">Not all the entries in <paramref name="sample" /> lie between zero and one.</exception>
public static FitResult FitToSample(Sample sample) {

    if (sample == null) throw new ArgumentNullException("sample");
    if (sample.Count < 3) throw new InsufficientDataException();

    // Maximum likelihood calculation:
    //   \log L = \sum_i \left[ (\alpha-1) \log x_i + (\beta-1) \log (1-x_i) - \log B(\alpha,\beta) \right]
    // Using \frac{\partial \log B(a,b)}{\partial a} = \psi(a) - \psi(a+b), we have
    //   \frac{\partial \log L}{\partial \alpha} = \sum_i \log x_i - N \left[ \psi(\alpha) - \psi(\alpha+\beta) \right]
    //   \frac{\partial \log L}{\partial \beta} = \sum_i \log (1-x_i) - N \left[ \psi(\beta) - \psi(\alpha+\beta) \right]
    // Set equal to zero to get equations for \alpha, \beta:
    //   \psi(\alpha) - \psi(\alpha+\beta) = <\log x>
    //   \psi(\beta) - \psi(\alpha+\beta) = <\log (1-x)>

    // Compute the mean log of x and (1-x); these are the (logs of) the geometric means.
    // Values outside (0, 1) are outside the Beta support.
    double ga = 0.0;
    double gb = 0.0;
    foreach (double value in sample) {
        if ((value <= 0.0) || (value >= 1.0)) throw new InvalidOperationException();
        ga += Math.Log(value);
        gb += Math.Log(1.0 - value);
    }
    ga /= sample.Count;
    gb /= sample.Count;

    // Define the vector function to zero.
    Func<IList<double>, IList<double>> f = (IList<double> x) => {
        double pab = AdvancedMath.Psi(x[0] + x[1]);
        return (new double[] {
            AdvancedMath.Psi(x[0]) - pab - ga,
            AdvancedMath.Psi(x[1]) - pab - gb
        });
    };

    // Guess initial values using the method of moments:
    //   M1 = \frac{\alpha}{\alpha+\beta}   C2 = \frac{\alpha\beta}{(\alpha+\beta)^2 (\alpha+\beta+1)}
    // implies
    //   \alpha = M1 \left( \frac{M1 (1-M1)}{C2} - 1 \right)
    //   \beta = (1 - M1) \left( \frac{M1 (1-M1)}{C2} - 1 \right)
    double m = sample.Mean;
    double mm = 1.0 - m;
    double q = m * mm / sample.Variance - 1.0;
    double[] x0 = new double[] { m * q, mm * q };

    // Find the parameter values that zero the two equations.
    IList<double> x1 = MultiFunctionMath.FindZero(f, x0);
    double a = x1[0];
    double b = x1[1];

    // Take more derivatives of \log L to get the curvature (negative Hessian) matrix:
    //   -\frac{\partial^2 \log L}{\partial\alpha^2} = N \left[ \psi'(\alpha) - \psi'(\alpha+\beta) \right]
    //   -\frac{\partial^2 \log L}{\partial\beta^2} = N \left[ \psi'(\beta) - \psi'(\alpha+\beta) \right]
    //   -\frac{\partial^2 \log L}{\partial \alpha \partial \beta} = -N \psi'(\alpha+\beta)
    // since \partial^2 \log B / \partial\alpha \partial\beta = -\psi'(\alpha+\beta).
    // FIX(review): the off-diagonal was previously +N \psi'(\alpha+\beta), the wrong sign.
    // The Cholesky decomposition still succeeded (the determinant is unchanged by the sign),
    // so the bug silently flipped the sign of the reported \alpha-\beta covariance.
    // The covariance matrix is the inverse of the curvature matrix.
    SymmetricMatrix CI = new SymmetricMatrix(2);
    CI[0, 0] = sample.Count * (AdvancedMath.Psi(1, a) - AdvancedMath.Psi(1, a + b));
    CI[1, 1] = sample.Count * (AdvancedMath.Psi(1, b) - AdvancedMath.Psi(1, a + b));
    CI[0, 1] = -sample.Count * AdvancedMath.Psi(1, a + b);
    CholeskyDecomposition CD = CI.CholeskyDecomposition();
    SymmetricMatrix C = CD.Inverse();

    // Do a KS test on the result.
    TestResult test = sample.KolmogorovSmirnovTest(new BetaDistribution(a, b));

    // Return the results.
    FitResult result = new FitResult(x1, C, test);
    return (result);
}