public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n)
        {
            int d = M.Dimension;

            MultivariateSample S = new MultivariateSample(d);

            SquareMatrix A = C.CholeskyDecomposition().SquareRootMatrix();

            Random rng = new Random(1);
            Distribution normal = new NormalDistribution();

            for (int i = 0; i < n; i++) {

                // create a vector of normal deviates
                ColumnVector V = new ColumnVector(d);
                for (int j = 0; j < d; j++) {
                    double y = rng.NextDouble();
                    double z = normal.InverseLeftProbability(y);
                    V[j] = z;
                }

                // form the multivariate distributed vector
                ColumnVector X = M + A * V;

                // add it to the sample
                S.Add(X);

            }

            return (S);
        }
        public void CatalanHankelMatrixDeterminant()
        {
            for (int d = 1; d <= 8; d++) {

                SymmetricMatrix S = new SymmetricMatrix(d);
                for (int r = 0; r < d; r++) {
                    for (int c = 0; c <= r; c++) {
                        int n = r + c;
                        S[r, c] = AdvancedIntegerMath.BinomialCoefficient(2*n, n) / (n + 1);
                    }
                }

                CholeskyDecomposition CD = S.CholeskyDecomposition();
                Assert.IsTrue(TestUtilities.IsNearlyEqual(CD.Determinant(), 1.0));

            }
        }
Exemplo n.º 3
0
        // routines for maximum likelyhood fitting
        /// <summary>
        /// Computes the Gamma distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameters.</returns>
        /// <remarks>
        /// <para>The returned fit parameters are the <see cref="ShapeParameter"/> and <see cref="ScaleParameter"/>, in that order.
        /// These are the same parameters, in the same order, that are required by the <see cref="GammaDistribution(double,double)"/> constructor to
        /// specify a new Gamma distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null) throw new ArgumentNullException("sample");
            if (sample.Count < 3) throw new InsufficientDataException();

            // The log likelyhood of a sample given k and s is
            //   \log L = (k-1) \sum_i \log x_i - \frac{1}{s} \sum_i x_i - N \log \Gamma(k) - N k \log s
            // Differentiating,
            //   \frac{\partial \log L}{\partial s} = \frac{1}{s^2} \sum_i x_i - \frac{Nk}{s}
            //   \frac{\partial \log L}{\partial k} = \sum_i \log x_i - N \psi(k) - N \log s
            // Setting the first equal to zero gives
            //   k s = N^{-1} \sum_i x_i = <x>
            //   \psi(k) + \log s = N^{-1} \sum_i \log x_i = <log x>
            // Inserting the first into the second gives a single equation for k
            //   \log k - \psi(k) = \log <x> - <\log x>
            // Note the RHS need only be computed once.
            // \log k > \psi(k) for all k, so the RHS had better be positive. They get
            // closer for large k, so smaller RHS will produce a larger k.

            double s = 0.0;
            foreach (double x in sample) {
                if (x <= 0.0) throw new InvalidOperationException();
                s += Math.Log(x);
            }
            s = Math.Log(sample.Mean) - s / sample.Count;

            // We can get an initial guess for k from the method of moments
            //   \frac{\mu^2}{\sigma^2} = k

            double k0 = MoreMath.Sqr(sample.Mean) / sample.Variance;

            // Since 1/(2k) < \log(k) - \psi(k) < 1/k, we could get a bound; that
            // might be better to avoid the solver running into k < 0 territory

            double k1 = FunctionMath.FindZero(k => (Math.Log(k) - AdvancedMath.Psi(k) - s), k0);

            double s1 = sample.Mean / k1;

            // Curvature of the log likelyhood is straightforward
            //   \frac{\partial^2 \log L}{\partial s^2} = -\frac{2}{s^3} \sum_i x_i + \frac{Nk}{s^2} = - \frac{Nk}{s^2}
            //   \frac{\partial^2 \log L}{\partial k \partial s} = - \frac{N}{s}
            //   \frac{\partial^2 \log L}{\partial k^2} = - N \psi'(k)
            // This gives the curvature matrix and thus via inversion the covariance matrix.

            SymmetricMatrix B = new SymmetricMatrix(2);
            B[0, 0] = sample.Count * AdvancedMath.Psi(1, k1);
            B[0, 1] = sample.Count / s1;
            B[1, 1] = sample.Count * k1 / MoreMath.Sqr(s1);
            SymmetricMatrix C = B.CholeskyDecomposition().Inverse();

            // Do a KS test for goodness-of-fit
            TestResult test = sample.KolmogorovSmirnovTest(new GammaDistribution(k1, s1));

            return (new FitResult(new double[] { k1, s1 }, C, test));
        }
        public void GaussianIntegrals()
        {
            Random rng = new Random(1);
            for (int d = 2; d < 4; d++) {
                if (d == 4 || d == 5 || d == 6) continue;
                Console.WriteLine(d);

                // Create a symmetric matrix
                SymmetricMatrix A = new SymmetricMatrix(d);
                for (int r = 0; r < d; r++) {
                    for (int c = 0; c < r; c++) {
                        A[r, c] = rng.NextDouble();
                    }
                    // Ensure it is positive definite by diagonal dominance
                    A[r, r] = r + 1.0;
                }

                // Compute its determinant, which appears in the analytic value of the integral
                CholeskyDecomposition CD = A.CholeskyDecomposition();
                double detA = CD.Determinant();

                // Compute the integral
                Func<IList<double>, double> f = (IList<double> x) => {
                    ColumnVector v = new ColumnVector(x);
                    double s = v.Transpose() * (A * v);
                    return (Math.Exp(-s));
                };

                Interval[] volume = new Interval[d];
                for (int i = 0; i < d; i++) volume[i] = Interval.FromEndpoints(Double.NegativeInfinity, Double.PositiveInfinity);

                IntegrationResult I = MultiFunctionMath.Integrate(f, volume);

                // Compare to the analytic result
                Console.WriteLine("{0} ({1}) {2}", I.Value, I.Precision, Math.Sqrt(MoreMath.Pow(Math.PI, d) / detA));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(I.Value, Math.Sqrt(MoreMath.Pow(Math.PI, d) / detA), new EvaluationSettings() { AbsolutePrecision = 2.0 * I.Precision }));
            }
        }
        /// <summary>
        /// Computes the Weibull distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameters.</returns>
        /// <remarks>
        /// <para>The returned fit parameters are the <see cref="ShapeParameter"/> and <see cref="ScaleParameter"/>, in that order.
        /// These are the same parameters, in the same order, that are required by the <see cref="WeibullDistribution(double,double)"/> constructor to
        /// specify a new Weibull distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null) throw new ArgumentNullException("sample");
            if (sample.Count < 3) throw new InsufficientDataException();
            if (sample.Minimum <= 0.0) throw new InvalidOperationException();

            // The log likelyhood function is
            //   \log L = N \log k + (k-1) \sum_i \log x_i - N K \log \lambda - \sum_i \left(\frac{x_i}{\lambda}\right)^k
            // Taking derivatives, we get
            //   \frac{\partial \log L}{\partial \lambda} = - \frac{N k}{\lambda} + \sum_i \frac{k}{\lambda} \left(\frac{x_i}{\lambda}\right)^k
            //   \frac{\partial \log L}{\partial k} =\frac{N}{k} + \sum_i \left[ 1 - \left(\frac{x_i}{\lambda}\right)^k \right] \log \left(\frac{x_i}{\lambda}\right)
            // Setting the first expression to zero and solving for \lambda gives
            //   \lambda = \left( N^{-1} \sum_i x_i^k \right)^{1/k} = ( < x^k > )^{1/k}
            // which allows us to reduce the problem from 2D to 1D.
            // By the way, using the expression for the moment < x^k > of the Weibull distribution, you can show there is
            // no bias to this result even for finite samples.
            // Setting the second expression to zero gives
            //   \frac{1}{k} = \frac{1}{N} \sum_i \left[ \left( \frac{x_i}{\lambda} \right)^k - 1 \right] \log \left(\frac{x_i}{\lambda}\right)
            // which, given the equation for \lambda as a function of k derived from the first expression, is an implicit equation for k.
            // It cannot be solved in closed form, but we have now reduced our problem to finding a root in one-dimension.

            // We need a starting guess for k.
            // The method of moments equations are not solvable for the parameters in closed form
            // but the scale parameter drops out of the ratio of the 1/3 and 2/3 quantile points
            // and the result is easily solved for the shape parameter
            //   k = \frac{\log 2}{\log\left(\frac{x_{2/3}}{x_{1/3}}\right)}
            double x1 = sample.InverseLeftProbability(1.0 / 3.0);
            double x2 = sample.InverseLeftProbability(2.0 / 3.0);
            double k0 = Global.LogTwo / Math.Log(x2 / x1);
            // Given the shape paramter, we could invert the expression for the mean to get
            // the scale parameter, but since we have an expression for \lambda from k, we
            // dont' need it.
            //double s0 = sample.Mean / AdvancedMath.Gamma(1.0 + 1.0 / k0);

            // Simply handing our 1D function to a root-finder works fine until we start to encounter large k. For large k,
            // even just computing \lambda goes wrong because we are taking x_i^k which overflows. Horst Rinne, "The Weibull
            // Distribution: A Handbook" describes a way out. Basically, we first move to variables z_i = \log(x_i) and
            // then w_i = z_i - \bar{z}. Then lots of factors of e^{k \bar{z}} cancel out and, even though we still do
            // have some e^{k w_i}, the w_i are small and centered around 0 instead of large and centered around \lambda.

            Sample transformedSample = sample.Copy();
            transformedSample.Transform(x => Math.Log(x));
            double zbar = transformedSample.Mean;
            transformedSample.Transform(z => z - zbar);

            // After this change of variable the 1D function to zero becomes
            //   g(k) = \sum_i ( 1 - k w_i ) e^{k w_i}
            // It's easy to show that g(0) = n and g(\infinity) = -\infinity, so it must cross zero. It's also easy to take
            // a derivative
            //   g'(k) = - k \sum_i w_i^2 e^{k w_i}
            // so we can apply Newton's method.

            int i = 0;
            double k1 = k0;
            while (true) {
                i++;
                double g = 0.0;
                double gp = 0.0;
                foreach (double w in transformedSample) {
                    double e = Math.Exp(k1 * w);
                    g += (1.0 - k1 * w) * e;
                    gp -= k1 * w * w * e;
                }
                double dk = -g / gp;
                k1 += dk;
                if (Math.Abs(dk) <= Global.Accuracy * Math.Abs(k1)) break;
                if (i >= Global.SeriesMax) throw new NonconvergenceException();
            }

            // The corresponding lambda can also be expressed in terms of zbar and w's.

            double t = 0.0;
            foreach (double w in transformedSample) {
                t += Math.Exp(k1 * w);
            }
            t /= transformedSample.Count;
            double lambda1 = Math.Exp(zbar) * Math.Pow(t, 1.0 / k1);

            // We need the curvature matrix at the minimum of our log likelyhood function
            // to determine the covariance matrix. Taking more derivatives...
            //    \frac{\partial^2 \log L} = \frac{N k}{\lambda^2} - \sum_i \frac{k(k+1) x_i^k}{\lambda^{k+2}}
            //    = - \frac{N k^2}{\lambda^2}
            // The second expression follows by inserting the first-derivative-equal-zero relation into the first.
            // For k=1, this agrees with the variance formula for the mean of the best-fit exponential.

            // Derivatives involving k are less simple.

            // We end up needing the means < (x/lambda)^k log(x/lambda) > and < (x/lambda)^k log^2(x/lambda) >

            double mpl = 0.0; double mpl2 = 0.0;
            foreach (double x in sample) {
                double r = x / lambda1;
                double p = Math.Pow(r, k1);
                double l = Math.Log(r);
                double pl = p * l;
                double pl2 = pl * l;
                mpl += pl;
                mpl2 += pl2;
            }
            mpl = mpl / sample.Count;
            mpl2 = mpl2 / sample.Count;

            // See if we can't do any better here. Transforming to zbar and w's looked ugly, but perhaps it
            // can be simplified? One interesting observation: if we take expectation values (which gives
            // the Fisher information matrix) the entries become simple:
            //   B_{\lambda \lambda} = \frac{N k^2}{\lambda^2}
            //   B_{\lambda k} = -\Gamma'(2) \frac{N}{\lambda}
            //   B_{k k } = [1 + \Gamma''(2)] \frac{N}{k^2}
            // Would it be bad to just use these directly?

            // Construct the curvature matrix and invert it.
            SymmetricMatrix B = new SymmetricMatrix(2);
            B[0, 0] = sample.Count * MoreMath.Sqr(k1 / lambda1);
            B[0, 1] = -sample.Count * k1 / lambda1 * mpl;
            B[1, 1] = sample.Count * (1.0 / MoreMath.Pow2(k1) + mpl2);
            SymmetricMatrix C = B.CholeskyDecomposition().Inverse();

            // Do a KS test to compare sample to best-fit distribution
            Distribution distribution = new WeibullDistribution(lambda1, k1);
            TestResult test = sample.KolmogorovSmirnovTest(distribution);

            // return the result
            return (new FitResult(new double[] {lambda1, k1}, C, test));
        }
        public void SymmetricRandomMatrixCholeskyDecomposition()
        {
            int d = 100;
            Random rng = new Random(d);
            ColumnVector[] V = new ColumnVector[d];
            for (int i=0; i < d; i++) {
                V[i] = new ColumnVector(d);
                for (int j = 0; j < d; j++) {
                    V[i][j] = rng.NextDouble();
                }
            }

            SymmetricMatrix A = new SymmetricMatrix(d);
            for (int i = 0; i < d; i++) {
                for (int j = 0; j <= i; j++) {
                    A[i, j] = V[i].Transpose() * V[j];
                }
            }

            Stopwatch s = Stopwatch.StartNew();
            CholeskyDecomposition CD = A.CholeskyDecomposition();
            s.Stop();
            Console.WriteLine("{0} {1}", d, s.ElapsedMilliseconds);

            Assert.IsTrue(CD != null);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Computes the Beta distribution that best fits the given sample.
        /// </summary>
        /// <param name="sample">The sample to fit.</param>
        /// <returns>The best fit parameters.</returns>
        /// <remarks>
        /// <para>The returned fit parameters are the &#x3B1; (<see cref="Alpha"/>) and  &#x3B2; (<see cref="Beta"/>) parameters, in that order.
        /// These are the same parameters, in the same order, that are required by the <see cref="BetaDistribution(double,double)"/> constructor to
        /// specify a new Beta distribution.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
        /// <exception cref="InvalidOperationException">Not all the entries in <paramref name="sample" /> lie between zero and one.</exception>
        public static FitResult FitToSample(Sample sample)
        {
            if (sample == null) throw new ArgumentNullException("sample");
            if (sample.Count < 3) throw new InsufficientDataException();

            // maximum likelyhood calculation
            //   \log L = \sum_i \left[ (\alpha-1) \log x_i + (\beta-1) \log (1-x_i) - \log B(\alpha,\beta) \right]
            // using \frac{\partial B(a,b)}{\partial a} = \psi(a) - \psi(a+b), we have
            //   \frac{\partial \log L}{\partial \alpha} = \sum_i \log x_i -     N \left[ \psi(\alpha) - \psi(\alpha+\beta) \right]
            //   \frac{\partial \log L}{\partial \beta}  = \sum_i \log (1-x_i) - N \left[ \psi(\beta)  - \psi(\alpha+\beta) \right]
            // set equal to zero to get equations for \alpha, \beta
            //   \psi(\alpha) - \psi(\alpha+\beta) = <\log x>
            //   \psi(\beta) - \psi(\alpha+\beta) = <\log (1-x)>

            // compute the mean log of x and (1-x)
            // these are the (logs of) the geometric means
            double ga = 0.0; double gb = 0.0;
            foreach (double value in sample) {
                if ((value <= 0.0) || (value >= 1.0)) throw new InvalidOperationException();
                ga += Math.Log(value); gb += Math.Log(1.0 - value);
            }
            ga /= sample.Count; gb /= sample.Count;

            // define the function to zero
            Func<IList<double>, IList<double>> f = delegate(IList<double> x) {
                double pab = AdvancedMath.Psi(x[0] + x[1]);
                return (new double[] {
                    AdvancedMath.Psi(x[0]) - pab - ga,
                    AdvancedMath.Psi(x[1]) - pab - gb
                });
            };

            // guess initial values using the method of moments
            //   M1 = \frac{\alpha}{\alpha+\beta} C2 = \frac{\alpha\beta}{(\alpha+\beta)^2 (\alpha+\beta+1)}
            // implies
            //   \alpha = M1 \left( \frac{M1 (1-M1)}{C2} - 1 \right)
            //   \beta = (1 - M1) \left( \frac{M1 (1-M1)}{C2} -1 \right)
            double m = sample.Mean; double mm = 1.0 - m;
            double q = m * mm / sample.Variance - 1.0;
            double[] x0 = new double[] { m * q, mm * q };

            // find the parameter values that zero the two equations
            IList<double> x1 = MultiFunctionMath.FindZero(f, x0);
            double a = x1[0]; double b = x1[1];

            // take more derivatives of \log L to get curvature matrix
            //   \frac{\partial^2 \log L}{\partial\alpha^2} = - N \left[ \psi'(\alpha) - \psi'(\alpha+\beta) \right]
            //   \frac{\partial^2 \log L}{\partial\beta^2}  = - N \left[ \psi'(\beta)  - \psi'(\alpha+\beta) \right]
            //   \frac{\partial^2 \log L}{\partial \alpha \partial \beta} = - N \psi'(\alpha+\beta)
            // covariance matrix is inverse of curvature matrix
            SymmetricMatrix CI = new SymmetricMatrix(2);
            CI[0, 0] = sample.Count * (AdvancedMath.Psi(1, a) - AdvancedMath.Psi(1, a + b));
            CI[1, 1] = sample.Count * (AdvancedMath.Psi(1, b) - AdvancedMath.Psi(1, a + b));
            CI[0, 1] = sample.Count * AdvancedMath.Psi(1, a + b);
            CholeskyDecomposition CD = CI.CholeskyDecomposition();
            SymmetricMatrix C = CD.Inverse();

            // do a KS test on the result
            TestResult test = sample.KolmogorovSmirnovTest(new BetaDistribution(a, b));

            // return the results
            FitResult result = new FitResult(x1, C, test);
            return (result);
        }