Example #1
        public void SolveTest()
        {
            float[][] value = // positive-definite
            {
                new float[] {  2, -1,  0 },
                new float[] { -1,  2, -1 },
                new float[] {  0, -1,  2 }
            };

            JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value);

            float[][] L = chol.LeftTriangularFactor;

            float[][] B = Matrix.ColumnVector(new float[] { 1, 2, 3 }).ToArray();

            float[][] expected = Matrix.ColumnVector(new float[] { 2.5f, 4.0f, 3.5f }).ToArray();

            float[][] actual = chol.Solve(B);
            Assert.IsTrue(Matrix.IsEqual(expected, actual, 1e-6f));
            Assert.AreNotEqual(actual, B);

            actual = chol.Solve(B, true);
            Assert.AreEqual(actual, B);
            Assert.IsTrue(Matrix.IsEqual(expected, B, 1e-6f));
        }
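A quick sanity check (not part of the original test): the left triangular factor can be multiplied back to recover the input matrix. This sketch assumes Accord.Math's jagged Dot and Transpose extension methods are available:

            // Hypothetical follow-up: verify that L * L' reconstructs the original matrix.
            float[][] reconstructed = L.Dot(L.Transpose());
            Assert.IsTrue(Matrix.IsEqual(value, reconstructed, 1e-6f));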
Example #2
        public void InverseTestNaN()
        {
            int n = 5;

            var I = Matrix.Identity(n).ToSingle().ToArray();

            for (int i = 0; i < n; i++)
            {
                for (int j = 0; j < n; j++)
                {
                    var value = Matrix.Magic(n).ToArray().ToSingle();

                    value[i][j] = Single.NaN;

                    bool thrown = false;

                    var target = new JaggedCholeskyDecompositionF(value);

                    try
                    {
                        target.Solve(I);
                    }
                    catch (NonPositiveDefiniteMatrixException)
                    {
                        thrown = true;
                    }

                    Assert.IsTrue(thrown);
                }
            }
        }
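Rather than relying on the exception, callers can also inspect the decomposition before solving. A minimal sketch, assuming the IsPositiveDefinite property shown in the other examples on this page:

            // Hypothetical guard: only solve when the factorization is valid.
            var chol = new JaggedCholeskyDecompositionF(value);
            if (chol.IsPositiveDefinite)
            {
                float[][] solution = chol.Solve(I);
            }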
Example #3
        public void SolveTest2()
        {
            float[][] value = // not positive-definite
            {
                new float[] {  6, -1,  2,  6 },
                new float[] { -1,  3, -3, -2 },
                new float[] {  2, -3,  2,  0 },
                new float[] {  6, -2,  0,  0 },
            };

            JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value, robust: true);

            float[][] L = chol.LeftTriangularFactor;

            float[][] B = Matrix.Identity(4).ToSingle().ToArray();

            float[][] expected =
            {
                new float[] {  0.4000f,  1.2000f,  1.4000f, -0.5000f },
                new float[] {  1.2000f,  3.6000f,  4.2000f, -2.0000f },
                new float[] {  1.4000f,  4.2000f,  5.4000f, -2.5000f },
                new float[] { -0.5000f, -2.0000f, -2.5000f,  1.0000f },
            };

            float[][] actual = chol.Solve(B);

            Assert.IsTrue(Matrix.IsEqual(expected, actual, 1e-5f));
        }
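Since B is the identity here, the solution is the matrix inverse, so multiplying it back against value should reproduce the identity. A sketch, again assuming the jagged Dot extension:

            // Hypothetical check: value * inverse should be (approximately) the identity.
            float[][] product = value.Dot(actual);
            Assert.IsTrue(Matrix.IsEqual(product, Matrix.Identity(4).ToSingle().ToArray(), 1e-4f));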
Example #5
        public void InverseTest1()
        {
            float[][] value = // positive-definite
            {
                new float[] {  2, -1,  0 },
                new float[] { -1,  2, -1 },
                new float[] {  0, -1,  2 }
            };

            var chol = new JaggedCholeskyDecompositionF(value, robust: false);

            Assert.IsTrue(chol.IsPositiveDefinite);
            float[][] L = chol.LeftTriangularFactor;

            float[][] expected =
            {
                new float[] { 0.750f, 0.500f, 0.250f },
                new float[] { 0.500f, 1.000f, 0.500f },
                new float[] { 0.250f, 0.500f, 0.750f },
            };

            float[][] actual = chol.Inverse();
            Assert.IsTrue(actual.IsEqual(expected, 1e-5f));

            float[][] inv = chol.Solve(Jagged.Identity<float>(3));
            Assert.IsTrue(inv.IsEqual(expected, 1e-5f));
        }
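A hypothetical cross-check, assuming InverseTrace accepts a destroy flag as in the RunEpoch listings below: for this matrix the trace of the inverse is 0.75 + 1.00 + 0.75 = 2.5.

            // Hypothetical: the inverse's trace can be read without forming the full inverse.
            var chol2 = new JaggedCholeskyDecompositionF(value);
            float trace = chol2.InverseTrace(destroy: false);
            Assert.AreEqual(2.5f, trace, 1e-5f);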
Example #6
        public void SolveTest4()
        {
            float[][] value = // not positive-definite
            {
                new float[] {  6, -1,  2,  6 },
                new float[] { -1,  3, -3, -2 },
                new float[] {  2, -3,  2,  0 },
                new float[] {  6, -2,  0,  0 },
            };

            JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value, robust: true);

            float[][] L = chol.LeftTriangularFactor;

            float[] B = new float[] { 1, 2, 3, 4 };

            float[] expected = { 5, 13, 16, -8 };
            float[] actual   = chol.Solve(B);

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i], 1e-3);
            }
        }
        public void SolveTest3()
        {
            float[][] value = // positive-definite
            {
               new float[] {  2, -1,  0 },
               new float[] { -1,  2, -1 },
               new float[] {  0, -1,  2 }
            };

            JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value);
            float[][] L = chol.LeftTriangularFactor;

            float[] B = new float[] { 1, 2, 3 };

            float[] expected = new float[] { 2.5f, 4.0f, 3.5f };
            float[] actual = chol.Solve(B);

            Assert.IsTrue(Matrix.IsEqual(expected, actual, 1e-5f));

            actual = chol.Solve(B, true);
            Assert.AreEqual(actual, B);
            Assert.IsTrue(Matrix.IsEqual(expected, B, 1e-5f));
        }
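Note that the in-place overload overwrites its argument, as the asserts above show. Callers that still need the right-hand side should copy it first; a small sketch:

            // Hypothetical guard: preserve the right-hand side before an in-place solve.
            float[] rhs = new float[] { 1, 2, 3 };
            float[] keep = (float[])rhs.Clone();      // rhs will be overwritten below
            float[] solution = chol.Solve(rhs, true); // solution and rhs are the same array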
        /// <summary>
        ///   Runs a single learning epoch.
        /// </summary>
        /// 
        /// <param name="input">Array of input vectors.</param>
        /// <param name="output">Array of output vectors.</param>
        /// 
        /// <returns>Returns summary learning error for the epoch.</returns>
        /// 
        /// <remarks><para>The method runs one learning epoch by performing the necessary
        /// iterations of the Levenberg-Marquardt algorithm to achieve an error decrease.</para></remarks>
        ///
        public double RunEpoch(double[][] input, double[][] output)
        {
            // Initial definitions and memory allocations
            int N = input.Length;

            JaggedCholeskyDecompositionF decomposition = null;
            double sumOfSquaredErrors = 0.0;
            double sumOfSquaredWeights = 0.0;
            double trace = 0.0;

            // Set upper triangular Hessian to zero
            for (int i = 0; i < hessian.Length; i++)
                Array.Clear(hessian[i], i, hessian.Length - i);

            // Set Gradient vector to zero
            Array.Clear(gradient, 0, gradient.Length);


            // Divide the problem into blocks. Instead of computing
            // a single Jacobian and a single error vector, we will
            // be computing multiple Jacobians for smaller problems
            // and then sum all blocks into the final Hessian matrix
            // and gradient vector.

            int blockSize = input.Length / Blocks;
            int finalBlock = input.Length % Blocks;
            int jacobianSize = blockSize * outputCount;

            // Re-allocate the partial Jacobian matrix only if needed
            if (jacobian[0] == null || jacobian[0].Length < jacobianSize)
            {
                for (int i = 0; i < jacobian.Length; i++)
                    this.jacobian[i] = new float[jacobianSize];
            }

            // Re-allocate error vector only if needed
            if (errors == null || errors.Length < jacobianSize)
                errors = new double[jacobianSize];


            // For each block
            for (int s = 0; s <= Blocks; s++)
            {
                if (s == Blocks && finalBlock == 0)
                    continue;

                Trace.TraceInformation("Starting Jacobian block {0}/{1}", s + 1, Blocks);

                int B = (s == Blocks) ? finalBlock : blockSize;
                int[] block = Matrix.Indices(s * blockSize, s * blockSize + B);

                double[][] inputBlock = input.Submatrix(block);
                double[][] outputBlock = output.Submatrix(block);


                // Compute the partial Jacobian matrix
                if (method == JacobianMethod.ByBackpropagation)
                    sumOfSquaredErrors = JacobianByChainRule(inputBlock, outputBlock);
                else
                    sumOfSquaredErrors = JacobianByFiniteDifference(inputBlock, outputBlock);

                if (Double.IsNaN(sumOfSquaredErrors))
                    throw new ArithmeticException("Jacobian calculation has produced a non-finite number.");

                Trace.TraceInformation("Jacobian block finished.");


                // Compute error gradient using jacobian
                Trace.TraceInformation("Updating gradient.");
                for (int i = 0; i < jacobian.Length; i++)
                {
                    double gsum = 0;
                    for (int j = 0; j < jacobianSize; j++)
                        gsum += jacobian[i][j] * errors[j];
                    gradient[i] += (float)gsum;
                }


                // Compute Quasi-Hessian Matrix approximation
                //  using the outer product Jacobian (H ~ J'J)
                Trace.TraceInformation("Updating Hessian.");
                Parallel.For(0, jacobian.Length, i =>
                {
                    float[] ji = jacobian[i];
                    float[] hi = hessian[i];

                    for (int j = i; j < hi.Length; j++)
                    {
                        float[] jj = jacobian[j];

                        double hsum = 0;
                        for (int k = 0; k < jacobianSize; k++)
                            hsum += ji[k] * jj[k];

                        // The Hessian need only be upper-triangular, since
                        // it is symmetric. The Cholesky decomposition will
                        // make use of this fact and use the lower-triangular
                        // portion to hold the decomposition, conserving memory.
                        hi[j] += (float)(2 * beta * hsum);
                    }
                });
            }

            Trace.TraceInformation("Hessian computation finished.");

            // Store the Hessian's diagonal for future computations. The
            // diagonal will be destroyed in the decomposition, so it can
            // still be updated on every iteration by restoring this copy.
            for (int i = 0; i < hessian.Length; i++)
                diagonal[i] = hessian[i][i];

            // Create the initial weights vector
            sumOfSquaredWeights = saveNetworkToArray();


            // Define the objective function: (Bayesian regularization objective)
            double objective = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
            double current = objective + 1.0; // (starting value to enter iteration)


            // Beginning of the main Levenberg-Marquardt method
            lambda /= v;

            // We'll try to find a direction with less error
            //  (or where the objective function is smaller)
            while (current >= objective && lambda < lambdaMax)
            {
                lambda *= v;

                // Update diagonal (Levenberg-Marquardt)
                for (int i = 0; i < diagonal.Length; i++)
                    hessian[i][i] = (float)(diagonal[i] + 2 * lambda + 2 * alpha);

                Trace.TraceInformation("Decomposition started.");

                // Decompose to solve the linear system. The Cholesky decomposition
                // is done in place, occupying the Hessian's lower-triangular part.
                decomposition = new JaggedCholeskyDecompositionF(hessian, robust: true, inPlace: true);

                Trace.TraceInformation("Decomposition ended.");

                // Check if the decomposition exists
                if (decomposition.IsNotDefined)
                {
                    // The Hessian is singular. Continue to the next
                    // iteration until the diagonal update transforms
                    // it back to non-singular.
                    continue;
                }

                Trace.TraceInformation("Solving linear system.");

                // Solve using Cholesky decomposition
                deltas = decomposition.Solve(gradient);

                Trace.TraceInformation("Updating weights.");

                // Update weights using the calculated deltas
                sumOfSquaredWeights = loadArrayIntoNetwork();

                // Calculate the new error
                sumOfSquaredErrors = ComputeError(input, output);

                // Update the objective function
                current = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;

                // If the objective function is larger than before, the method
                //  is tried again using a greater damping factor.
            }

            // If this iteration caused an error drop, then the next iteration
            //  will use a smaller damping factor.
            lambda /= v;


            // If we are using Bayesian regularization, we need to
            //   update the Bayesian hyperparameters alpha and beta
            if (useBayesianRegularization)
            {
                // References: 
                // - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
                // - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html

                // Compute the trace of the inverse Hessian in place. The
                // Hessian, which was still being held together with the L
                // factorization, will be destroyed by this computation.
                trace = decomposition.InverseTrace(destroy: true);


                // Poland's update formula:
                gamma = numberOfParameters - (alpha * trace);
                alpha = numberOfParameters / (2 * sumOfSquaredWeights + trace);
                beta = System.Math.Abs((N - gamma) / (2 * sumOfSquaredErrors));
                //beta = (N - gamma) / (2.0 * sumOfSquaredErrors);

                // MacKay's original update formula:
                //  gamma = (double)networkParameters - (alpha * trace);
                //  alpha = gamma / (2.0 * sumOfSquaredWeights);
                //  beta = (gamma - N) / (2.0 * sumOfSquaredErrors);
            }

            return sumOfSquaredErrors;
        }
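The heart of the loop above is the damped normal-equations step: the stored Hessian diagonal is inflated by 2*lambda (plus the regularization term 2*alpha) and H * delta = g is solved by Cholesky. A minimal standalone sketch with hypothetical 2x2 data in place of the trainer's fields:

            // Hypothetical data; the real code uses the network's hessian/gradient fields.
            float[][] hess =
            {
                new float[] { 4, 1 },
                new float[] { 1, 3 },
            };
            float[] grad = { 1, 2 };
            double lambda = 0.1, alpha = 0;

            // Levenberg-Marquardt damping of the diagonal, as in the loop above.
            for (int i = 0; i < hess.Length; i++)
                hess[i][i] = (float)(hess[i][i] + 2 * lambda + 2 * alpha);

            var step = new JaggedCholeskyDecompositionF(hess, robust: true, inPlace: true);
            if (!step.IsNotDefined) // the property is named IsUndefined in the second listing below
            {
                float[] delta = step.Solve(grad); // the weight-update direction
            }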
        /// <summary>
        ///   Runs a single learning epoch.
        /// </summary>
        ///
        /// <param name="input">Array of input vectors.</param>
        /// <param name="output">Array of output vectors.</param>
        ///
        /// <returns>Returns summary learning error for the epoch.</returns>
        ///
        /// <remarks><para>The method runs one learning epoch by performing the necessary
        /// iterations of the Levenberg-Marquardt algorithm to achieve an error decrease.</para></remarks>
        ///
        public double RunEpoch(double[][] input, double[][] output)
        {
            // Initial definitions and memory allocations
            int N = input.Length;

            JaggedCholeskyDecompositionF decomposition = null;
            double sumOfSquaredErrors  = 0.0;
            double sumOfSquaredWeights = 0.0;
            double trace = 0.0;

            // Set upper triangular Hessian to zero
            for (int i = 0; i < hessian.Length; i++)
            {
                Array.Clear(hessian[i], i, hessian.Length - i);
            }

            // Set Gradient vector to zero
            Array.Clear(gradient, 0, gradient.Length);


            // Divide the problem into blocks. Instead of computing
            // a single Jacobian and a single error vector, we will
            // be computing multiple Jacobians for smaller problems
            // and then sum all blocks into the final Hessian matrix
            // and gradient vector.

            int blockSize    = input.Length / Blocks;
            int finalBlock   = input.Length % Blocks;
            int jacobianSize = blockSize * outputCount;

            // Re-allocate the partial Jacobian matrix only if needed
            if (jacobian[0] == null || jacobian[0].Length < jacobianSize)
            {
                for (int i = 0; i < jacobian.Length; i++)
                {
                    this.jacobian[i] = new float[jacobianSize];
                }
            }

            // Re-allocate error vector only if needed
            if (errors == null || errors.Length < jacobianSize)
            {
                errors = new double[jacobianSize];
            }


            // For each block
            for (int s = 0; s <= Blocks; s++)
            {
                if (s == Blocks && finalBlock == 0)
                {
                    continue;
                }

                int   B     = (s == Blocks) ? finalBlock : blockSize;
                int[] block = Vector.Range(s * blockSize, s * blockSize + B);

                double[][] inputBlock  = input.Get(block);
                double[][] outputBlock = output.Get(block);


                // Compute the partial Jacobian matrix
                if (method == JacobianMethod.ByBackpropagation)
                {
                    sumOfSquaredErrors = JacobianByChainRule(inputBlock, outputBlock);
                }
                else
                {
                    sumOfSquaredErrors = JacobianByFiniteDifference(inputBlock, outputBlock);
                }

                if (Double.IsNaN(sumOfSquaredErrors))
                {
                    throw new ArithmeticException("Jacobian calculation has produced a non-finite number.");
                }


                // Compute error gradient using Jacobian
                for (int i = 0; i < jacobian.Length; i++)
                {
                    double gsum = 0;
                    for (int j = 0; j < jacobianSize; j++)
                    {
                        gsum += jacobian[i][j] * errors[j];
                    }
                    gradient[i] += (float)gsum;
                }


                // Compute Quasi-Hessian Matrix approximation
                //  using the outer product Jacobian (H ~ J'J)
                Parallel.For(0, jacobian.Length, ParallelOptions, i =>
                {
                    float[] ji = jacobian[i];
                    float[] hi = hessian[i];

                    for (int j = i; j < hi.Length; j++)
                    {
                        float[] jj = jacobian[j];

                        double hsum = 0;
                        for (int k = 0; k < jacobianSize; k++)
                        {
                            hsum += ji[k] * jj[k];
                        }

                        // The Hessian need only be upper-triangular, since
                        // it is symmetric. The Cholesky decomposition will
                        // make use of this fact and use the lower-triangular
                        // portion to hold the decomposition, conserving memory.
                        hi[j] += (float)(2 * beta * hsum);
                    }
                });
            }


            // Store the Hessian's diagonal for future computations. The
            // diagonal will be destroyed in the decomposition, so it can
            // still be updated on every iteration by restoring this copy.
            for (int i = 0; i < hessian.Length; i++)
            {
                diagonal[i] = hessian[i][i];
            }

            // Create the initial weights vector
            sumOfSquaredWeights = saveNetworkToArray();


            // Define the objective function: (Bayesian regularization objective)
            double objective = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
            double current   = objective + 1.0; // (starting value to enter iteration)


            // Beginning of the main Levenberg-Marquardt method
            lambda /= v;

            // We'll try to find a direction with less error
            //  (or where the objective function is smaller)
            while (current >= objective && lambda < lambdaMax)
            {
                lambda *= v;

                // Update diagonal (Levenberg-Marquardt)
                for (int i = 0; i < diagonal.Length; i++)
                {
                    hessian[i][i] = (float)(diagonal[i] + 2 * lambda + 2 * alpha);
                }

                // Decompose to solve the linear system. The Cholesky decomposition
                // is done in place, occupying the Hessian's lower-triangular part.
                decomposition = new JaggedCholeskyDecompositionF(hessian, robust: true, inPlace: true);

                // Check if the decomposition exists
                if (decomposition.IsUndefined)
                {
                    // The Hessian is singular. Continue to the next
                    // iteration until the diagonal update transforms
                    // it back to non-singular.
                    continue;
                }


                // Solve using Cholesky decomposition
                deltas = decomposition.Solve(gradient);

                // Update weights using the calculated deltas
                sumOfSquaredWeights = loadArrayIntoNetwork();

                // Calculate the new error
                sumOfSquaredErrors = ComputeError(input, output);

                // Update the objective function
                current = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;

                // If the objective function is larger than before, the method
                //  is tried again using a greater damping factor.
            }

            // If this iteration caused an error drop, then the next
            // iteration will use a smaller damping factor.
            lambda /= v;

            if (lambda < 1e-300)
            {
                lambda = 1e-300;
            }


            // If we are using Bayesian regularization, we need to
            //   update the Bayesian hyperparameters alpha and beta
            if (useBayesianRegularization)
            {
                // References:
                // - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
                // - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html

                // Compute the trace of the inverse Hessian in place. The
                // Hessian, which was still being held together with the L
                // factorization, will be destroyed by this computation.
                trace = decomposition.InverseTrace(destroy: true);


                // Poland's update formula:
                gamma = numberOfParameters - (alpha * trace);
                alpha = numberOfParameters / (2 * sumOfSquaredWeights + trace);
                beta  = System.Math.Abs((N - gamma) / (2 * sumOfSquaredErrors));
                //beta = (N - gamma) / (2.0 * sumOfSquaredErrors);

                // MacKay's original update formula:
                //  gamma = (double)networkParameters - (alpha * trace);
                //  alpha = gamma / (2.0 * sumOfSquaredWeights);
                //  beta = (gamma - N) / (2.0 * sumOfSquaredErrors);
            }

            return sumOfSquaredErrors;
        }
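Once the inverse-Hessian trace is known, the Bayesian update at the end is plain arithmetic. A worked sketch with made-up numbers, only to illustrate Poland's formula as implemented above:

            // Hypothetical values: 10 parameters, 100 samples.
            double numberOfParameters = 10, N = 100;
            double alpha = 0.01, trace = 50.0;
            double sumOfSquaredWeights = 4.0, sumOfSquaredErrors = 2.0;

            double gamma = numberOfParameters - (alpha * trace);            // 10 - 0.5 = 9.5
            alpha = numberOfParameters / (2 * sumOfSquaredWeights + trace); // 10 / 58 ~ 0.172
            double beta = Math.Abs((N - gamma) / (2 * sumOfSquaredErrors)); // 90.5 / 4 = 22.625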