/// <summary>
///   Solves a linear system whose right-hand side is a single column vector,
///   first through the default <c>Solve</c> overload and then through the
///   overload that receives <c>true</c> as its second argument.
/// </summary>
public void SolveTest()
{
    // Symmetric positive-definite coefficient matrix.
    float[][] matrix =
    {
        new float[] {  2, -1,  0 },
        new float[] { -1,  2, -1 },
        new float[] {  0, -1,  2 }
    };

    var decomposition = new JaggedCholeskyDecompositionF(matrix);

    // The factor is read here but its contents are not inspected by this test.
    float[][] factor = decomposition.LeftTriangularFactor;

    float[][] rightSide = Matrix.ColumnVector(new float[] { 1, 2, 3 }).ToArray();
    float[][] expected = Matrix.ColumnVector(new float[] { 2.5f, 4.0f, 3.5f }).ToArray();

    // Default overload: the result must match the expected solution and
    // must not compare equal to the right-hand side that was passed in.
    float[][] solution = decomposition.Solve(rightSide);
    Assert.IsTrue(Matrix.IsEqual(expected, solution, 1e-6f));
    Assert.AreNotEqual(solution, rightSide);

    // Second overload: afterwards the right-hand side itself is expected
    // to hold the solution and to compare equal to the returned value.
    solution = decomposition.Solve(rightSide, true);
    Assert.AreEqual(solution, rightSide);
    Assert.IsTrue(Matrix.IsEqual(expected, rightSide, 1e-6f));
}
/// <summary>
///   Places a NaN in every position of a 5x5 magic square, one position at a
///   time, and checks that solving against the identity matrix raises
///   <c>NonPositiveDefiniteMatrixException</c> in each case.
/// </summary>
public void InverseTestNaN()
{
    int size = 5;
    var identity = Matrix.Identity(size).ToSingle().ToArray();

    for (int row = 0; row < size; row++)
    {
        for (int col = 0; col < size; col++)
        {
            // A fresh matrix is built on every pass so that exactly one
            // entry is NaN at any given time.
            var corrupted = Matrix.Magic(size).ToArray().ToSingle();
            corrupted[row][col] = Single.NaN;

            // Construction happens outside the try block: only Solve is
            // expected to throw.
            var target = new JaggedCholeskyDecompositionF(corrupted);

            bool thrown;
            try
            {
                target.Solve(identity);
                thrown = false;
            }
            catch (NonPositiveDefiniteMatrixException)
            {
                thrown = true;
            }

            Assert.IsTrue(thrown);
        }
    }
}
/// <summary>
///   Solves against the 4x4 identity matrix using a decomposition built with
///   <c>robust: true</c> from a matrix that is not positive-definite, and
///   compares the result with the expected values.
/// </summary>
public void SolveTest2()
{
    // Symmetric, but not positive-definite.
    float[][] matrix =
    {
        new float[] {  6, -1,  2,  6 },
        new float[] { -1,  3, -3, -2 },
        new float[] {  2, -3,  2,  0 },
        new float[] {  6, -2,  0,  0 },
    };

    var decomposition = new JaggedCholeskyDecompositionF(matrix, robust: true);

    // The factor is read here but its contents are not inspected by this test.
    float[][] factor = decomposition.LeftTriangularFactor;

    float[][] identity = Matrix.Identity(4).ToSingle().ToArray();

    float[][] expected =
    {
        new float[] {  0.4000f,  1.2000f,  1.4000f, -0.5000f },
        new float[] {  1.2000f,  3.6000f,  4.2000f, -2.0000f },
        new float[] {  1.4000f,  4.2000f,  5.4000f, -2.5000f },
        new float[] { -0.5000f, -2.0000f, -2.5000f,  1.0000f },
    };

    float[][] solution = decomposition.Solve(identity);

    Assert.IsTrue(Matrix.IsEqual(expected, solution, 1e-5f));
}
/// <summary>
///   Checks that <c>Inverse()</c> and <c>Solve</c> applied to a 3x3 identity
///   matrix both produce the expected inverse of a positive-definite matrix.
/// </summary>
public void InverseTest1()
{
    // Symmetric positive-definite input.
    float[][] matrix =
    {
        new float[] {  2, -1,  0 },
        new float[] { -1,  2, -1 },
        new float[] {  0, -1,  2 }
    };

    var decomposition = new JaggedCholeskyDecompositionF(matrix, robust: false);
    Assert.IsTrue(decomposition.IsPositiveDefinite);

    // The factor is read here but its contents are not inspected by this test.
    float[][] factor = decomposition.LeftTriangularFactor;

    float[][] expected =
    {
        new float[] { 0.750f, 0.500f, 0.250f },
        new float[] { 0.500f, 1.000f, 0.500f },
        new float[] { 0.250f, 0.500f, 0.750f },
    };

    // Direct inversion.
    float[][] inverse = decomposition.Inverse();
    Assert.IsTrue(inverse.IsEqual(expected, 1e-5f));

    // Solving against the identity must yield the same matrix.
    float[][] solved = decomposition.Solve(Jagged.Identity<float>(3));
    Assert.IsTrue(solved.IsEqual(expected, 1e-5f));
}
/// <summary>
///   Solves for a single right-hand-side vector using a decomposition built
///   with <c>robust: true</c> from a matrix that is not positive-definite,
///   and compares every component with the expected solution.
/// </summary>
public void SolveTest4()
{
    float[][] value = // not positive-definite
    {
        new float[] {  6, -1,  2,  6 },
        new float[] { -1,  3, -3, -2 },
        new float[] {  2, -3,  2,  0 },
        new float[] {  6, -2,  0,  0 },
    };

    JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value, robust: true);

    // The factor is read here but its contents are not inspected by this test.
    float[][] L = chol.LeftTriangularFactor;

    float[] B = new float[] { 1, 2, 3, 4 };
    float[] expected = { 5, 13, 16, -8 };

    float[] actual = chol.Solve(B);

    // Check the length first. Without this, a result shorter than expected
    // would pass after comparing only a prefix, and a longer one would fail
    // with an index error instead of a readable assertion message.
    Assert.AreEqual(expected.Length, actual.Length);

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 1e-3);
    }
}
/// <summary>
///   Solves for a single right-hand-side vector using a decomposition built
///   with <c>robust: true</c> from a matrix that is not positive-definite,
///   and compares every component with the expected solution.
/// </summary>
public void SolveTest4()
{
    float[][] value = // not positive-definite
    {
        new float[] {  6, -1,  2,  6 },
        new float[] { -1,  3, -3, -2 },
        new float[] {  2, -3,  2,  0 },
        new float[] {  6, -2,  0,  0 },
    };

    JaggedCholeskyDecompositionF chol = new JaggedCholeskyDecompositionF(value, robust: true);

    // The factor is read here but its contents are not inspected by this test.
    float[][] L = chol.LeftTriangularFactor;

    float[] B = new float[] { 1, 2, 3, 4 };
    float[] expected = { 5, 13, 16, -8 };

    float[] actual = chol.Solve(B);

    // Check the length first. Without this, a result shorter than expected
    // would pass after comparing only a prefix, and a longer one would fail
    // with an index error instead of a readable assertion message.
    Assert.AreEqual(expected.Length, actual.Length);

    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 1e-3);
    }
}
/// <summary>
///   Solves a linear system for a single right-hand-side vector, first
///   through the default <c>Solve</c> overload and then through the overload
///   that receives <c>true</c> as its second argument.
/// </summary>
public void SolveTest3()
{
    // Symmetric positive-definite coefficient matrix.
    float[][] matrix =
    {
        new float[] {  2, -1,  0 },
        new float[] { -1,  2, -1 },
        new float[] {  0, -1,  2 }
    };

    var decomposition = new JaggedCholeskyDecompositionF(matrix);

    // The factor is read here but its contents are not inspected by this test.
    float[][] factor = decomposition.LeftTriangularFactor;

    float[] rightSide = { 1, 2, 3 };
    float[] expected = { 2.5f, 4.0f, 3.5f };

    // Default overload.
    float[] solution = decomposition.Solve(rightSide);
    Assert.IsTrue(Matrix.IsEqual(expected, solution, 1e-5f));

    // Second overload: afterwards the right-hand side itself is expected
    // to hold the solution and to compare equal to the returned value.
    solution = decomposition.Solve(rightSide, true);
    Assert.AreEqual(solution, rightSide);
    Assert.IsTrue(Matrix.IsEqual(expected, rightSide, 1e-5f));
}
/// <summary>
///   Runs a single learning epoch.
/// </summary>
///
/// <param name="input">Array of input vectors.</param>
/// <param name="output">Array of output vectors.</param>
///
/// <returns>Returns summary learning error for the epoch.</returns>
///
/// <exception cref="ArithmeticException">
///   The Jacobian computation returned NaN as its sum of squared errors.
/// </exception>
///
/// <remarks><para>The method runs one learning epoch by running the necessary
///   iterations of the Levenberg-Marquardt method to achieve an error decrease,
///   or until the damping factor reaches its maximum.</para></remarks>
///
public double RunEpoch(double[][] input, double[][] output)
{
    // Initial definitions and memory allocations
    int N = input.Length;

    JaggedCholeskyDecompositionF decomposition = null;
    double sumOfSquaredErrors = 0.0;
    double sumOfSquaredWeights = 0.0;
    double trace = 0.0;

    // Set upper triangular Hessian to zero
    for (int i = 0; i < hessian.Length; i++)
    {
        Array.Clear(hessian[i], i, hessian.Length - i);
    }

    // Set Gradient vector to zero
    Array.Clear(gradient, 0, gradient.Length);

    // Divide the problem into blocks. Instead of computing
    // a single Jacobian and a single error vector, we will
    // be computing multiple Jacobians for smaller problems
    // and then sum all blocks into the final Hessian matrix
    // and gradient vector.
    int blockSize = input.Length / Blocks;
    int finalBlock = input.Length % Blocks;
    int jacobianSize = blockSize * outputCount;

    // Re-allocate the partial Jacobian matrix only if needed
    if (jacobian[0] == null || jacobian[0].Length < jacobianSize)
    {
        for (int i = 0; i < jacobian.Length; i++)
        {
            this.jacobian[i] = new float[jacobianSize];
        }
    }

    // Re-allocate error vector only if needed
    if (errors == null || errors.Length < jacobianSize)
    {
        errors = new double[jacobianSize];
    }

    // For each block (index == Blocks is the remainder block, if any)
    for (int s = 0; s <= Blocks; s++)
    {
        if (s == Blocks && finalBlock == 0)
        {
            continue;
        }

        Trace.TraceInformation("Starting Jacobian block {0}/{1}", s + 1, Blocks);

        int B = (s == Blocks) ? finalBlock : blockSize;
        int[] block = Matrix.Indices(s * blockSize, s * blockSize + B);
        double[][] inputBlock = input.Submatrix(block);
        double[][] outputBlock = output.Submatrix(block);

        // Compute the partial Jacobian matrix.
        // NOTE(review): the returned error replaces the previous block's
        // value instead of being accumulated; confirm this is intended
        // when Blocks > 1.
        if (method == JacobianMethod.ByBackpropagation)
        {
            sumOfSquaredErrors = JacobianByChainRule(inputBlock, outputBlock);
        }
        else
        {
            sumOfSquaredErrors = JacobianByFiniteDifference(inputBlock, outputBlock);
        }

        if (Double.IsNaN(sumOfSquaredErrors))
        {
            throw new ArithmeticException("Jacobian calculation has produced a non-finite number.");
        }

        Trace.TraceInformation("Jacobian block finished.");

        // Compute error gradient using Jacobian
        Trace.TraceInformation("Updating gradient.");

        for (int i = 0; i < jacobian.Length; i++)
        {
            double gsum = 0;
            for (int j = 0; j < jacobianSize; j++)
            {
                gsum += jacobian[i][j] * errors[j];
            }

            gradient[i] += (float)gsum;
        }

        // Compute Quasi-Hessian Matrix approximation
        // using the outer product Jacobian (H ~ J'J)
        Trace.TraceInformation("Updating Hessian.");

        Parallel.For(0, jacobian.Length, i =>
        {
            float[] ji = jacobian[i];
            float[] hi = hessian[i];

            for (int j = i; j < hi.Length; j++)
            {
                float[] jj = jacobian[j];

                double hsum = 0;
                for (int k = 0; k < jacobianSize; k++)
                {
                    hsum += ji[k] * jj[k];
                }

                // The Hessian need only be upper-triangular, since
                // it is symmetric. The Cholesky decomposition will
                // make use of this fact and use the lower-triangular
                // portion to hold the decomposition, conserving memory.
                hi[j] += (float)(2 * beta * hsum);
            }
        });
    }

    Trace.TraceInformation("Hessian computation finished.");

    // Store the Hessian's diagonal for future computations. The
    // diagonal will be destroyed in the decomposition, so it can
    // still be updated on every iteration by restoring this copy.
    for (int i = 0; i < hessian.Length; i++)
    {
        diagonal[i] = hessian[i][i];
    }

    // Create the initial weights vector
    sumOfSquaredWeights = saveNetworkToArray();

    // Define the objective function: (Bayesian regularization objective)
    double objective = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
    double current = objective + 1.0; // (starting value to enter iteration)

    // Begin of the main Levenberg-Marquardt method
    lambda /= v;

    // We'll try to find a direction with less error
    // (or where the objective function is smaller)
    while (current >= objective && lambda < lambdaMax)
    {
        lambda *= v;

        // Update diagonal (Levenberg-Marquardt)
        for (int i = 0; i < diagonal.Length; i++)
        {
            hessian[i][i] = (float)(diagonal[i] + 2 * lambda + 2 * alpha);
        }

        Trace.TraceInformation("Decomposition started.");

        // Decompose to solve the linear system. The Cholesky decomposition
        // is done in place, occupying the Hessian's lower-triangular part.
        decomposition = new JaggedCholeskyDecompositionF(hessian, robust: true, inPlace: true);

        Trace.TraceInformation("Decomposition ended.");

        // Check if the decomposition exists
        if (decomposition.IsNotDefined)
        {
            // The Hessian is singular. Continue to the next
            // iteration until the diagonal update transforms
            // it back to non-singular.
            continue;
        }

        Trace.TraceInformation("Solving linear system.");

        // Solve using Cholesky decomposition
        deltas = decomposition.Solve(gradient);

        Trace.TraceInformation("Updating weights.");

        // Update weights using the calculated deltas
        sumOfSquaredWeights = loadArrayIntoNetwork();

        // Calculate the new error
        sumOfSquaredErrors = ComputeError(input, output);

        // Update the objective function
        current = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;

        // If the objective function is bigger than before, the method
        // is tried again using a greater damping factor.
    }

    // If this iteration caused an error drop, then next
    // iteration will use a smaller damping factor.
    lambda /= v;

    // Keep the damping factor strictly positive. Repeated divisions would
    // otherwise let it underflow to zero, after which "lambda *= v" can no
    // longer increase it and the search loop above might never reach
    // lambdaMax. (Assumes lambda is a double field.)
    if (lambda < 1e-300)
    {
        lambda = 1e-300;
    }

    // If we are using Bayesian regularization, we need to
    // update the Bayesian hyperparameters alpha and beta.
    // The update is skipped when no usable decomposition is available:
    // either the search loop never ran (decomposition is still null) or
    // its last attempt was reported as not defined.
    if (useBayesianRegularization && decomposition != null && !decomposition.IsNotDefined)
    {
        // References:
        // - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
        // - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html

        // Compute the trace for the inverse Hessian in place. The
        // Hessian which was still being held together with the L
        // factorization will be destroyed after this computation.
        trace = decomposition.InverseTrace(destroy: true);

        // Poland update's formula:
        gamma = numberOfParameters - (alpha * trace);
        alpha = numberOfParameters / (2 * sumOfSquaredWeights + trace);
        beta = System.Math.Abs((N - gamma) / (2 * sumOfSquaredErrors));
        //beta = (N - gamma) / (2.0 * sumOfSquaredErrors);

        // Original MacKay's update formula:
        //  gamma = (double)networkParameters - (alpha * trace);
        //  alpha = gamma / (2.0 * sumOfSquaredWeights);
        //  beta = (gamma - N) / (2.0 * sumOfSquaredErrors);
    }

    return sumOfSquaredErrors;
}
/// <summary>
///   Runs a single learning epoch.
/// </summary>
///
/// <param name="input">Array of input vectors.</param>
/// <param name="output">Array of output vectors.</param>
///
/// <returns>Returns summary learning error for the epoch.</returns>
///
/// <exception cref="ArithmeticException">
///   The Jacobian computation returned NaN as its sum of squared errors.
/// </exception>
///
/// <remarks><para>The method runs one learning epoch by running the necessary
///   iterations of the Levenberg-Marquardt method to achieve an error decrease,
///   or until the damping factor reaches its maximum.</para></remarks>
///
public double RunEpoch(double[][] input, double[][] output)
{
    // Initial definitions and memory allocations
    int N = input.Length;

    JaggedCholeskyDecompositionF decomposition = null;
    double sumOfSquaredErrors = 0.0;
    double sumOfSquaredWeights = 0.0;
    double trace = 0.0;

    // Set upper triangular Hessian to zero
    for (int i = 0; i < hessian.Length; i++)
    {
        Array.Clear(hessian[i], i, hessian.Length - i);
    }

    // Set Gradient vector to zero
    Array.Clear(gradient, 0, gradient.Length);

    // Divide the problem into blocks. Instead of computing
    // a single Jacobian and a single error vector, we will
    // be computing multiple Jacobians for smaller problems
    // and then sum all blocks into the final Hessian matrix
    // and gradient vector.
    int blockSize = input.Length / Blocks;
    int finalBlock = input.Length % Blocks;
    int jacobianSize = blockSize * outputCount;

    // Re-allocate the partial Jacobian matrix only if needed
    if (jacobian[0] == null || jacobian[0].Length < jacobianSize)
    {
        for (int i = 0; i < jacobian.Length; i++)
        {
            this.jacobian[i] = new float[jacobianSize];
        }
    }

    // Re-allocate error vector only if needed
    if (errors == null || errors.Length < jacobianSize)
    {
        errors = new double[jacobianSize];
    }

    // For each block (index == Blocks is the remainder block, if any)
    for (int s = 0; s <= Blocks; s++)
    {
        if (s == Blocks && finalBlock == 0)
        {
            continue;
        }

        int B = (s == Blocks) ? finalBlock : blockSize;
        int[] block = Vector.Range(s * blockSize, s * blockSize + B);
        double[][] inputBlock = input.Get(block);
        double[][] outputBlock = output.Get(block);

        // Compute the partial Jacobian matrix.
        // NOTE(review): the returned error replaces the previous block's
        // value instead of being accumulated; confirm this is intended
        // when Blocks > 1.
        if (method == JacobianMethod.ByBackpropagation)
        {
            sumOfSquaredErrors = JacobianByChainRule(inputBlock, outputBlock);
        }
        else
        {
            sumOfSquaredErrors = JacobianByFiniteDifference(inputBlock, outputBlock);
        }

        if (Double.IsNaN(sumOfSquaredErrors))
        {
            throw new ArithmeticException("Jacobian calculation has produced a non-finite number.");
        }

        // Compute error gradient using Jacobian
        for (int i = 0; i < jacobian.Length; i++)
        {
            double gsum = 0;
            for (int j = 0; j < jacobianSize; j++)
            {
                gsum += jacobian[i][j] * errors[j];
            }

            gradient[i] += (float)gsum;
        }

        // Compute Quasi-Hessian Matrix approximation
        // using the outer product Jacobian (H ~ J'J)
        Parallel.For(0, jacobian.Length, ParallelOptions, i =>
        {
            float[] ji = jacobian[i];
            float[] hi = hessian[i];

            for (int j = i; j < hi.Length; j++)
            {
                float[] jj = jacobian[j];

                double hsum = 0;
                for (int k = 0; k < jacobianSize; k++)
                {
                    hsum += ji[k] * jj[k];
                }

                // The Hessian need only be upper-triangular, since
                // it is symmetric. The Cholesky decomposition will
                // make use of this fact and use the lower-triangular
                // portion to hold the decomposition, conserving memory.
                hi[j] += (float)(2 * beta * hsum);
            }
        });
    }

    // Store the Hessian's diagonal for future computations. The
    // diagonal will be destroyed in the decomposition, so it can
    // still be updated on every iteration by restoring this copy.
    for (int i = 0; i < hessian.Length; i++)
    {
        diagonal[i] = hessian[i][i];
    }

    // Create the initial weights vector
    sumOfSquaredWeights = saveNetworkToArray();

    // Define the objective function: (Bayesian regularization objective)
    double objective = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;
    double current = objective + 1.0; // (starting value to enter iteration)

    // Begin of the main Levenberg-Marquardt method
    lambda /= v;

    // We'll try to find a direction with less error
    // (or where the objective function is smaller)
    while (current >= objective && lambda < lambdaMax)
    {
        lambda *= v;

        // Update diagonal (Levenberg-Marquardt)
        for (int i = 0; i < diagonal.Length; i++)
        {
            hessian[i][i] = (float)(diagonal[i] + 2 * lambda + 2 * alpha);
        }

        // Decompose to solve the linear system. The Cholesky decomposition
        // is done in place, occupying the Hessian's lower-triangular part.
        decomposition = new JaggedCholeskyDecompositionF(hessian, robust: true, inPlace: true);

        // Check if the decomposition exists
        if (decomposition.IsUndefined)
        {
            // The Hessian is singular. Continue to the next
            // iteration until the diagonal update transforms
            // it back to non-singular.
            continue;
        }

        // Solve using Cholesky decomposition
        deltas = decomposition.Solve(gradient);

        // Update weights using the calculated deltas
        sumOfSquaredWeights = loadArrayIntoNetwork();

        // Calculate the new error
        sumOfSquaredErrors = ComputeError(input, output);

        // Update the objective function
        current = beta * sumOfSquaredErrors + alpha * sumOfSquaredWeights;

        // If the objective function is bigger than before, the method
        // is tried again using a greater damping factor.
    }

    // If this iteration caused an error drop, then next
    // iteration will use a smaller damping factor.
    lambda /= v;

    // Keep the damping factor strictly positive so that the
    // multiplication in the search loop can always increase it.
    if (lambda < 1e-300)
    {
        lambda = 1e-300;
    }

    // If we are using Bayesian regularization, we need to
    // update the Bayesian hyperparameters alpha and beta.
    // The update is skipped when no usable decomposition is available:
    // either the search loop never ran (decomposition is still null) or
    // its last attempt was reported as undefined.
    if (useBayesianRegularization && decomposition != null && !decomposition.IsUndefined)
    {
        // References:
        // - http://www-alg.ist.hokudai.ac.jp/~jan/alpha.pdf
        // - http://www.inference.phy.cam.ac.uk/mackay/Bayes_FAQ.html

        // Compute the trace for the inverse Hessian in place. The
        // Hessian which was still being held together with the L
        // factorization will be destroyed after this computation.
        trace = decomposition.InverseTrace(destroy: true);

        // Poland update's formula:
        gamma = numberOfParameters - (alpha * trace);
        alpha = numberOfParameters / (2 * sumOfSquaredWeights + trace);
        beta = System.Math.Abs((N - gamma) / (2 * sumOfSquaredErrors));
        //beta = (N - gamma) / (2.0 * sumOfSquaredErrors);

        // Original MacKay's update formula:
        //  gamma = (double)networkParameters - (alpha * trace);
        //  alpha = gamma / (2.0 * sumOfSquaredWeights);
        //  beta = (gamma - N) / (2.0 * sumOfSquaredErrors);
    }

    return sumOfSquaredErrors;
}
/// <summary>
///   Checks that <c>Inverse()</c> and <c>Solve</c> applied to a 3x3 identity
///   matrix both produce the expected inverse of a positive-definite matrix.
/// </summary>
public void InverseTest1()
{
    // Symmetric positive-definite input.
    float[][] matrix =
    {
        new float[] {  2, -1,  0 },
        new float[] { -1,  2, -1 },
        new float[] {  0, -1,  2 }
    };

    var decomposition = new JaggedCholeskyDecompositionF(matrix, robust: false);
    Assert.IsTrue(decomposition.IsPositiveDefinite);

    // The factor is read here but its contents are not inspected by this test.
    float[][] factor = decomposition.LeftTriangularFactor;

    float[][] expected =
    {
        new float[] { 0.750f, 0.500f, 0.250f },
        new float[] { 0.500f, 1.000f, 0.500f },
        new float[] { 0.250f, 0.500f, 0.750f },
    };

    // Direct inversion.
    float[][] inverse = decomposition.Inverse();
    Assert.IsTrue(inverse.IsEqual(expected, 1e-5f));

    // Solving against the identity must yield the same matrix.
    float[][] solved = decomposition.Solve(Jagged.Identity<float>(3));
    Assert.IsTrue(solved.IsEqual(expected, 1e-5f));
}