/// <summary>
/// Computes the inverse and the determinant of a square matrix from a single Cholesky
/// factorization of a working copy of the entries.
/// </summary>
/// <param name="matrix">The square matrix to process.</param>
/// <param name="inverse">Receives the inverse as a new <see cref="DualMatrix"/>.</param>
/// <param name="determinant">Receives the value returned by spdmatrixcholeskydet for the factorized entries.</param>
/// <exception cref="ArgumentException">The matrix is not square.</exception>
/// <exception cref="ArithmeticException">spdmatrixcholesky reports that the factorization failed.</exception>
public static void InverseDeterminant(DualMatrix matrix, out DualMatrix inverse, out DualNumber determinant)
{
    int size = matrix.Rows;
    if (size != matrix.Columns)
    {
        throw new ArgumentException("The matrix is not a square matrix.");
    }

    // Factorize a working copy (isupper = false, i.e. the lower triangle is used).
    DualNumber[,] work = matrix.ToArray();
    bool factorized = spdmatrixcholesky(ref work, size, false);
    if (!factorized)
    {
        throw new ArithmeticException();
    }

    determinant = spdmatrixcholeskydet(ref work, size);

    int info = 0;
    spdmatrixcholeskyinverse(ref work, size, false, ref info);

    // Mirror the entries below the diagonal into the upper triangle so that the
    // returned matrix is fully populated.
    for (int column = 1; column < size; column++)
    {
        for (int row = 0; row < column; row++)
        {
            work[row, column] = work[column, row];
        }
    }

    inverse = new DualMatrix(work);
}
/// <summary>
/// Computes the inverse and the determinant of a square matrix in one call. This saves time when
/// both are needed, because the inverse only has to be computed once.
/// </summary>
/// <param name="matrix">The square matrix to process.</param>
/// <param name="inverse">Receives the inverse of <paramref name="matrix"/>.</param>
/// <param name="determinant">Receives the determinant of <paramref name="matrix"/>.</param>
/// <exception cref="ArgumentException">The matrix is not square.</exception>
public static void InverseDeterminant(DualMatrix matrix, out DualMatrix inverse, out DualNumber determinant)
{
    int order = matrix.Rows;
    if (order != matrix.Columns)
    {
        throw new ArgumentException("The matrix isn't a square matrix.");
    }

    if (order > 3)
    {
        // General case: decompose the plain values once and derive both results
        // (including derivatives) from the value-level inverse and determinant.
        int derivativeCount = GradientLength(order, matrix);
        LUDecomposition decomposition = LUDecomposition.Decompose(matrix.GetValues());
        Matrix valueInverse = decomposition.Inverse();
        double valueDeterminant = decomposition.Determinant();

        inverse = Inverse(matrix, order, derivativeCount, valueInverse);
        determinant = Determinant(matrix, order, derivativeCount, valueInverse, valueDeterminant);
        return;
    }

    // Small matrices: the naive closed-form implementation seems to be faster (for all
    // values of n). Maybe it's faster to perform the LU decomposition directly on the
    // DualNumbers?
    determinant = DeterminantDirect(order, matrix);
    inverse = InverseDirect(order, matrix, determinant);
}
/// <summary>
/// Creates a vector from the given entries by storing them as a single-column
/// <see cref="DualMatrix"/>.
/// </summary>
/// <param name="entries">The entries of the vector, in order.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="entries"/> is null.</exception>
public DualVector(DualNumber[] entries)
{
    // Fail with a descriptive exception instead of a NullReferenceException on entries.Length.
    if (entries == null)
    {
        throw new System.ArgumentNullException("entries");
    }

    int n = entries.Length;
    DualNumber[,] column = new DualNumber[n, 1];
    for (int i = 0; i < n; i++)
    {
        column[i, 0] = entries[i];
    }

    inner = new DualMatrix(column);
}
/// <summary>
/// Creates a matrix of the given size in which every entry is set to the same value.
/// </summary>
/// <param name="rows">The number of rows; must not be negative.</param>
/// <param name="columns">The number of columns; must not be negative.</param>
/// <param name="value">The value assigned to every entry.</param>
/// <exception cref="ArgumentOutOfRangeException">A dimension is negative.</exception>
public DualMatrix(int rows, int columns, DualNumber value)
{
    bool validSize = rows >= 0 && columns >= 0;
    if (!validSize)
    {
        throw new ArgumentOutOfRangeException();
    }

    DualNumber[,] filled = new DualNumber[rows, columns];
    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < columns; c++)
        {
            filled[r, c] = value;
        }
    }

    entries = filled;
}
/// <summary>
/// Matrix-vector product on sub-blocks: for opa == 0 writes y[iy..iy+m-1] = A * x[ix..ix+n-1],
/// where A is the m-by-n block of <paramref name="a"/> whose top-left corner is (ia, ja).
/// </summary>
/// <remarks>
/// Nothing is written when m == 0. When n == 0 the m output entries are set to zero.
/// opa == 1 is not implemented; any other value of opa leaves y untouched.
/// </remarks>
private static void rmatrixmv(int m, int n, ref DualNumber[,] a, int ia, int ja, int opa, ref DualNumber[] x, int ix, ref DualNumber[] y, int iy)
{
    if (m == 0)
    {
        return;
    }

    if (n == 0)
    {
        // Empty inner dimension: the product is the zero vector.
        for (int row = 0; row <= m - 1; row++)
        {
            y[iy + row] = 0;
        }
        return;
    }

    // The optimized kernel from the original library is not available here:
    //if (ablasf.rmatrixmvf(m, n, ref a, ia, ja, opa, ref x, ix, ref y, iy))
    //{
    //    return;
    //}

    switch (opa)
    {
        case 0:
        {
            // y = A*x
            int shift = ix - ja;          // maps a column index of A to an index into x
            int lastColumn = ja + n - 1;
            for (int row = 0; row <= m - 1; row++)
            {
                DualNumber sum = 0.0;
                for (int column = ja; column <= lastColumn; column++)
                {
                    sum += a[ia + row, column] * x[column + shift];
                }
                y[iy + row] = sum;
            }
            return;
        }

        case 1:
            throw new NotImplementedException();
    }
}
/// <summary>
/// Creates a matrix from a two-dimensional array of entries. The array is cloned, so later
/// changes to the caller's array do not affect this instance.
/// </summary>
/// <param name="entries">The entries; neither the array nor any element may be null.</param>
/// <exception cref="ArgumentNullException">The array or one of its elements is null.</exception>
public DualMatrix(DualNumber[,] entries)
{
    if (entries == null)
    {
        throw new ArgumentNullException();
    }

    // Visits every element in row-major order and rejects the first null found.
    foreach (DualNumber entry in entries)
    {
        if (entry == null)
        {
            throw new ArgumentNullException();
        }
    }

    this.entries = (DualNumber[,])entries.Clone();
}
/// <summary>
/// Builds the inverse of <paramref name="matrix"/> as a <see cref="DualMatrix"/>, attaching first
/// and second derivatives to the already computed value-level inverse.
/// </summary>
/// <param name="matrix">The m-by-m matrix being inverted; supplies the derivatives of each entry.</param>
/// <param name="m">The number of rows (and columns) of the matrix.</param>
/// <param name="n">The number of variables differentiated with respect to; negative means no derivatives.</param>
/// <param name="inverse">The inverse of the plain values of <paramref name="matrix"/>.</param>
/// <returns>
/// The inverse with gradient and Hessian information, or <paramref name="inverse"/> converted to a
/// <see cref="DualMatrix"/> when <paramref name="n"/> is negative.
/// </returns>
private static DualMatrix Inverse(DualMatrix matrix, int m, int n, Matrix inverse)
{
    if (n < 0)
    {
        // Return a matrix without derivatives information (using the implicit matrix conversion).
        return inverse;
    }

    double[,][] gradientArrays = new double[m, m][];

    // Compute all gradients in the first loop. These are used to compute the Hessian.
    for (int i0 = 0; i0 < m; i0++)
    {
        for (int j0 = 0; j0 < m; j0++)
        {
            double[] gradientArray = new double[n];

            for (int i = 0; i < n; i++)
            {
                // Formula (36) in The Matrix Cookbook: d(A^-1)/dx_i = -A^-1 (dA/dx_i) A^-1.
                double s = 0.0;
                for (int k = 0; k < m; k++)
                {
                    for (int l = 0; l < m; l++)
                    {
                        s += inverse[i0, k] * matrix[k, l].Gradient[i] * inverse[l, j0];
                    }
                }

                gradientArray[i] = -s;
            }

            gradientArrays[i0, j0] = gradientArray;
        }
    }

    // Compute Hessians and DualNumber instances.
    DualNumber[,] a = new DualNumber[m, m];
    for (int i0 = 0; i0 < m; i0++)
    {
        for (int j0 = 0; j0 < m; j0++)
        {
            // Upper triangle of the symmetric Hessian, packed row by row; h is the packed index.
            double[] hessianArray = new double[n * (n + 1) / 2];

            for (int i = 0, h = 0; i < n; i++)
            {
                for (int j = i; j < n; j++, h++)
                {
                    // Differentiating formula (36) once more with respect to x_i gives
                    //   -[ d(A^-1)/dx_i (dA/dx_j) A^-1
                    //      + A^-1 (d^2A/dx_i dx_j) A^-1
                    //      + A^-1 (dA/dx_j) d(A^-1)/dx_i ].
                    // The middle term needs the second derivative of the matrix entry, i.e.
                    // Hessian[i, j]. The packed index h addresses hessianArray only and must not
                    // be used to index the gradient.
                    double s = 0.0;
                    for (int k = 0; k < m; k++)
                    {
                        for (int l = 0; l < m; l++)
                        {
                            s += gradientArrays[i0, k][i] * matrix[k, l].Gradient[j] * inverse[l, j0]
                                + inverse[i0, k] * matrix[k, l].Hessian[i, j] * inverse[l, j0]
                                + inverse[i0, k] * matrix[k, l].Gradient[j] * gradientArrays[l, j0][i];
                        }
                    }

                    hessianArray[h] = -s;
                }
            }

            a[i0, j0] = new DualNumber(inverse[i0, j0], gradientArrays[i0, j0], hessianArray);
        }
    }

    return new DualMatrix(a);
}
/// <summary>
/// Derivative checker: evaluates the function at <paramref name="point"/> and prints, for every
/// gradient and Hessian entry, the computed derivative next to a finite-difference approximation.
/// Entries whose relative error reaches the tolerance are flagged with "*"; the remaining entries
/// are printed only when showAll is set.
/// </summary>
/// <param name="point">The point at which the derivatives are checked.</param>
public void Test(IPoint point)
{
    // Finite-difference stencil, see http://en.wikipedia.org/wiki/Finite_difference_coefficients.
    // Other stencils that can be substituted here:
    //   offsets { 0, 1 },                weights { -1.0, 1.0 }
    //   offsets { -1, 1 },               weights { -0.5, 0.5 }
    //   offsets { -3, -2, -1, 1, 2, 3 }, weights { -0.016666666666666666, 0.15, -0.75, 0.75, -0.15, 0.016666666666666666 }
    int stencilSize = 4;
    int[] offsets = new int[] { -2, -1, 1, 2 };
    double[] weights = new double[] { 0.083333333333333329, -0.66666666666666663, 0.66666666666666663, -0.083333333333333329 };

    // The following test code is heavily inspired by Ipopt::TNLPAdapter::CheckDerivatives in IPOPT
    // (see IpTNLPAdapter.cpp).

    Console.WriteLine("Evaluating unperturbed function");
    Console.WriteLine();

    DualNumber y = Compute(point);

    double[] delta = new double[n];
    DualNumber[] ydelta = new DualNumber[n];

    Console.WriteLine("Starting derivative checker for first derivatives");
    Console.WriteLine();

    for (int i = 0; i < n; i++)
    {
        double exact = y.Gradient[i];

        try
        {
            // Scale the step with the magnitude of the coordinate.
            if (point[variables[i]] == 0.0)
            {
                // Don't know the scale in this particular case. Choose an arbitrary scale (i.e. 1).
                delta[i] = perturbation;
            }
            else
            {
                delta[i] = perturbation * Math.Abs(point[variables[i]]);
            }

            double approx = 0.0;
            for (int s = 0; s < stencilSize; s++)
            {
                approx += weights[s] * ComputeDelta(point, i, offsets[s] * delta[i]).Value;
            }
            approx /= delta[i];

            // Only needed by the one-sided alternative below.
            ydelta[i] = ComputeDelta(point, i, delta[i]);
            //approx = (ydelta[i].Value - y.Value) / delta[i];

            double relativeError = Math.Abs(approx - exact) / Math.Max(1.0, Math.Abs(approx));
            bool error = relativeError >= tolerance;

            if (error || showAll)
            {
                string marker = error ? "*" : " ";
                Console.WriteLine("{0} Gradient [{1} {2}] = {3} ~ {4} [{5}]",
                    marker, FormatIndex(-1), FormatIndex(i), FormatValue(exact), FormatValue(approx), FormatRelativeValue(relativeError));
            }
        }
        catch (ArithmeticException)
        {
            Console.WriteLine("* Gradient [{0} {1}] = {2} ~ FAILED", FormatIndex(-1), FormatIndex(i), FormatValue(exact));
        }
    }

    Console.WriteLine();
    Console.WriteLine();
    Console.WriteLine("Starting derivative checker for second derivatives");
    Console.WriteLine();

    for (int i = 0; i < n; i++)
    {
        // Though the Hessian is supposed to be symmetric test the full matrix anyway
        // (the finite difference could very well be different and provide insight).
        for (int j = 0; j < n; j++)
        {
            double exact = y.Hessian[i, j];

            try
            {
                // Differentiate the j-th gradient component along coordinate i.
                double approx = 0.0;
                for (int s = 0; s < stencilSize; s++)
                {
                    approx += weights[s] * ComputeDelta(point, i, offsets[s] * delta[i]).Gradient[j];
                }
                approx /= delta[i];
                //approx = (ydelta[i].Gradient[j] - y.Gradient[j]) / delta[i];

                double relativeError = Math.Abs(approx - exact) / Math.Max(1.0, Math.Abs(approx));
                bool error = relativeError >= tolerance;

                if (error || showAll)
                {
                    string marker = error ? "*" : " ";
                    Console.WriteLine("{0} Hessian [{1},{2}] = {3} ~ {4} [{5}]",
                        marker, FormatIndex(i), FormatIndex(j), FormatValue(exact), FormatValue(approx), FormatRelativeValue(relativeError));
                }
            }
            catch (ArithmeticException)
            {
                Console.WriteLine("* Hessian [{0},{1}] = {2} ~ FAILED", FormatIndex(i), FormatIndex(j), FormatValue(exact));
            }
        }
    }
}
/// <summary>
/// Square root of a dual number, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The argument.</param>
/// <returns>sqrt(f), carrying first and second derivatives.</returns>
public static DualNumber Sqrt(DualNumber f)
{
    double x = f.value;
    double root = Math.Sqrt(x);

    // d/dx sqrt(x) = 1 / (2 sqrt(x));  d^2/dx^2 sqrt(x) = -1 / (4 x sqrt(x)).
    double first = 0.5 / root;
    double second = -0.5 * first / x;

    return new DualNumber(f, root, first, second);
}
/// <summary>
/// Square of a dual number, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The argument.</param>
/// <returns>f^2, carrying first and second derivatives.</returns>
public static DualNumber Sqr(DualNumber f)
{
    double x = f.value;

    // g(x) = x^2, g'(x) = 2x, g''(x) = 2.
    double square = x * x;
    double first = 2.0 * x;
    double second = 2.0;

    return new DualNumber(f, square, first, second);
}
/// <summary>
/// Cholesky factorization of the n-by-n block of <paramref name="a"/> starting at (offs, offs).
/// A 1-by-1 block is handled directly; larger blocks are delegated to spdmatrixcholesky2.
/// </summary>
/// <returns>
/// false when n is less than 1 or when the single diagonal value of a 1-by-1 block is not
/// positive; otherwise true for a 1-by-1 block, or the result of spdmatrixcholesky2.
/// </returns>
private static bool spdmatrixcholeskyrec(ref DualNumber[,] a, int offs, int n, bool isupper, ref DualNumber[] tmp)
{
    // Check N.
    if (n < 1)
    {
        return false;
    }

    // General case. The original library only takes this path for blocks up to
    // ablas.ablasblocksize(ref a) and splits larger ones; here it is used for every n > 1.
    if (n != 1)
    {
        return spdmatrixcholesky2(ref a, offs, n, isupper, ref tmp);
    }

    // Special case: 1x1 block. The factor is the square root of a positive diagonal value.
    if (!(a[offs, offs].Value > 0.0))
    {
        return false;
    }

    a[offs, offs] = DualNumber.Sqrt(a[offs, offs]);
    return true;
}
/// <summary>
/// Returns whichever argument has the smaller value, together with that argument's derivatives.
/// </summary>
/// <param name="f">The first candidate.</param>
/// <param name="g">The second candidate; also returned when the values are not strictly ordered.</param>
public static DualNumber Min(DualNumber f, DualNumber g)
{
    // Like the step function: the result simply switches between the two arguments.
    if (f.Value < g.Value)
    {
        return f;
    }

    return g;
}
/// <summary>
/// Returns a copy of this matrix in which a single entry has been replaced. This instance is
/// not modified.
/// </summary>
/// <param name="row">The zero-based row of the entry to replace.</param>
/// <param name="column">The zero-based column of the entry to replace.</param>
/// <param name="value">The new value of the entry.</param>
/// <exception cref="ArgumentOutOfRangeException">The row or column lies outside the matrix.</exception>
public DualMatrix SetEntry(int row, int column, DualNumber value)
{
    if (row < 0 || row >= Rows)
    {
        throw new ArgumentOutOfRangeException();
    }

    if (column < 0 || column >= Columns)
    {
        throw new ArgumentOutOfRangeException();
    }

    DualMatrix copy = new DualMatrix(entries);
    copy[row, column] = value;
    return copy;
}
/// <summary>
/// Exponential of a dual number, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The argument.</param>
/// <returns>e^f, carrying first and second derivatives.</returns>
public static DualNumber Exp(DualNumber f)
{
    // The exponential function is its own first and second derivative.
    double exponential = Math.Exp(f.value);
    return new DualNumber(f, exponential, exponential, exponential);
}
/// <summary>
/// Natural logarithm of a dual number, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The argument.</param>
/// <returns>ln(f), carrying first and second derivatives.</returns>
public static DualNumber Log(DualNumber f)
{
    double x = f.value;
    double logarithm = Math.Log(x);

    // d/dx ln(x) = 1/x;  d^2/dx^2 ln(x) = -1/x^2.
    double first = 1.0 / x;
    double second = -first / x;

    return new DualNumber(f, logarithm, first, second);
}
/// <summary>
/// Type initializer: creates the shared constant for zero once.
/// </summary>
static DualNumber()
{
    // The zero constant is used very often (e.g. in matrix initialization).
    zero = new DualNumber(0.0);
}
/// <summary>
/// Applies a binary operation h(x)=g(f_1(x),f_2(x)). The chain rule yields h(x), h'(x), and
/// h''(x) from the inner functions, given the value and the first and second partial derivatives
/// of the outer function (the binary operation).
/// </summary>
/// <param name="f1">The first inner function f_1 (left of the binary operator).</param>
/// <param name="f2">The second inner function f_2 (right of the binary operator).</param>
/// <param name="g">g(f_1(x),f_2(x)).</param>
/// <param name="g1">The partial derivative $\frac{\partial g}{\partial x_1}(f_1(x),f_2(x))$.</param>
/// <param name="g2">The partial derivative $\frac{\partial g}{\partial x_2}(f_1(x),f_2(x))$.</param>
/// <param name="g11">The partial derivative $\frac{\partial^2g}{\partial x_1^2}(f_1(x),f_2(x))$.</param>
/// <param name="g12">The partial derivative $\frac{\partial^2g}{\partial x_1\partial x_2}(f_1(x),f_2(x))$.</param>
/// <param name="g22">The partial derivative $\frac{\partial^2g}{\partial x_2^2}(f_1(x),f_2(x))$.</param>
/// <exception cref="ArgumentException">Both operands carry gradients of different lengths.</exception>
public DualNumber(DualNumber f1, DualNumber f2, double g, double g1, double g2, double g11, double g12, double g22)
{
    value = g;

    if (f1.gradientArray == null && f2.gradientArray == null)
    {
        // Neither operand carries derivatives, so the result carries none either.
        return;
    }

    if (f1.gradientArray != null && f2.gradientArray != null && f1.n != f2.n)
    {
        throw new ArgumentException("Inconsistent number of derivatives.");
    }

    // One of the counters may be zero if the corresponding DualNumber is a constant.
    n = Math.Max(f1.n, f2.n);

    // Gradient: g1 * f1' + g2 * f2'.
    gradientArray = new double[n];

    if (g1 != 0.0 && f1.gradientArray != null)
    {
        for (int p = 0; p < n; p++)
        {
            gradientArray[p] += g1 * f1.gradientArray[p];
        }
    }

    if (g2 != 0.0 && f2.gradientArray != null)
    {
        for (int p = 0; p < n; p++)
        {
            gradientArray[p] += g2 * f2.gradientArray[p];
        }
    }

    bool needsHessian = f1.hessianArray != null || f2.hessianArray != null || g11 != 0.0 || g12 != 0.0 || g22 != 0.0;
    if (!needsHessian)
    {
        return;
    }

    // Hessian, stored as the upper triangle packed row by row (slot is the packed index):
    //   g1 * f1'' + g2 * f2'' + g11 * f1' f1'^T + g22 * f2' f2'^T + g12 * (f1' f2'^T + f2' f1'^T).
    hessianArray = new double[HessianSize(n)];

    if (g1 != 0.0 && f1.hessianArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g1 * f1.hessianArray[slot];
                slot++;
            }
        }
    }

    if (g2 != 0.0 && f2.hessianArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g2 * f2.hessianArray[slot];
                slot++;
            }
        }
    }

    if (g11 != 0.0 && f1.gradientArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g11 * f1.gradientArray[row] * f1.gradientArray[column];
                slot++;
            }
        }
    }

    if (g22 != 0.0 && f2.gradientArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g22 * f2.gradientArray[row] * f2.gradientArray[column];
                slot++;
            }
        }
    }

    if (g12 != 0.0 && f1.gradientArray != null && f2.gradientArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g12 * (f1.gradientArray[row] * f2.gradientArray[column] + f2.gradientArray[row] * f1.gradientArray[column]);
                slot++;
            }
        }
    }
}
/// <summary>
/// Applies a unary operation h(x)=g(f(x)). The chain rule yields h(x), h'(x), and h''(x) from
/// the inner function, given the value and the first and second derivative of the outer
/// function (the unary operation).
/// </summary>
/// <param name="f">The inner function f.</param>
/// <param name="g">g(f(x)).</param>
/// <param name="g1">g'(f(x)).</param>
/// <param name="g11">g''(f(x)).</param>
public DualNumber(DualNumber f, double g, double g1, double g11)
{
    value = g;

    if (f.gradientArray == null)
    {
        // The inner function carries no derivatives, so the result carries none either.
        return;
    }

    n = f.n;

    // Gradient: g' * f'.
    gradientArray = new double[n];
    if (g1 != 0.0)
    {
        for (int p = 0; p < n; p++)
        {
            gradientArray[p] += g1 * f.gradientArray[p];
        }
    }

    if (f.hessianArray == null && g11 == 0.0)
    {
        // No second-order contribution from either the inner or the outer function.
        return;
    }

    // Hessian, stored as the upper triangle packed row by row (slot is the packed index):
    //   g' * f'' + g'' * f' f'^T.
    hessianArray = new double[HessianSize(n)];

    if (g1 != 0.0 && f.hessianArray != null)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g1 * f.hessianArray[slot];
                slot++;
            }
        }
    }

    if (g11 != 0.0)
    {
        int slot = 0;
        for (int row = 0; row < n; row++)
        {
            for (int column = row; column < n; column++)
            {
                hessianArray[slot] += g11 * f.gradientArray[row] * f.gradientArray[column];
                slot++;
            }
        }
    }
}
/// <summary>
/// Closed-form inverse of a 1x1, 2x2 or 3x3 matrix, computed directly on the dual numbers.
/// </summary>
/// <param name="m">The number of rows (and columns) of the matrix; must be 1, 2 or 3.</param>
/// <param name="matrix">The matrix to invert.</param>
/// <param name="determinant">The determinant of the matrix (not used for the 1x1 case).</param>
/// <returns>The inverse matrix.</returns>
/// <exception cref="NotImplementedException"><paramref name="m"/> is not 1, 2 or 3.</exception>
private static DualMatrix InverseDirect(int m, DualMatrix matrix, DualNumber determinant)
{
    switch (m)
    {
        case 1:
        {
            DualNumber a11 = matrix[0, 0];

            DualNumber[,] result = new DualNumber[1, 1];
            result[0, 0] = 1.0 / a11;
            return new DualMatrix(result);
        }

        case 2:
        {
            DualNumber a11 = matrix[0, 0];
            DualNumber a12 = matrix[0, 1];
            DualNumber a21 = matrix[1, 0];
            DualNumber a22 = matrix[1, 1];

            // Swap the diagonal, negate the off-diagonal, divide by the determinant.
            DualNumber[,] result = new DualNumber[2, 2];
            result[0, 0] = a22 / determinant;
            result[0, 1] = -a12 / determinant;
            result[1, 0] = -a21 / determinant;
            result[1, 1] = a11 / determinant;
            return new DualMatrix(result);
        }

        case 3:
        {
            DualNumber a11 = matrix[0, 0];
            DualNumber a12 = matrix[0, 1];
            DualNumber a13 = matrix[0, 2];
            DualNumber a21 = matrix[1, 0];
            DualNumber a22 = matrix[1, 1];
            DualNumber a23 = matrix[1, 2];
            DualNumber a31 = matrix[2, 0];
            DualNumber a32 = matrix[2, 1];
            DualNumber a33 = matrix[2, 2];

            // Using http://mathworld.wolfram.com/MatrixInverse.html.
            DualNumber[,] result = new DualNumber[3, 3];
            result[0, 0] = (a22 * a33 - a23 * a32) / determinant;
            result[0, 1] = (a13 * a32 - a12 * a33) / determinant;
            result[0, 2] = (a12 * a23 - a13 * a22) / determinant;
            result[1, 0] = (a23 * a31 - a21 * a33) / determinant;
            result[1, 1] = (a11 * a33 - a13 * a31) / determinant;
            result[1, 2] = (a13 * a21 - a11 * a23) / determinant;
            result[2, 0] = (a21 * a32 - a22 * a31) / determinant;
            result[2, 1] = (a12 * a31 - a11 * a32) / determinant;
            result[2, 2] = (a11 * a22 - a12 * a21) / determinant;
            return new DualMatrix(result);
        }

        default:
            throw new NotImplementedException();
    }
}
/// <summary>
/// Creates a matrix of dual numbers from a matrix of values and, optionally, matrices holding
/// the partial derivatives of every entry.
/// </summary>
/// <param name="values">The values of the entries.</param>
/// <param name="gradients">
/// One matrix per variable; element [i, j] of the k-th matrix is the derivative of entry [i, j]
/// with respect to the k-th variable. May be null when no derivatives are available.
/// </param>
/// <param name="hessians">
/// An n-by-n array of matrices, where n is the number of gradients; element [i, j] of
/// hessians[k, l] is the second derivative of entry [i, j] with respect to variables k and l.
/// May be null; if specified, <paramref name="gradients"/> must be specified too.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="values"/> is null, or a specified gradient or Hessian array contains null.
/// </exception>
/// <exception cref="ArgumentException">
/// The Hessians are specified without gradients, the number of derivatives is inconsistent, or
/// a derivative matrix does not have the same size as <paramref name="values"/>.
/// </exception>
public DualMatrix(Matrix values, Matrix[] gradients, Matrix[,] hessians)
{
    // Only the values are mandatory. Previously null gradients/hessians were rejected here as
    // well, which made all of the optional-argument handling below unreachable.
    if (values == null)
    {
        throw new ArgumentNullException("values");
    }

    int rows = values.Rows;
    int columns = values.Columns;

    entries = new DualNumber[rows, columns];

    int n = 0;

    if (gradients != null)
    {
        n = gradients.Length;

        for (int i = 0; i < n; i++)
        {
            if (gradients[i] == null)
            {
                throw new ArgumentNullException("gradients", "The gradients must be fully specified.");
            }

            if (gradients[i].Rows != rows || gradients[i].Columns != columns)
            {
                throw new ArgumentException("Inconsistent matrix sizes.");
            }
        }
    }

    if (hessians != null)
    {
        if (gradients == null)
        {
            throw new ArgumentException("The gradients must be specified if the Hessians are specified.");
        }

        if (hessians.GetLength(0) != n || hessians.GetLength(1) != n)
        {
            throw new ArgumentException("Inconsistent number of derivatives.");
        }

        for (int i = 0; i < n; i++)
        {
            for (int j = 0; j < n; j++)
            {
                if (hessians[i, j] == null)
                {
                    throw new ArgumentNullException("hessians", "The Hessians must be fully specified.");
                }

                if (hessians[i, j].Rows != rows || hessians[i, j].Columns != columns)
                {
                    throw new ArgumentException("Inconsistent matrix sizes.");
                }
            }
        }
    }

    // Transpose the layout: the input is indexed by variable first and matrix position second,
    // whereas every DualNumber entry holds its own gradient vector and Hessian matrix.
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < columns; j++)
        {
            double value = values[i, j];

            Vector gradient = null;
            if (gradients != null)
            {
                double[] a = new double[n];
                for (int k = 0; k < n; k++)
                {
                    a[k] = gradients[k][i, j];
                }
                gradient = new Vector(a);
            }

            Matrix hessian = null;
            if (hessians != null)
            {
                double[,] a = new double[n, n];
                for (int k = 0; k < n; k++)
                {
                    for (int l = 0; l < n; l++)
                    {
                        a[k, l] = hessians[k, l][i, j];
                    }
                }
                hessian = new Matrix(a);
            }

            // NOTE(review): relies on this DualNumber constructor accepting a null gradient
            // and/or Hessian when they are not specified - confirm against its definition.
            entries[i, j] = new DualNumber(value, gradient, hessian);
        }
    }
}
/*************************************************************************
Copyright (c) 2005-2007, Sergey Bochkanov (ALGLIB project).

>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses

>>> END OF LICENSE >>>
*************************************************************************/
/// <summary>
/// Determinant of a matrix, computed from a Cholesky factorization of a copy of the input
/// (the caller's array is left untouched).
/// </summary>
/// <returns>
/// The value returned by spdmatrixcholeskydet, or -1 when spdmatrixcholesky reports that the
/// factorization failed.
/// </returns>
private static DualNumber spdmatrixdet(DualNumber[,] a, int n, bool isupper)
{
    // The factorization works in place, so operate on a private copy.
    DualNumber[,] work = (DualNumber[,])a.Clone();

    if (spdmatrixcholesky(ref work, n, isupper))
    {
        return spdmatrixcholeskydet(ref work, n);
    }

    return -1;
}
/// <summary>
/// Copies the entries of this vector into a new array.
/// </summary>
/// <returns>A new array holding the entries in order.</returns>
public DualNumber[] ToArray()
{
    DualNumber[] copy = new DualNumber[Length];

    // The vector is stored as the first (and only) column of the inner matrix.
    for (int index = 0; index < copy.Length; index++)
    {
        copy[index] = inner[index, 0];
    }

    return copy;
}
/// <summary>
/// Raises a dual number to a constant power, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The base.</param>
/// <param name="a">The constant exponent.</param>
/// <returns>f^a, carrying first and second derivatives.</returns>
public static DualNumber Pow(DualNumber f, double a)
{
    double x = f.value;

    // g(x) = x^a, g'(x) = a x^(a-1), g''(x) = a (a-1) x^(a-2).
    double power = Math.Pow(x, a);
    double first = a * Math.Pow(x, a - 1.0);
    double second = a * (a - 1.0) * Math.Pow(x, a - 2.0);

    return new DualNumber(f, power, first, second);
}
/// <summary>
/// In-place inversion via Cholesky: factorizes <paramref name="a"/> with spdmatrixcholesky and
/// then inverts it with spdmatrixcholeskyinverse.
/// </summary>
/// <param name="info">
/// Set to -1 when n is less than 1 and to -3 when the factorization fails. Otherwise it is set
/// to 1 and then passed on to spdmatrixcholeskyinverse, which may update it.
/// </param>
private static void spdmatrixinverse(ref DualNumber[,] a, int n, bool isupper, ref int info)
{
    if (n < 1)
    {
        info = -1;
        return;
    }

    info = 1;

    if (!spdmatrixcholesky(ref a, n, isupper))
    {
        info = -3;
        return;
    }

    spdmatrixcholeskyinverse(ref a, n, isupper, ref info);
}
/// <summary>
/// Copies the entries of this matrix into a new jagged array with one inner array per row.
/// </summary>
/// <returns>A new array of rows, each holding that row's entries in column order.</returns>
public DualNumber[][] ToJaggedArray()
{
    DualNumber[][] jagged = new DualNumber[Rows][];

    for (int r = 0; r < Rows; r++)
    {
        DualNumber[] line = new DualNumber[Columns];
        jagged[r] = line;

        for (int c = 0; c < Columns; c++)
        {
            line[c] = this[r, c];
        }
    }

    return jagged;
}
/// <summary>
/// Raises a constant base to a dual-number exponent, with derivatives propagated through the
/// chain rule.
/// </summary>
/// <param name="a">The constant base.</param>
/// <param name="f">The exponent.</param>
/// <returns>a^f, carrying first and second derivatives.</returns>
public static DualNumber Pow(double a, DualNumber f)
{
    // g(x) = a^x, g'(x) = ln(a) a^x, g''(x) = ln(a)^2 a^x.
    double power = Math.Pow(a, f.value);
    double logBase = Math.Log(a);
    double first = logBase * power;
    double second = logBase * first;

    return new DualNumber(f, power, first, second);
}
/// <summary>
/// Returns a new vector built from the inner matrix with the entry at
/// <paramref name="index"/> replaced by <paramref name="t"/>.
/// </summary>
/// <param name="index">The zero-based index of the entry to replace.</param>
/// <param name="t">The new value of the entry.</param>
public DualVector SetEntry(int index, DualNumber t)
{
    // The vector is stored as a single column, so the entry lives in column 0.
    DualMatrix updated = inner.SetEntry(index, 0, t);
    return new DualVector(updated);
}
/// <summary>
/// Raises a dual number to a dual-number power, with derivatives of both the base and the
/// exponent propagated through the chain rule.
/// </summary>
/// <param name="f1">The base.</param>
/// <param name="f2">The exponent.</param>
/// <returns>f1^f2, carrying first and second derivatives.</returns>
public static DualNumber Pow(DualNumber f1, DualNumber f2)
{
    double x = f1.value;
    double y = f2.value;

    double power = Math.Pow(x, y);

    // Partial derivatives with respect to the base: y x^(y-1) and y (y-1) x^(y-2).
    double powerMinusOne = Math.Pow(x, y - 1.0);
    double dBase = y * powerMinusOne;
    double dBaseBase = y * (y - 1.0) * Math.Pow(x, y - 2.0);

    // Partial derivatives with respect to the exponent: ln(x) x^y and ln(x)^2 x^y.
    double logBase = Math.Log(x);
    double dExponent = logBase * power;
    double dExponentExponent = logBase * dExponent;

    // Mixed partial derivative: x^(y-1) (1 + y ln(x)).
    double dBaseExponent = powerMinusOne * (1.0 + logBase * y);

    return new DualNumber(f1, f2, power, dBase, dExponent, dBaseBase, dBaseExponent, dExponentExponent);
}
/// <summary>
/// Sine of a dual number, with derivatives propagated through the chain rule.
/// </summary>
/// <param name="f">The argument.</param>
/// <returns>sin(f), carrying first and second derivatives.</returns>
public static DualNumber Sin(DualNumber f)
{
    double x = f.value;

    // g(x) = sin(x), g'(x) = cos(x), g''(x) = -sin(x).
    double sine = Math.Sin(x);
    double cosine = Math.Cos(x);

    return new DualNumber(f, sine, cosine, -sine);
}
/// <summary>
/// Second stage of the Cholesky-based inversion for the n-by-n block of <paramref name="a"/>
/// starting at (offs, offs): first calls rmatrixtrinverserec on the block, then overwrites the
/// lower triangle of the block in place with the product L' * L of the resulting factor.
/// </summary>
/// <remarks>
/// Does nothing when n is less than 1. Only the lower-triangular case is implemented;
/// isupper == true throws <see cref="NotImplementedException"/> (after rmatrixtrinverserec has
/// already been called). The block-recursive splitting of the original library is disabled, so
/// the base case below handles every size.
/// NOTE(review): presumably rmatrixtrinverserec inverts the triangular factor in place, and
/// <paramref name="tmp"/> must hold at least n - 1 elements - confirm against its definition
/// and the caller.
/// </remarks>
private static void spdmatrixcholeskyinverserec(ref DualNumber[,] a, int offs, int n, bool isupper, ref DualNumber[] tmp)
{
    int i = 0;
    int j = 0;
    DualNumber v = 0;
    //int n1 = 0;
    //int n2 = 0;
    // Status written by rmatrixtrinverserec; it is not inspected afterwards.
    int info2 = 0;
    //matinvreport rep2 = new matinvreport();
    int i_ = 0;
    int i1_ = 0;
    if (n < 1)
    {
        return;
    }
    //
    // Base case
    //
    //if (n <= ablas.ablasblocksize(ref a))
    {
        rmatrixtrinverserec(ref a, offs, n, isupper, false, ref tmp, ref info2);
        if (isupper)
        {
            throw new NotImplementedException();
        }
        else
        {
            //
            // Compute the product L' * L
            // NOTE: we never assume that diagonal of L is real
            //
            // The product is built up one row at a time: after step i, the leading
            // (i+1)x(i+1) part of the block holds the product for the leading part of L.
            //
            for (i = 0; i <= n - 1; i++)
            {
                if (i == 0)
                {
                    //
                    // 1x1 matrix
                    //
                    a[offs + i, offs + i] = DualNumber.Sqr(a[offs + i, offs + i]);
                }
                else
                {
                    //
                    // (I+1)x(I+1) matrix,
                    //
                    // ( A11^H  A21^H )   ( A11     )   ( A11^H*A11+A21^H*A21  A21^H*A22 )
                    // (              ) * (         ) = (                                )
                    // (        A22^H )   ( A21 A22 )   ( A22^H*A21            A22^H*A22 )
                    //
                    // A11 is IxI, A22 is 1x1.
                    //
                    // Save row i (A21) in tmp, because the row itself is overwritten further down.
                    i1_ = (offs) - (0);
                    for (i_ = 0; i_ <= i - 1; i_++)
                    {
                        tmp[i_] = a[offs + i, i_ + i1_];
                    }
                    // Add A21^H*A21 to the lower triangle of the leading IxI part.
                    for (j = 0; j <= i - 1; j++)
                    {
                        v = a[offs + i, offs + j];
                        i1_ = (0) - (offs);
                        for (i_ = offs; i_ <= offs + j; i_++)
                        {
                            a[offs + j, i_] = a[offs + j, i_] + v * tmp[i_ + i1_];
                        }
                    }
                    // Scale row i by the diagonal element (A22^H*A21), then square the diagonal
                    // itself (A22^H*A22).
                    v = a[offs + i, offs + i];
                    for (i_ = offs; i_ <= offs + i - 1; i_++)
                    {
                        a[offs + i, i_] = v * a[offs + i, i_];
                    }
                    a[offs + i, offs + i] = DualNumber.Sqr(a[offs + i, offs + i]);
                }
            }
        }
        return;
    }
}