/// <summary>Computes the diagonal-matrix times transposed-matrix product alpha*D*transpose(u)</summary>
/// <param name="D">Diagonal matrix; its dimension (D.Rows) must equal u.Cols</param>
/// <param name="u">Matrix whose transpose is multiplied by D</param>
/// <param name="alpha">Multiplication constant</param>
/// <param name="ans">Answer, with u.Cols rows and u.Rows columns. If null, gets created</param>
/// <returns>The matrix referenced by ans</returns>
public static floatMatrix DiagTranspMatProd(floatDiag D, floatMatrix u, float alpha, ref floatMatrix ans)
{
    // A caller-supplied answer must already have the shape of transpose(u)
    if (ans != null)
    {
        bool shapeMatches = ans.Rows == u.Cols && ans.Cols == u.Rows;
        if (!shapeMatches) throw new Exception("ans length should match transpose(u)");
    }
    if (u.Cols != D.Rows) throw new Exception("u Cols should match D dimension");

    if (ans == null) ans = new floatMatrix(new float[u.Cols, u.Rows]);

    bool useCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!useCL)
    {
        // CPU path: u is read with stride u.Cols, ans is written with stride u.Rows
        int srcStride = u.Cols;
        int dstStride = u.Rows;
        for (int row = 0; row < u.Rows; row++)
        {
            int srcBase = srcStride * row;
            for (int d = 0; d < D.Rows; d++)
            {
                ans.Values[row + dstStride * d] = alpha * D.Values[d] * u.Values[d + srcBase];
            }
        }
        return ans;
    }

    // OpenCL path: alpha travels to the device through u's coefficient buffer
    u.CLCoef.WriteToDevice(new float[] { alpha });
    CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
    {
        D.CLValues, u.CLValues, u.CLCoef, ans.CLValues
    };
    int[] workSize = new int[] { u.Cols, u.Rows };
    kernelDiagTranspMatProd.Execute(kernelArgs, workSize);

    return ans;
}
/// <summary>Computes transpose(A)*diag(W)*A plus the regularization term on the CPU (no OpenCL).
/// Only the lower triangle is stored, packed row by row: element [i, j] with j &lt;= i lives at index i*(i+1)/2 + j</summary>
/// <param name="A">Original matrix (m x n)</param>
/// <param name="W">Measurement weight vector, one weight per row of A. If null, no weighting is applied</param>
/// <param name="lambda">Regularization term, added to the diagonal of the answer</param>
/// <param name="AtA">Answer, A transpose times A (n x n, packed). If null, gets created</param>
/// <returns>The matrix referenced by AtA</returns>
private static floatSymPosDefMatrix AuxLeastSquaresAtAnoCL(floatMatrix A, floatDiag W, floatVector lambda, ref floatSymPosDefMatrix AtA)
{
    //A (mxn), AtA (nxn) positive semidef symmetric
    int m = A.Rows;
    int n = A.Cols;

    // A caller-supplied AtA is used as is; its size is not validated here
    if (AtA == null) AtA = new floatSymPosDefMatrix(new float[(n * (n + 1)) >> 1]);

    bool weighted = W != null;
    for (int i = 0; i < n; i++)
    {
        int rowStart = (i * (i + 1)) >> 1;
        for (int j = 0; j <= i; j++)
        {
            // Accumulate in double to limit round-off, then store as float
            double val = 0;
            if (weighted)
            {
                for (int k = 0; k < m; k++)
                {
                    val += A[k, i] * A[k, j] * W.Values[k];
                }
            }
            else
            {
                for (int k = 0; k < m; k++)
                {
                    val += A[k, i] * A[k, j];
                }
            }
            AtA.Values[rowStart + j] = (float)val;
        }
    }

    //regularization term
    for (int i = 0; i < n; i++)
    {
        AtA.Values[((i * (i + 1)) >> 1) + i] += lambda.Values[i];
    }

    // The values were just overwritten, so any previous Cholesky factorization is stale.
    // This mirrors what the OpenCL path (AuxLSAtACL) does after running its kernel.
    AtA.IsCholeskyFactorized = false;

    return AtA;
}
/// <summary>Fills AtA on the OpenCL device by running the kernelComputeAtWA kernel with A, W and lambda
/// (weighted transpose(A)*A with lambda as regularization term, going by the kernel name and original description)</summary>
/// <param name="A">Original matrix</param>
/// <param name="W">Measurement weight vector</param>
/// <param name="lambda">Regularization term</param>
/// <param name="AtA">Answer. Gets (re)created when null or when its device buffer does not hold A.Cols*(A.Cols+1)/2 values</param>
/// <returns>The matrix referenced by AtA</returns>
private static floatSymPosDefMatrix AuxLSAtACL(floatMatrix A, floatDiag W, floatVector lambda, ref floatSymPosDefMatrix AtA)
{
    // Number of entries in the packed triangle of an (A.Cols x A.Cols) symmetric matrix
    int packedLength = (A.Cols * (A.Cols + 1)) >> 1;

    bool needsAllocation = AtA == null || AtA.CLValues.OriginalVarLength != packedLength;
    if (needsAllocation)
    {
        AtA = new floatSymPosDefMatrix(new float[packedLength]);
    }

    CLCalc.Program.Variable[] args = new CLCalc.Program.Variable[]
    {
        A.CLValues,
        A.CLDim,
        W.CLValues,
        AtA.CLValues,
        lambda.CLValues
    };

    // One work item per packed entry of the answer
    kernelComputeAtWA.Execute(args, AtA.CLValues.OriginalVarLength);

    //Just modified values in CL memory, matrix is no longer Cholesky factorized
    AtA.IsCholeskyFactorized = false;

    return AtA;
}
/// <summary>Computes transpose(A)*diag(W)*b*alpha</summary>
/// <param name="A">Original matrix</param>
/// <param name="W">Measurement weight vector; W.Rows must equal A.Rows</param>
/// <param name="b">Vector to multiply; its length must equal A.Rows</param>
/// <param name="alpha">Multiplication constant</param>
/// <param name="ans">Answer, of length A.Cols. If null, gets created</param>
/// <returns>The vector referenced by ans</returns>
public static floatVector MatrTraspVecMult(floatMatrix A, floatDiag W, floatVector b, float alpha, ref floatVector ans)
{
    int rowCount = A.Rows;
    int colCount = A.Cols;

    // The answer is created before the dimension checks run
    if (ans == null) ans = new floatVector(new float[A.Cols]);

    if (A.Rows != W.Rows) throw new Exception("Incompatible A and W dimensions");
    if (A.Rows != b.Length) throw new Exception("Incompatible A and b dimensions");
    if (A.Cols != ans.Length) throw new Exception("Incompatible A and ans dimensions");

    bool useCL = CLCalc.CLAccelerationType.UsingCL == CLCalc.CLAcceleration;
    if (!useCL)
    {
        // CPU path: one weighted dot product per column of A, accumulated in double
        for (int col = 0; col < colCount; col++)
        {
            double sum = 0;
            for (int row = 0; row < rowCount; row++)
            {
                sum += A[row, col] * b.Values[row] * W.Values[row] * alpha;
            }
            ans.Values[col] = (float)sum;
        }
        return ans;
    }

    // OpenCL path: alpha travels to the device through b's coefficient buffer
    b.CLCoef.WriteToDevice(new float[] { alpha });
    CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
    {
        A.CLValues, A.CLDim, b.CLValues, b.CLCoef, W.CLValues, ans.CLValues
    };
    kernelTranspMatrVecProdW.Execute(kernelArgs, A.Cols);

    return ans;
}
/// <summary>Computes transpose(A)*diag(W)*b, i.e. the scaled overload called with a multiplication constant of 1</summary>
/// <param name="A">Original matrix</param>
/// <param name="W">Measurement weight vector</param>
/// <param name="b">Vector to multiply</param>
/// <param name="ans">Answer. If null, gets created</param>
/// <returns>The vector referenced by ans</returns>
public static floatVector MatrTraspVecMult(floatMatrix A, floatDiag W, floatVector b, ref floatVector ans)
{
    // No extra scaling: forward to the alpha overload with a unit constant
    const float unitScale = 1.0f;
    floatVector product = MatrTraspVecMult(A, W, b, unitScale, ref ans);
    return product;
}
/// <summary>Computes transpose(A)*A using weights W, dispatching to the OpenCL or the CPU implementation
/// according to the current CLCalc acceleration mode</summary>
/// <param name="A">Original matrix</param>
/// <param name="W">Measurement weight vector</param>
/// <param name="lambda">Regularization term</param>
/// <param name="AtA">Answer, A transpose times A</param>
/// <returns>The matrix returned by the selected implementation</returns>
public static floatSymPosDefMatrix MatrTranspMatrProd(floatMatrix A, floatDiag W, floatVector lambda, ref floatSymPosDefMatrix AtA)
{
    bool useCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!useCL)
    {
        return AuxLeastSquaresAtAnoCL(A, W, lambda, ref AtA);
    }

    return AuxLSAtACL(A, W, lambda, ref AtA);
}
/// <summary>Computes the diagonal-matrix times vector product alpha*D*u</summary>
/// <param name="D">Diagonal matrix</param>
/// <param name="u">Vector to multiply; its length must equal D.Rows</param>
/// <param name="alpha">Multiplication constant</param>
/// <param name="ans">Answer, of length D.Rows. If null, gets created</param>
/// <returns>The vector referenced by ans</returns>
public static floatVector DiagVecProd(floatDiag D, floatVector u, float alpha, ref floatVector ans)
{
    // A caller-supplied answer must already have the right length
    bool ansSupplied = ans != null;
    if (ansSupplied && ans.Length != D.Rows) throw new Exception("ans length should match D dimension");
    if (u.Length != D.Rows) throw new Exception("u length should match D dimension");

    if (!ansSupplied) ans = new floatVector(new float[D.Rows]);

    bool useCL = CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL;
    if (!useCL)
    {
        // CPU path: element-wise scaling
        for (int idx = 0; idx < D.Rows; idx++)
        {
            ans.Values[idx] = alpha * D.Values[idx] * u.Values[idx];
        }
        return ans;
    }

    // OpenCL path: alpha travels to the device through u's coefficient buffer
    u.CLCoef.WriteToDevice(new float[] { alpha });
    CLCalc.Program.MemoryObject[] kernelArgs = new CLCalc.Program.MemoryObject[]
    {
        D.CLValues, u.CLValues, u.CLCoef, ans.CLValues
    };
    kernelDiagVecProd.Execute(kernelArgs, D.Rows);

    return ans;
}
/// <summary>Computes nonlinear least squares using user functions to evaluate residues and their gradients.
/// Each iteration solves the normal equations for a step direction and then shortens the step by halving
/// until a sufficient-decrease condition on the sum of squared residues holds</summary>
/// <param name="f">Function that computes residues [m] and their gradients [grad r1; grad r2] m x n (each gradient in one line) [i,j] = gradR[i,j]</param>
/// <param name="x">Initial guess. The array itself is not modified; accepted steps are written to a copy</param>
/// <param name="m">Number of residue equations</param>
/// <param name="maxiter">Maximum number of iterations</param>
/// <param name="err">Adjustment error. Updated every iteration with the value of NormAtb; the incoming value
/// is compared against the first computed error, and iteration stops if they are equal</param>
/// <returns>The last accepted iterate, or the initial guess itself when no step was taken</returns>
public static float[] NonLinearLS(ComputeResidueGrad f, float[] x, int m, int maxiter, ref double err)
{
    int n = x.Length;
    float eps = 5e-5f * 0.5f;
    float alpha = 0.002f;

    // Host-side buffers filled by f, plus their linear algebra wrappers
    float[,] jacobian = new float[m, n];
    float[] residues = new float[m];
    floatMatrix devJacobian = new floatMatrix(jacobian);
    floatVector devResidues = new floatVector(residues);
    floatVector devLambda = new floatVector(new float[devJacobian.Cols]);

    // Unit weights for every residue
    float[] unitWeights = new float[devJacobian.Rows];
    for (int w = 0; w < unitWeights.Length; w++) unitWeights[w] = 1;
    floatDiag devWeights = new floatDiag(unitWeights);

    float[] step = new float[devJacobian.Cols];
    floatVector devStep = new floatVector(step);

    for (int iter = 0; iter < maxiter; iter++)
    {
        //Computes residues and gradient
        f(x, ref residues, ref jacobian, true);
        devJacobian.SetValues(jacobian);
        devResidues.CLValues.WriteToDevice(residues);

        double previousErr = err;
        err = NormAtb(jacobian, residues, m, n);

        //if (previousErr == err) it means algorithm is not converging at all
        bool converged = err < eps;
        bool stalled = previousErr == err;
        if (converged || stalled || double.IsNaN(err)) break;

        // Step direction: solve the normal equations and flip the sign
        floatSymPosDefMatrix normalMatrix = null;
        normalMatrix = BLAS.MatrTranspMatrProd(devJacobian, devLambda, ref normalMatrix);
        devStep = BLAS.MatrTraspVecMult(devJacobian, devWeights, devResidues, ref devStep);
        if (CLCalc.CLAcceleration == CLCalc.CLAccelerationType.UsingCL)
        {
            devStep.CLValues.ReadFromDeviceTo(devStep.Values);
        }

        step = normalMatrix.LinearSolve(devStep.Values);
        for (int k = 0; k < step.Length; k++) step[k] = -step[k];

        //Line search
        //||r||²
        float normRSquared = 0;
        for (int k = 0; k < residues.Length; k++) normRSquared += residues[k] * residues[k];

        //2transpose(r)Av
        float transpRAv = 0;
        for (int p = 0; p < m; p++)
        {
            float rowDot = 0;
            for (int q = 0; q < n; q++) rowDot += jacobian[p, q] * step[q];
            transpRAv += residues[p] * rowDot;
        }
        transpRAv *= 2.0f;

        //iterates while sum(ri*(x+tv)^2)>||r||²+alpha*2*transpose(r)*A*v*t
        // t starts at 2 and is halved before each trial, so the first trial uses the full step
        float t = 2.0f;
        float lhs;
        float rhs;
        float[] candidate = (float[])x.Clone();
        do
        {
            t *= 0.5f;

            //Update x
            for (int k = 0; k < x.Length; k++) candidate[k] = x[k] + step[k] * t;

            //Update r
            f(candidate, ref residues, ref jacobian, false);

            lhs = 0;
            for (int k = 0; k < m; k++) lhs += residues[k] * residues[k];

            rhs = normRSquared + alpha * transpRAv * t;
        }
        while (lhs > rhs);

        x = candidate;
    }

    return x;
}