// QR decomposition by modified Gram-Schmidt orthogonalization.
public qr_decomp_GS(matrix A)
{
    int n = A.size1;
    int m = A.size2;
    q = A.copy();
    r = new matrix(m, m);
    vector qi = new vector(n);
    vector qj = new vector(n);
    for (int i = 0; i < m; i++)
    {
        qi = q.col_toVector(i);
        r[i, i] = Sqrt(qi.dot(qi));
        for (int k = 0; k < n; k++)
        {
            q[k, i] = q[k, i] / r[i, i];
        }
        qi = q.col_toVector(i); // column i is fixed after normalization, so fetch it once
        for (int j = i + 1; j < m; j++)
        {
            qj = q.col_toVector(j);
            r[i, j] = qi.dot(qj);
            for (int k = 0; k < n; k++)
            {
                q[k, j] = q[k, j] - q[k, i] * r[i, j];
            }
        }
    }
}
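// A minimal usage sketch (an assumption about the surrounding class, whose q
// and r fields hold the factors): decompose a random tall matrix and check
// the factorization.
public static void qr_demo()
{
    int n = 5, m = 3;
    var rnd = new System.Random(1);
    matrix A = new matrix(n, m);
    for (int i = 0; i < n; i++)
        for (int j = 0; j < m; j++)
            A[i, j] = rnd.NextDouble();
    var qrd = new qr_decomp_GS(A);
    // If the q and r fields are accessible, (qrd.q * qrd.r - A) and
    // (qrd.q^T * qrd.q - I) should both be close to the zero matrix.
}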
// The following method performs the inverse iteration method on a real symmetric matrix A
public static int inverse_iteration(matrix A, ref double s, ref vector v,
                                    double tau = 1e-6, double eps = 1e-6,
                                    int n_max = 999, int updates = 999)
{
    int n = 0;
    int m = 0;
    matrix As;
    matrix I = new matrix(A.size1, A.size1);
    I.set_identity();
    v = v / v.norm();
    As = A - s * I;
    qr As_QR = new qr(As);
    double abs = 0;
    double rel = 0;
    while (converge(v, A, s, tau, eps, ref abs, ref rel) && n < n_max)
    {
        v = As_QR.solve(v);
        v = v / v.norm();
        s = v.dot(A * v);
        if (m > updates) // Update the QR decomposition if Rayleigh updates are used (updates < 999)
        {
            m = 0;
            s = v.dot(A * v) / (v.dot(v));
            As = A - s * I;
            As_QR = new qr(As);
        }
        n++;
        m++;
    }
    s = v.dot(A * v) / (v.norm() * v.norm());
    v = v / v.norm();
    return n;
}
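// The converge helper used above is not shown in this collection. A minimal
// sketch of one plausible contract (an assumption, not the original): it
// returns true while the residual ||A*v - s*v|| still fails both the absolute
// (tau) and relative (eps) criteria, and reports the two error measures
// through the ref parameters.
static bool converge(vector v, matrix A, double s, double tau, double eps,
                     ref double abs, ref double rel)
{
    vector residual = A * v - s * v;
    abs = residual.norm();         // absolute error estimate
    rel = abs / Abs(s);            // relative error estimate
    return abs > tau && rel > eps; // keep iterating while both are too large
}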
}//Method: gradient

public static vector sr1(Func<vector, double> f, vector x, double acc = 1e-6)
{
    // eps is assumed to be a class-level constant on the order of machine epsilon
    double fx = f(x);
    vector gx = gradient(f, x);
    matrix B = matrix.id(x.size);
    int nsteps = 0;
    while (nsteps < 999)
    {
        nsteps++;
        vector delta_x = -B * gx;
        if (delta_x.norm() < eps * x.norm())
        {
            Error.WriteLine($"sr1: |delta_x|<eps*|x|");
            break;
        }
        if (gx.norm() < acc)
        {
            Error.WriteLine($"sr1: |gx|<acc");
            break;
        }
        double fz, lambda = 1, alpha = 1e-4;
        vector z, s;
        while (true)
        {
            s = delta_x * lambda;
            z = x + s;
            fz = f(z);
            if (fz < fx + alpha * s.dot(gx)) { break; } // Stop if the step is good (Armijo condition)
            if (lambda < eps) { B.setid(); break; }     // If the step is too small: reset B and stop
            lambda /= 2;
        }
        vector gz = gradient(f, z);
        vector y = gz - gx;
        vector u = s - B * y;
        if (Abs(u.dot(y)) > 1e-6) { B += matrix.outer(u, u) / (u.dot(y)); } // SR1 update
        x = z; gx = gz; fx = fz;
    }
    WriteLine($"nsteps={nsteps}");
    return x;
} //Method: sr1
}//broyden

public static int broyden(Func<vector, vector> f, ref vector x, double eps = 1e-3)
{
    vector fx = f(x), z, fz;
    matrix J = jacobian(f, x, fx);
    var qrJ = new qrdecomposition(J);
    matrix B = qrJ.inverse();
    int nsteps = 0;
    while (++nsteps < 999)
    {
        vector Dx = -B * fx;
        double s = 1;
        while (true)
        {
            z = x + Dx * s;
            fz = f(z);
            if (fz.norm() < (1 - s / 2) * fx.norm()) { break; }
            if (s < 1.0 / 32)
            {
                J = jacobian(f, x, fx);
                qrJ = new qrdecomposition(J);
                B = qrJ.inverse();
                break;
            }
            s /= 2;
        }
        vector dx = z - x;
        vector df = fz - fx;
        if (dx.dot(df) > 1e-9)
        {
            vector c = (dx - B * df) / dx.dot(df);
            B.update(c, dx);
        }
        // Alternative update formulas:
        // vector c = (dx - B*df)/(df%df);     B.update(c, df);
        // vector c = (dx - B*df)/(dx%(B*df)); B.update(c, B*df);
        x = z; fx = fz;
        if (fx.norm() < eps) { break; }
    }
    return nsteps;
}//broyden
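// A minimal usage sketch (assuming the surrounding class's jacobian and
// qrdecomposition helpers): find a root of a simple 2D system with broyden.
public static void broyden_demo()
{
    // f(x,y) = (x*x + y - 2, x - y): a root lies at (1, 1)
    Func<vector, vector> f = (v) =>
    {
        vector r = new vector(2);
        r[0] = v[0] * v[0] + v[1] - 2;
        r[1] = v[0] - v[1];
        return r;
    };
    vector x = new vector(2);
    x[0] = 0.5; x[1] = 0.5; // starting guess
    int steps = broyden(f, ref x, eps: 1e-6);
    WriteLine($"root = ({x[0]}, {x[1]}), steps = {steps}");
}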
public static Tuple<vector, int> qnewton(Func<vector, double> f, vector x0,
                                         double acc = 1e-3, double eps = 1.0 / 4194304)
{
    vector x = x0.copy();
    double fx = f(x);
    matrix B = new matrix(x.size, x.size);
    B.set_unity();
    int n = 0;
    vector gx = gradient(f, x, eps);
    while (n < 1000)
    {
        vector Dx = -B * gx;
        n++;
        if (gx.norm() < acc || Dx.norm() < eps * x.norm()) { break; }
        double lam = 1.0; // Commence backtracking
        vector z;
        double fz;
        do
        {
            z = x + Dx * lam;
            fz = f(z);
            if (fz < fx) { break; } // Defines a good step
            if (lam < eps) // Step too small: reset B and accept the step anyway
            {
                B.set_unity();
                break;
            }
            lam /= 2;
        } while (true);
        vector s = z - x;
        vector gz = gradient(f, z, eps);
        vector y = gz - gx;
        vector u = s - B * y;
        double uTy = u.dot(y);
        // SR1 update of B, only when the denominator is numerically safe
        if (Abs(uTy) > 1e-6)
        {
            for (int i = 0; i < B.size1; i++)
                for (int j = 0; j < B.size2; j++)
                    B[i, j] += u[i] * u[j] * (1 / uTy);
        }
        x = z; gx = gz; fx = fz;
    }
    return Tuple.Create(x, n);
}
public static int qnewton(Func<vector, double> f, ref vector x)
{
    double eps = 1e-3;
    // Inverse Hessian matrix, initialized to the identity
    int n = x.size;
    matrix B = new matrix(n, n);
    B.set_unity();
    // Starting values
    double fx = f(x);
    vector gx = gradient(f, x);
    vector delta_x;
    int counts = 0;
    do
    {
        counts++;
        delta_x = -B * gx;
        // Back-tracking line search
        double fz, l = 1.0;
        while (true)
        {
            fz = f(x + l * delta_x);
            if (fz < fx) { break; }
            if (l < EPS) { B.set_unity(); break; }
            l = l / 2;
        }
        vector s = l * delta_x;
        vector gz = gradient(f, x + s);
        vector y = gz - gx;
        vector u = s - B * y;
        double udoty = u.dot(y);
        if (Abs(udoty) > 1e-6) // was 10e-6, presumably a typo for 1e-6
        {
            B = B + outer(u, u) * 1 / udoty; // SR1 update
        }
        x += s; gx = gz; fx = fz;
    } while (gx.norm() > eps && delta_x.norm() > EPS * x.norm());
    return counts;
}
public static int qnewton(
    Func<vector, double> f, /* objective function */
    ref vector xstart,      /* starting point */
    double eps              /* accuracy goal, on exit |gradient| should be < eps */
)
{
    double fx = f(xstart);
    vector grad_fx = gradient(f, xstart);
    matrix A = matrix.id(xstart.size);
    int n_steps = 0;
    while (n_steps < 999)
    {
        n_steps++;
        var Dx = -A * grad_fx;
        if (Dx.norm() < EPS * xstart.norm()) { break; }
        if (grad_fx.norm() < eps) { break; }
        double lam = 1.0;
        vector y;
        double fy;
        while (true)
        {
            y = xstart + Dx * lam;
            fy = f(y);
            if (fy < fx) { break; }
            if (lam < EPS) { A.setid(); break; }
            lam /= 2;
        }
        vector s = y - xstart;
        vector grad_fy = gradient(f, y);
        vector z = grad_fy - grad_fx;
        vector u = s - A * z;
        double uTz = u.dot(z);
        if (Abs(uTz) > 1e-6) { A.update(u, u, 1.0 / uTz); } // SR1 update
        xstart = y; grad_fx = grad_fy; fx = fy;
    }
    return n_steps;
}
public static (vector, int) qnewton(Func<vector, double> f, vector x0, double eps)
{
    int n = 0;                    // Number of steps
    vector x = x0.copy();         // Position vector
    vector s;                     // Step vector
    double fx = f(x);             // Function value
    double fxs;                   // Function value after the step
    vector gx = gradient(f, x);   // Gradient vector
    vector gxs;                   // Gradient vector after the step
    double a = 1e-4;              // Alpha in the Armijo condition
    matrix B = matrix.id(x.size); // Inverse Hessian, initially set to the identity matrix
    while (eps < gx.norm())       // The accuracy goal
    {
        n++;
        vector Dx = -B * gx;      // Equation 6
        double lambda = 1;
        s = lambda * Dx;          // Equation 8
        fxs = f(x + s);
        while (!(fxs < fx + a * s.dot(gx))) // Backtracking
        {
            s = lambda * Dx;      // Equation 8
            fxs = f(x + s);
            if (lambda < 1.0 / Pow(2, 5))
            {
                B = matrix.id(x.size); // Reset B if lambda becomes too small
                break;
            }
            lambda /= 2;          // Halve the step size
        }
        gxs = gradient(f, x + s);
        vector y = gxs - gx;      // Statement after eq 12
        vector u = s - B * y;     // Statement after eq 12
        double uty = u.dot(y);    // Denominator of eq 18
        if (Abs(uty) > eps)       // Condition for eq 18
        {
            B.update(u, u, 1 / uty); // SR1 update
        }
        // Prepare for the next iteration
        x = x + s; gx = gxs; fx = fxs;
    }
    return (x, n); // Return the vector x and the number of steps taken
}
public static vector qnewton(Func<vector, double> f, vector xstart, double acc = 1e-7)
{
    int nsteps = 0, n = xstart.size;
    vector x = xstart;
    matrix B = new matrix(n, n);
    B.set_identity();
    vector g = gradient(f, x);
    vector dx = -B * g;
    // Line search
    while (nsteps < 999)
    {
        if (g.norm() < acc) { Error.WriteLine("Norm of gradient < accuracy goal"); break; }
        if (dx.norm() < eps * x.norm()) { Error.WriteLine("|dx| < eps*|x|"); break; }
        double fx = f(x), lambda = 1;
        vector increm = lambda * dx;
        while (f(x + increm) > fx)
        {
            lambda /= 2;
            increm = lambda * dx;
            if (lambda < eps) { B.set_identity(); break; }
        }
        nsteps++;
        vector y = gradient(f, x + increm);
        vector dy = y - g;
        vector u = increm - B * dy;
        double uTdy = u % dy;
        // Guard on the actual SR1 denominator uTdy before dividing by it
        // (the original tested increm.dot(dy) but divided by uTdy)
        if (Abs(uTdy) > eps) { B.update(u, u, 1 / uTdy); }
        x += increm;
        fx = f(x);
        g = y;
        dx = -B * g;
    }
    return x;
} // qnewton
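// The gradient helper that these minimizers call ends just above some of the
// snippets in this collection, but its body is not shown. A minimal sketch of
// a forward-difference version (an assumption about its contract, not the
// original implementation):
public static vector gradient(Func<vector, double> f, vector x, double dx = 1e-8)
{
    int n = x.size;
    vector g = new vector(n);
    double fx = f(x);
    for (int i = 0; i < n; i++)
    {
        x[i] += dx;              // perturb one coordinate
        g[i] = (f(x) - fx) / dx; // forward finite difference
        x[i] -= dx;              // restore the coordinate
    }
    return g;
}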
public static void qr_gs_decomp(matrix A, matrix R)
{
    int m = A.size2;
    for (int i = 0; i < m; i++)
    {
        vector a_i = A[i];
        R[i, i] = a_i.norm();
        vector q = a_i / R[i, i];
        A[i] = q;
        for (int j = i + 1; j < m; j++)
        {
            vector a_j = A[j];
            R[i, j] = q.dot(a_j);
            A[j] = a_j - q * R[i, j];
        }
    }
}
public static void decomp(matrix A, matrix R)
{
    int m = A.size2;
    for (int i = 0; i < m; i++)
    {
        vector ai = A[i];
        R[i, i] = ai.norm();
        vector qi = ai / R[i, i];
        A[i] = qi;
        for (int j = i + 1; j < m; j++)
        {
            vector aj = A[j];
            R[i, j] = qi.dot(aj);
            A[j] = aj - qi * R[i, j];
        }
    }
}
static void Main()
{
    ann network = new ann(5);
    // X^2
    // Func<double,double> f = (X) => { return 10 - X*X; };
    Func<double, double> f = (X) => { return Sin(X); };
    int N = 5;
    double a = -PI + 10, b = PI + 10;
    vector x = linspace(a, b, N);
    x.print("x");
    vector y = new vector(N);
    double meanx = sum(x) / N;
    double sum2 = x.dot(x) / N;
    WriteLine(meanx);
    WriteLine(sum2);
    double std = Sqrt(sum2 - meanx * meanx);
    vector xnormalized = (x - meanx) / std;
    System.IO.StreamWriter outputfile = new System.IO.StreamWriter("out.tabfun.sin.txt", append: false);
    for (int i = 0; i < N; i++)
    {
        y[i] = f(x[i]);
        outputfile.WriteLine($"{x[i]} {y[i]}");
    }
    outputfile.Close();
    network.train(xnormalized, y);
    vector xs = linspace(a, b, 100);
    outputfile = new System.IO.StreamWriter("out.fitfun.sin.txt", append: false);
    for (int i = 0; i < 100; i++)
    {
        double xnormal = (xs[i] - meanx) / std;
        outputfile.WriteLine("{0} {1}", xs[i], network.feedforwad(xnormal));
    }
    outputfile.Close();
}
// returns (min, steps)
public static (vector, int) qnewton(Func<vector, double> f, vector x0, double eps)
{
    // limit and a are assumed to be class-level constants (backtracking depth
    // and the Armijo parameter, respectively)
    int n = 0;                       // total number of steps
    vector x = x0.copy(), s;         // position vectors
    double fx = f(x), fxs;           // function values
    vector gx = gradient(f, x), gxs; // gradient vectors
    matrix B = matrix.id(x.size);    // inverse Hessian, initialized to I
    while (eps < gx.norm())          // continue until the gradient norm is below eps
    {
        n++;
        vector Dx = -B * gx;         // eq 6
        double min = 1.0 / Pow(2, limit), l = 1.0; // min is the smallest step allowed, l is lambda
        do // backtracking
        {
            s = l * Dx;              // step, eq 8
            fxs = f(x + s);
            if (l < min)
            {
                B = matrix.id(x.size); // the text advises resetting B if l < min
                break;
            }
            l /= 2;                  // halve the step size
        } while (!(fxs < fx + a * s.dot(gx))); // Armijo condition
        gxs = gradient(f, x + s);
        vector y = gxs - gx;         // eq 12
        vector u = s - B * y;
        double denom = u.dot(y);
        if (Abs(denom) > eps)        // eq 18; taking the absolute value is important
        {
            B.update(u, u, 1 / denom); // the matrix class already provides this rank-1 update
        }
        // prepare for the next iteration
        x = x + s; gx = gxs; fx = fxs;
    }
    return (x, n);
}
public static vector qnewton(
    Func<vector, double> f,
    vector xstart,
    double eps,
    ref int steps,
    matrix B = null // the inverse of the Hessian matrix
)
{
    steps += 1;
    int n = xstart.size;
    // Initialize the B matrix if it isn't given:
    if (B == null)
    {
        B = new matrix(n, n);
        B.set_identity();
    }
    // 1: Calculate grad_f:
    vector grad_f = gradient(f, xstart);
    // 2: Calculate delta_x:
    vector delta_x = -B * grad_f;
    // 3: Do the backtracking to find the actual step s:
    double alpha = 1e-4;
    int invlambda = 1;
    while ((invlambda <= 64) &&
           (f(xstart + delta_x / invlambda) >= f(xstart) + alpha * (delta_x / invlambda).dot(grad_f)))
    {
        invlambda *= 2;
    }
    vector s = delta_x / invlambda;
    // If lambda becomes too small, reset B:
    if (invlambda > 64) { B.set_identity(); }
    // 4: Calculate the error and compare with the accuracy goal.
    // Note: err = Abs(grad_phi); first the gradient of the next step:
    vector grad_f_s = gradient(f, xstart + s);
    double err = grad_f_s.simpleNorm();
    if (steps > 999)
    {
        Error.Write($"qnewton: Maximum number of steps reached ({steps} steps), terminating minimization.\n");
        return xstart + s;
    }
    else if (err < eps)
    {
        return xstart + s;
    }
    else
    {
        // 5: The step was not final; do the SR1 update and take another step.
        // Calculate y:
        vector y = grad_f_s - grad_f;
        // Calculate u:
        vector u = s - B * y;
        if (Abs(u.dot(y)) < 1e-6)
        {
            return qnewton(f, xstart + s, eps, ref steps, B);
        }
        else
        {
            // Calculate delta_B:
            matrix delta_B = u.outer(u) / (u.dot(y));
            // Take another step:
            return qnewton(f, xstart + s, eps, ref steps, B + delta_B);
        }
    }
}
// Base method where the matrix A does not need to be explicitly
// accessible, just that A*v can be calculated for an arbitrary vector v.
public static void iterations(
    Func<vector, vector> applyA, // The function v -> A*v
    matrix V,                    // The n x m matrix to contain V
    matrix T                     // The m x m matrix to contain the tridiagonal T matrix
)
{
    // A is an n x n matrix:
    int n = V.size1;
    int m = T.size1;
    // Make the starting vector:
    vector v0 = matrixHelp.makeRandUnitVector(n);
    // Do the first iteration:
    vector w0prime = applyA(v0);
    double alpha0 = w0prime.dot(v0);
    vector w0 = w0prime - alpha0 * v0;
    // Initialize T and V:
    T[0, 0] = alpha0;
    for (int i = 0; i < n; i++) { V[i, 0] = v0[i]; }
    // Save the variables of the loop that will be overwritten.
    // (Note: these are just new names, so w0 and v0 will be overwritten
    // in the loop, but they are no longer needed.)
    vector wprev = w0;
    vector vprev = v0;
    // Do the following iterations:
    for (int j = 1; j < m; j++)
    {
        // Calculate beta_j:
        double betaj = wprev.norm();
        // The fast way to get the next vector is v_j = w_{j-1}/beta_j, but the
        // stable way (which Dimitri seems to want) is to re-orthogonalize it
        // against all the previous v_j's by Gram-Schmidt, as done below.
        vector vj = wprev / betaj;
        // Orthogonalize it against all previous v_j's:
        vector uj = vj.copy();
        for (int i = 0; i < j; i++)
        {
            // This operation might be made more efficient if the columns
            // of V can be assumed to be normalized.
            uj -= vj.dot(V[i]) / (V[i].norm()) * V[i];
        }
        // Normalize uj to be sure, then assign it to vj again:
        vj = uj / uj.norm();
        // Make the new w_j':
        vector wjprime = applyA(vj);
        // Make alpha_j and w_j:
        double alphaj = wjprime.dot(vj);
        vector wj = wjprime - alphaj * vj - betaj * vprev;
        // Update the V and T matrices:
        T[j, j] = alphaj;
        T[j, j - 1] = betaj;
        T[j - 1, j] = betaj;
        V[j] = vj;
        // Set up for the next iteration:
        wprev = wj;
        vprev = vj;
    }
}
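// A minimal usage sketch (assuming the repo's matrix/vector classes): run the
// Lanczos iterations on an explicit symmetric matrix by wrapping it in a
// v -> A*v delegate, then read off the tridiagonal T.
public static void lanczos_demo(matrix A, int m)
{
    int n = A.size1;
    matrix V = new matrix(n, m);
    matrix T = new matrix(m, m);
    iterations(v => A * v, V, T);
    // T now holds the m x m tridiagonal projection of A onto the Krylov
    // subspace spanned by the columns of V; its eigenvalues approximate
    // the extreme eigenvalues of A.
}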
} //end gradient

public static vector qnewton(Func<vector, double> f, vector x, double eps, ref int nr)
{
    // As a start the B matrix is set to the identity:
    matrix B = new matrix(x.size, x.size);
    B.set_identity();
    // Define bool
    bool check1 = true;
    while (check1)
    {
        nr++; // update the number of steps
        // Calculate the gradient of f(x):
        vector nabla = gradient(f, x);
        // delta x can be calculated as:
        vector delx = (-1) * B * nabla;
        // Back-tracking line search:
        double al = 1e-4;
        double lam = 1;
        bool check2 = true;
        while (check2)
        {
            if (lam < 1.0 / 64) // stop the while-loop if lam is too small, and reset B
            {
                check2 = false;
                B.set_identity();
            } //end if
            else if (f(x + lam * delx) < (f(x) + al * lam * (delx.dot(nabla)))) // accept if the step is good
            {
                check2 = false;
            } //end else if
            else
            {
                lam = lam / 2;
            } //end else
        } //end while
        // Update x: x = x + s
        x = x + lam * delx;
        // Check the error:
        // (note: if the outer loop runs more than once, this gradient is
        // calculated twice; there might be a way around this)
        vector nabla2 = gradient(f, x);
        double err = nabla2.norm();
        if (nr > 999) // here 999 is the max number of steps the function is allowed
        {
            check1 = false;
        } //end if
        else if (err < eps) // if the error is smaller than epsilon the result is accepted
        {
            check1 = false;
        } //end else if
        else
        {
            // Before the next iteration, do the rank-1 update:
            vector y = nabla2 - nabla;
            vector u = lam * delx - B * y;
            if (Abs(u.dot(y)) > 1e-6) // the update
            {
                matrix delB = new matrix(x.size, x.size);
                for (int i = 0; i < x.size; i++) // calculate delB
                {
                    for (int k = 0; k < x.size; k++)
                    {
                        delB[i, k] = u[i] * u[k] / (u.dot(y)); // was delB[i][k], which this matrix class does not support
                    } //end for
                } //end for
                B = B + delB;
            } //end if
        } //end else
    } //end while
    return x;
} //end qnewton
public static int qnewton(Func<vector, double> f, ref vector x, double eps = 1e-3)
{
    // At the beginning we approximate the inverse Hessian matrix with the
    // identity matrix
    int n = x.size;
    matrix B = new matrix(n, n);
    B.set_identity();
    double alpha = 1e-4;
    int nsteps = 0;
    vector gradx = gradient(f, x);
    double fx = f(x);
    while (true)
    {
        nsteps++;
        // Calculate the Newton step
        vector deltax = -B * gradx;
        double lambda = 1;
        vector x1;
        double fx1;
        double Armijo;
        // Backtracking algorithm
        while (true)
        {
            x1 = x + lambda * deltax;
            fx1 = f(x1);
            /*
             * The Armijo condition denotes when it is a good step. The new
             * function value should be smaller than this value.
             */
            Armijo = fx + alpha * lambda * deltax.dot(gradx);
            if (fx1 < Armijo)
            {
                // Accept the step
                break;
            }
            if (lambda < 1e-8)
            {
                // Bad step but we accept it. Reset the Hessian matrix to
                // the identity
                B.set_identity();
                break;
            }
            lambda /= 2;
        }
        vector s = lambda * deltax;
        // Update the inverse Hessian matrix via the symmetric Broyden update
        vector gradx1 = gradient(f, x1);
        vector y = gradx1 - gradx;
        vector u = s - B * y;
        // We only update if s^T*y is large enough that the result is
        // 'numerically safe', i.e. we do not divide by a too-small number
        // (gamma and a are now computed inside the guard for the same reason)
        if (Abs(s.dot(y)) > 1e-6)
        {
            double gamma = (u.dot(y)) / (2 * s.dot(y));
            vector a = (u - gamma * s) / (s.dot(y));
            // We do not really have a way to multiply two vectors and get out
            // a matrix, but it can be handled with the update function that
            // we have in the matrix class
            B.update(a, s, 1);
            B.update(s, a, 1);
        }
        // Move to the new position before running the full loop again
        x = x1;
        fx = fx1;
        gradx = gradx1;
        // When the gradient is close enough to zero we break the loop
        if (gradx.norm() < eps) { break; }
    }
    return nsteps;
}
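// A minimal usage sketch (assuming the surrounding class): minimize the
// Rosenbrock valley function with the quasi-Newton routine above. The
// minimum is at (1, 1).
public static void qnewton_demo()
{
    Func<vector, double> rosenbrock = (v) =>
    {
        double x = v[0], y = v[1];
        return (1 - x) * (1 - x) + 100 * (y - x * x) * (y - x * x);
    };
    vector p = new vector(2);
    p[0] = -1; p[1] = 2; // starting guess
    int steps = qnewton(rosenbrock, ref p, 1e-5);
    WriteLine($"minimum at ({p[0]}, {p[1]}) after {steps} steps");
}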
public static int Main()
{
    int dim = 30;
    double tau = 1e-6;
    double eps = 1e-6;
    int updates = 999; // If updates = 999, no Rayleigh updates are performed
    int n_max = 999;
    var rnd = new Random();
    int i = rnd.Next(1, dim - 1); // keep i-1 and i+1 in range (was rnd.Next(dim-1), which allows i = 0)
    double deviation = 1.05;
    matrix A = misc.gen_matrix(dim);
    matrix Ac = A.copy();
    var jacobi = new jacobi_diagonalization(A);
    vector e = jacobi.get_eigenvalues();
    matrix V = jacobi.get_eigenvectors();
    double s_0 = e[i] * deviation;
    double s_1 = e[i] * deviation;
    vector v_0 = misc.gen_vector(dim); // Random vector
    vector v_1 = V[i] / V[i].norm();   // Jacobi eigenvector
    for (int j = 0; j < v_1.size; j++) { v_1[j] = v_1[j] * deviation; }
    int n_0 = power_method.inverse_iteration(Ac, ref s_0, ref v_0, tau, eps, n_max, updates); // Random
    int n_1 = power_method.inverse_iteration(Ac, ref s_1, ref v_1, tau, eps, n_max, updates); // Jacobi
    var outfile = new System.IO.StreamWriter($"../test_out.txt", append: false);
    outfile.WriteLine($"--------------------------------------------");
    outfile.WriteLine($"Inverse iteration method (Jacobi comparison)");
    outfile.WriteLine($"--------------------------------------------");
    outfile.WriteLine($"Matrix dimension: {dim}");
    outfile.WriteLine($"Random eigenvalue index: {i}");
    outfile.WriteLine($"Maximum iterations: {n_max}\n");
    outfile.WriteLine($"Jacobi eigenvalues:");
    outfile.WriteLine($"e_(i-1): {e[i-1]}");
    outfile.WriteLine($"e_(i): {e[i]}");
    outfile.WriteLine($"e_(i+1): {e[i+1]}\n");
    outfile.WriteLine($"Inverse iteration method:\n");
    outfile.WriteLine($"Initial deviation: {deviation}");
    outfile.WriteLine($"Initial eigenvalue: {e[i]*deviation}");
    outfile.WriteLine($"Absolute accuracy: {tau}");
    outfile.WriteLine($"Relative accuracy: {eps}\n");
    outfile.WriteLine($"Inverse iteration method with random initial eigenvector:");
    outfile.WriteLine($"Algorithm result: {s_0}");
    outfile.WriteLine($"v^(T)*A*v: {v_0.dot(Ac*v_0)}");
    outfile.WriteLine($"Iterations: {n_0}");
    outfile.WriteLine($"Errors compared to Jacobi eigenvalues:");
    outfile.WriteLine($"Abs(e_(i-1) - s): {Abs(e[i-1]-s_0)}");
    outfile.WriteLine($"Abs(e_(i) - s): {Abs(e[i]-s_0)}");
    outfile.WriteLine($"Abs(e_(i+1) - s): {Abs(e[i+1]-s_0)}\n");
    outfile.WriteLine($"Inverse iteration method with deviated Jacobi eigenvector:");
    outfile.WriteLine($"Algorithm result: {s_1}");
    outfile.WriteLine($"v^(T)*A*v: {v_1.dot(Ac*v_1)}");
    outfile.WriteLine($"Iterations: {n_1}");
    outfile.WriteLine($"Errors compared to Jacobi eigenvalues:");
    outfile.WriteLine($"Abs(e_(i-1) - s): {Abs(e[i-1]-s_1)}");
    outfile.WriteLine($"Abs(e_(i) - s): {Abs(e[i]-s_1)}");
    outfile.WriteLine($"Abs(e_(i+1) - s): {Abs(e[i+1]-s_1)}\n");
    outfile.Close();
    return 0;
}
public static vector qnewton(
    Func<vector, double> f,
    vector xstart,
    double eps,
    ref int steps,
    matrix B = null // the inverse of the Hessian matrix
)
{
    steps += 1;
    Error.Write("\nOne call to qnewton!\n");
    Error.Write($"Used xstart: {xstart}!\n");
    double dx = 1e-8;
    int n = xstart.size;
    // Initialize the B matrix if it isn't given:
    if (B == null)
    {
        B = new matrix(n, n);
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                B[i, j] = (i == j) ? 1 : 0;
    }
    // 1: Calculate grad_f:
    vector grad_f = new vector(n);
    for (int i = 0; i < n; i++)
    {
        vector x = xstart.copy();
        x[i] += dx;
        grad_f[i] = (f(x) - f(xstart)) / dx;
    }
    Error.Write($"Found grad_f: {grad_f}\n");
    // 2: Calculate delta_x:
    vector delta_x = -B * grad_f;
    Error.Write($"Found delta_x: {delta_x}\n");
    // 3: Do the backtracking to find the actual step s:
    double alpha = 1e-4;
    int invlambda = 1;
    // TODO: This seems okay, but I'm not sure.
    while ((invlambda <= 64) &&
           (f(xstart + delta_x / invlambda) >= f(xstart) + alpha * (delta_x / invlambda).dot(grad_f)))
    {
        invlambda *= 2;
    }
    vector s = delta_x.copy() / invlambda;
    // If lambda becomes too small, reset B:
    if (invlambda > 64)
    {
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                B[i, j] = (i == j) ? 1 : 0;
    }
    // 4: Calculate the error and compare with the accuracy goal.
    // Note: err = Abs(grad_phi); first the gradient of the next step:
    Error.Write($"xstart + s: {xstart+s}\n");
    Error.Write($"dx: {dx}\n");
    vector grad_f_s = new vector(n);
    for (int i = 0; i < n; i++)
    {
        Error.Write("Is this loop running?\n");
        vector x = xstart.copy() + s.copy();
        x[i] += dx;
        Error.Write($"{i}'th entry: f(x) = {f(x)}, f(xstart+s) = {f(xstart + s)}\n"); // was {1}, presumably meant {i}
        grad_f_s[i] = (f(x) - f(xstart + s)) / dx;
    }
    Error.Write($"Found grad_f_s: {grad_f_s}\n");
    double err = grad_f_s.simpleNorm();
    Error.Write($"Found error: {err}\n");
    Error.Write($"Found s: {s}\n");
    /*
     * Error.Write("ABORTING FOR DEBUGGING!");
     * return xstart + s;
     */
    if (steps > 999)
    {
        Error.Write($"Maximum steps reached ({steps} steps), terminating minimization\n");
        return xstart + s;
    }
    else if (err < eps)
    {
        return xstart + s;
    }
    else
    {
        // 5: If the step was not final, update the B matrix and
        // take another step:
        // Calculate y:
        vector y = grad_f_s - grad_f;
        // If s.dot(y) is too small, the update is dangerous; don't do it:
        if (Abs(s.dot(y)) < 1e-6)
        {
            return qnewton(f, xstart + s, eps, ref steps, B); // 'ref' was missing in the original calls
        }
        else
        {
            // Calculate c:
            vector c = (s - B * y) / (s.dot(y));
            // Calculate delta_B:
            matrix delta_B = c.outer(s);
            // Take another step:
            return qnewton(f, xstart + s, eps, ref steps, B + delta_B);
        }
    }
}
public static (vector, int) qnewton(
    Func<vector, double> f,    // Function to evaluate
    vector x,                  // starting point
    double acc = 1e-3,         // accuracy goal, |gradient| < acc on exit
    double alpha = 1e-4,       // alpha param for Armijo condition
    double dx = 1e-7,          // dx used in gradient calculation
    double minlam = 1e-7,      // minimum lambda value before reset
    int limit = 999,           // limit on iteration steps
    double eps = 1.0 / 4194304
)
{
    // Quasi-Newton minimization method for a multivariable function
    int n = x.size;
    // Approximate the inverse Hessian matrix B with the identity matrix
    matrix B = new matrix(n, n);
    B.set_identity();
    // Gradient of f(x)
    vector gradx = gradient(f, x, dx: dx);
    vector Dx;
    // Precalculate fx
    double fx = f(x);
    int nsteps = 0;
    do
    {
        nsteps++;
        // Calculate the Newton step
        Dx = -B * gradx;
        // Lambda factor
        double lam = 1.0;
        // Armijo condition step check (backtracking line search)
        while (f(x + lam * Dx) > fx + alpha * lam * Dx.dot(gradx))
        {
            // Check if B needs to be reset and begrudgingly accept
            if (lam < minlam)
            {
                B.set_identity();
                break;
            }
            // else update lambda
            lam /= 2;
        }
        // Calculate the new point z and gradz
        vector z = x + lam * Dx;
        vector gradz = gradient(f, z, dx: dx);
        // Calculate u and <u, y>
        vector y = gradz - gradx;
        vector s = lam * Dx;
        vector u = s - B * y;
        double uTy = u.dot(y);
        // Do the SR1 update of B if uTy is numerically safe
        if (Abs(uTy) > 1e-6) { B.update(u, u, 1 / uTy); }
        // Update x, gradx, fx
        x = z;
        gradx = gradz;
        fx = f(x);
    } while (gradx.norm2() > acc && nsteps < limit && Dx.norm2() > eps * x.norm2());
    Error.WriteLine($"\nminimization.qnewton returning (x, nsteps)");
    Error.WriteLine($"gradx.norm2() {gradx.norm2()}");
    Error.WriteLine($"acc {acc}");
    Error.WriteLine($"nsteps / limit {nsteps} / {limit}");
    Error.WriteLine($"Dx.norm2() {Dx.norm2()}");
    Error.WriteLine($"eps*x.norm2() {eps*x.norm2()}\n");
    return (x, nsteps);
} // end qnewton
// We generate our next step by updating (SR1) the Hessian based on the computed gradient
public static Tuple<int, vector> QuasiNewtonMinimization(Func<vector, double> φ, vector x, double Ɛ)
{
    int n = x.size;
    matrix B = new matrix(n, n);
    matrix δB = new matrix(n, n);
    B.set_unity(); // Initial zeroth approximation to H
    double EPS = 1.0 / 4194304;
    vector δx;
    vector s; // Rank-1 update to H
    int Counter = 0;
    double α = 1e-4; // was 10e-4, presumably a typo for the standard Armijo parameter
    vector test = new vector(2);
    int testCounter = 0;
    do
    {
        Counter++;
        double λ = 1.0;
        δx = -B * Gradient(φ, x);
        s = λ * δx;
        do // Take a more conservative step s
        {
            λ = λ * 0.5;
            s = λ * δx; // Gentler backtracking
            // Armijo condition; the gradient must be evaluated at x, not at s
            if (φ(x + s) < φ(x) + α * s.dot(Gradient(φ, x))) { break; }
            if (λ < EPS) { B.set_unity(); break; }
        } while (true);
        test[testCounter] = Gradient(φ, x + s).norm();
        Error.WriteLine($"norm(Gradient)={test[testCounter]}");
        if (testCounter != test.size - 1) { testCounter++; }
        if (testCounter == test.size - 1)
        {
            // Note: with test.size == 2, testCounter never exceeds 1, so this
            // divergence reset cannot trigger as written.
            if (test[test.size - 1] > test[0] && testCounter > 1)
            {
                B.set_unity(); // On divergence
            }
        }
        vector y = Gradient(φ, x + s) - Gradient(φ, x);
        vector u = s - B * y;
        if (Abs(u.dot(y)) > 1e-6) // guard before dividing by u.dot(y); was 10e-6
        {
            matrix U = new matrix(n, n); // Constructed from the outer product u u^T
            for (int i = 0; i < n; i++)
                for (int j = 0; j < n; j++)
                    U[i, j] = u[i] * u[j];
            δB = (1 / u.dot(y)) * U; // Update matrix
            B = B + δB;
        }
        x = x + s;
        if (Counter > 600)
        {
            Error.WriteLine($"No or slow convergence: program terminated after {Counter} steps");
            break;
        }
    } while (Gradient(φ, x).norm() > Ɛ); // x has already been advanced by s
    return Tuple.Create(Counter, x);
}
public static vector qnewton(Func<vector, double> f, vector xstart, double acc = 1e-3)
{
    int nstep = 0;
    int n = xstart.size;
    vector x = xstart;
    matrix B = new matrix(n, n);
    B.set_identity();
    vector fgrad, deltax;
    fgrad = gradient(f, x);
    deltax = -B * fgrad;
    /* Perform step with line search */
    while (nstep < 999)
    {
        if (fgrad.norm() < acc) { break; }
        if (deltax.norm() < eps * x.norm()) { break; }
        double fx = f(x);
        double lambda = 1;
        vector step = lambda * deltax;
        while (f(x + step) > fx)
        {
            lambda /= 2;
            step = lambda * deltax;
            if (lambda < eps)
            {
                B.set_identity();
                /*
                 * for(int i=0; i<step.size; i++) {
                 *     step[i] = 0;
                 * }
                 */
                break;
            }
        }
        /* Update the inverse Hessian matrix */
        vector fstepgrad = gradient(f, x + step);
        vector dy = fstepgrad - fgrad; // y
        vector u = step - B * dy;
        double uTdy = u % dy;
        // Guard on the actual SR1 denominator before dividing by it
        // (the original tested step.dot(dy) but divided by uTdy)
        if (Abs(uTdy) > eps) { B.update(u, u, 1 / uTdy); }
        x += step;
        fx = f(x);
        fgrad = fstepgrad;
        deltax = -B * fgrad;
        nstep++;
    }
    return x;
}//qnewton