/*************************************************************************
LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION

Creates an optimizer state for minimizing a function F(x) of N arguments
with a quasi-Newton method (L-BFGS scheme) designed to use a minimum
amount of memory.

The inverse Hessian approximation is built from information about the
last M steps of the algorithm (instead of N), which lowers the memory
requirement from order N^2 to order 2*N*M.

INPUT PARAMETERS:
    N       -   problem dimension. N>0
    M       -   number of corrections in the BFGS scheme of Hessian
                approximation update. Recommended value: 3<=M<=7.
                A smaller value causes worse convergence; a bigger one
                does not improve convergence considerably, but reduces
                performance. M<=N.
    X       -   initial solution approximation, array[0..N-1].

OUTPUT PARAMETERS:
    State   -   structure used for reverse communication.

The State structure is initialized with default optimization parameters
(stopping conditions, step size, etc.). Use the MinLBFGSSet...()
functions to tune them, then call MinLBFGSIteration() to advance the
algorithm.

NOTES:
1. stopping conditions may be tuned with MinLBFGSSetCond()
2. if the target function contains exp() or other fast growing functions
   and the optimizer makes steps large enough to overflow, use
   MinLBFGSSetStpMax() to bound the steps. L-BFGS rarely needs such
   tuning.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreate(int n, int m, ref double[] x, ref minlbfgsstate state)
{
    // Flags = 0: no additional settings are requested from the extended
    // constructor, which does all of the actual work.
    const int noflags = 0;

    minlbfgscreatex(n, m, ref x, noflags, ref state);
}
/*************************************************************************
L-BFGS algorithm results

Called after MinLBFGSIteration() returned False.

INPUT PARAMETERS:
    State   -   algorithm state (used by MinLBFGSIteration).

OUTPUT PARAMETERS:
    X       -   array[0..N-1], solution (freshly allocated copy of
                State.X)
    Rep     -   optimization report:
                * Rep.TerminationType completion code:
                    * -2    rounding errors prevent further improvement.
                            X contains best point found.
                    * -1    incorrect parameters were specified
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
                * Rep.IterationsCount contains iterations count
                * NFEV contains number of function calculations

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsresults(ref minlbfgsstate state, ref double[] x, ref minlbfgsreport rep)
{
    int count = state.n;
    int last = count - 1;

    // The caller always receives a new array of exactly N elements.
    x = new double[count];
    for (int idx = 0; idx <= last; idx++)
    {
        x[idx] = state.x[idx];
    }

    // Publish the counters accumulated by the iteration routine.
    rep.iterationscount = state.repiterationscount;
    rep.nfev = state.repnfev;
    rep.terminationtype = state.repterminationtype;
}
/*************************************************************************
Extended constructor, for internal use only.

Behaves like MinLBFGSCreate() but accepts one additional parameter:

    Flags   -   additional settings:
                * Flags = 0     means no additional settings
                * Flags = 1     "do not allocate memory". Used when
                                solving many subsequent tasks with the
                                same N/M values. The first call MUST be
                                made without this flag bit set;
                                subsequent calls with the same
                                MinLBFGSState structure can set Flags
                                to 1.

Only the lowest bit of Flags is examined here; the raw value is stored
in State.Flags.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreatex(int n, int m, ref double[] x, int flags, ref minlbfgsstate state)
{
    System.Diagnostics.Debug.Assert(n >= 1, "MinLBFGS: N too small!");
    System.Diagnostics.Debug.Assert(m >= 1, "MinLBFGS: M too small!");
    System.Diagnostics.Debug.Assert(m <= n, "MinLBFGS: M too large!");

    //
    // Initialize
    //
    state.n = n;
    state.m = m;
    state.flags = flags;

    // Bit 0 cleared => working arrays must be (re)allocated.
    bool needalloc = (flags % 2) == 0;
    if (needalloc)
    {
        state.rho = new double[m];
        state.theta = new double[m];
        state.y = new double[m, n];
        state.s = new double[m, n];
        state.d = new double[n];
        state.x = new double[n];
        state.g = new double[n];
        state.work = new double[n];
    }

    // Default tuning: automatic stopping criterion, no reports, unbounded step.
    minlbfgssetcond(ref state, 0, 0, 0, 0);
    minlbfgssetxrep(ref state, false);
    minlbfgssetstpmax(ref state, 0);

    //
    // Prepare first run
    //
    state.k = 0;
    int last = n - 1;
    for (int idx = 0; idx <= last; idx++)
    {
        state.x[idx] = x[idx];
    }

    // Storage for the locals saved between reverse-communication calls:
    // 7 integers and 5 reals; stage = -1 marks "not started yet".
    state.rstate.ia = new int[7];
    state.rstate.ra = new double[5];
    state.rstate.stage = -1;
}
/*************************************************************************
Sets stopping conditions for the L-BFGS optimization algorithm.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state between calls and
                which is used for reverse communication. Must be
                initialized with MinLBFGSCreate()
    EpsG    -   >=0
                The subroutine finishes its work if the condition
                ||G||<EpsG is satisfied, where ||.|| means Euclidean
                norm and G is the gradient.
    EpsF    -   >=0
                The subroutine finishes its work if on k+1-th iteration
                the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
                is satisfied.
    EpsX    -   >=0
                The subroutine finishes its work if on k+1-th iteration
                the condition |X(k+1)-X(k)| <= EpsX is fulfilled.
    MaxIts  -   maximum number of iterations. If MaxIts=0, the number of
                iterations is unlimited.

Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) selects the
stopping criterion automatically (EpsX is replaced by 1.0E-6).

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetcond(ref minlbfgsstate state, double epsg, double epsf, double epsx, int maxits)
{
    System.Diagnostics.Debug.Assert(epsg >= 0, "MinLBFGSSetCond: negative EpsG!");
    System.Diagnostics.Debug.Assert(epsf >= 0, "MinLBFGSSetCond: negative EpsF!");
    System.Diagnostics.Debug.Assert(epsx >= 0, "MinLBFGSSetCond: negative EpsX!");
    System.Diagnostics.Debug.Assert(maxits >= 0, "MinLBFGSSetCond: negative MaxIts!");

    // Nothing requested at all => fall back to a small step-size criterion.
    bool nothingrequested = epsg == 0 && epsf == 0 && epsx == 0 && maxits == 0;

    state.epsg = epsg;
    state.epsf = epsf;
    state.epsx = nothingrequested ? 1.0E-6 : epsx;
    state.maxits = maxits;
}
/*************************************************************************
Turns iteration reporting on or off.

This is a thin wrapper which forwards the request to
minlbfgs.minlbfgssetxrep() using the wrapped State.innerobj.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    NeedXRep-   whether iteration reports are needed or not

If NeedXRep is True, the algorithm will call the rep() callback function
if it is provided to MinLBFGSOptimize().

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetxrep(minlbfgsstate state, bool needxrep)
{
    minlbfgs.minlbfgssetxrep(state.innerobj, needxrep);
}
/*************************************************************************
Restarts the L-BFGS algorithm from a new point. All optimization
parameters are left unchanged.

This function allows solving multiple optimization problems (which must
have the same number of dimensions) without object reallocation penalty.

INPUT PARAMETERS:
    State   -   structure used to store algorithm state
    X       -   new starting point; at least State.N elements, all of
                them finite (both conditions are asserted).

On return State.X holds a copy of the first N elements of X, the
reverse-communication record is reset (stage = -1, fresh save buffers)
and the request fields are cleared.

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsrestartfrom(minlbfgsstate state, double[] x)
{
    int i_ = 0;

    alglib.ap.assert(alglib.ap.len(x)>=state.n, "MinLBFGSRestartFrom: Length(X)<N!");
    alglib.ap.assert(apserv.isfinitevector(x, state.n), "MinLBFGSRestartFrom: X contains infinite or NaN values!");
    for(i_=0; i_<=state.n-1;i_++)
    {
        state.x[i_] = x[i_];
    }

    //
    // Buffers for the locals preserved between reverse-communication calls.
    //
    // minlbfgsiteration() saves seven integers (ia[0..6]) and five reals
    // (ra[0..4]) on every return to the caller, so the buffers must be at
    // least that large; smaller ones make the first save run past the end
    // of the arrays. Larger-than-needed buffers are harmless.
    //
    state.rstate.ia = new int[6+1];
    state.rstate.ra = new double[4+1];
    state.rstate.stage = -1;
    clearrequestfields(state);
}
/*************************************************************************
Turns on verification of the user-supplied analytic gradient:
* the user calls this subroutine before optimization begins
* MinLBFGSOptimize() is called
* prior to actual optimization, for each component of the parameters
  being optimized X[i] the algorithm performs the following steps:
  * two trial steps are made to X[i]-TestStep*S[i] and
    X[i]+TestStep*S[i], where X[i] is the i-th component of the initial
    point and S[i] is the scale of the i-th parameter
  * if needed, steps are bounded with respect to constraints on X[]
  * F(X) is evaluated at these trial points
  * one more evaluation is performed in the middle point of the interval
  * a cubic model is built using function values and derivatives at the
    trial points, and its prediction is compared with the actual value
    in the middle point
  * if the difference between prediction and actual value is higher than
    some predetermined threshold, the algorithm stops with completion
    code -7; Rep.VarIdx is set to the index of the parameter with the
    incorrect derivative.
* after verification is over, the algorithm proceeds to the actual
  optimization.

NOTE 1: verification needs N (parameters count) gradient evaluations. It
        is very costly and should be used only for low dimensional
        problems, when you want to be sure that analytic derivatives
        were calculated correctly. It should not be used in production
        code (unless you want to check derivatives provided by some
        third party).

NOTE 2: choose TestStep carefully. A value which is too large (so large
        that function behaviour is significantly non-cubic) will lead to
        false alarms. Different steps for different parameters can be
        used by setting the scale with MinLBFGSSetScale().

NOTE 3: this function may lead to false positives. If it reports that
        the I-th derivative was calculated incorrectly, you may decrease
        the test step and try one more time - maybe your function
        changes too sharply and the step is too large for such a rapidly
        changing function.

INPUT PARAMETERS:
    State       -   structure used to store algorithm state
    TestStep    -   verification step:
                    * TestStep=0 turns verification off
                    * TestStep>0 activates verification
                    Must be finite and non-negative (asserted).

  -- ALGLIB --
     Copyright 24.05.2012 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetgradientcheck(minlbfgsstate state, double teststep)
{
    // Finiteness is checked first so that NaN/Infinity is reported as such
    // and not as a negative step.
    bool stepisfinite = math.isfinite(teststep);
    alglib.ap.assert(stepisfinite, "MinLBFGSSetGradientCheck: TestStep contains NaN or Infinite");

    bool stepisnonnegative = teststep>=0;
    alglib.ap.assert(stepisnonnegative, "MinLBFGSSetGradientCheck: invalid argument TestStep(TestStep<0)");

    state.teststep = teststep;
}
/*************************************************************************
Sets the exact low-rank preconditioner for the Hessian matrix
H=D+W'*C*W, where:
* H is a Hessian matrix, which is approximated by D/W/C
* D is a NxN diagonal positive definite matrix
* W is a KxN low-rank correction
* C is a KxK semidefinite diagonal factor of the low-rank correction

This preconditioner is exact but slow - it requires O(N*K^2) time to be
built and O(N*K) time to be applied. The Woodbury matrix identity is used
to build the inverse matrix.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    D, C, W -   the factors described above, forwarded as-is to
                optserv.preparelowrankpreconditioner()
    Cnt     -   forwarded as-is together with State.N
                NOTE(review): presumably the correction count K -
                confirm against optserv.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetpreclowrankexact(minlbfgsstate state, double[] d, double[] c, double[,] w, int cnt)
{
    // Preconditioner type code stored by this setter.
    const int prectypecode = 5;

    // The type is switched before the buffer is prepared (same order as always).
    state.prectype = prectypecode;
    optserv.preparelowrankpreconditioner(d, c, w, state.n, cnt, state.lowrankbuf);
}
/*************************************************************************
L-BFGS algorithm results

Discards whatever X referenced before (it is replaced by an empty array)
and delegates to minlbfgsresultsbuf(), which fills X and Rep.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    X       -   array[0..N-1], solution
    Rep     -   optimization report:
                * Rep.TerminationType completion code:
                    * -8    internal integrity control detected infinite
                            or NAN values in function/gradient. Abnormal
                            termination signalled.
                    * -7    gradient verification failed.
                            See MinLBFGSSetGradientCheck() for more
                            information.
                    * -2    rounding errors prevent further improvement.
                            X contains best point found.
                    * -1    incorrect parameters were specified
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
                    *  8    terminated by user who called
                            minlbfgsrequesttermination(). X contains the
                            point which was "current accepted" when the
                            termination request was submitted.
                * Rep.IterationsCount contains iterations count
                * NFEV contains number of function calculations

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsresults(minlbfgsstate state, ref double[] x, minlbfgsreport rep)
{
    // Start from an empty array so nothing from the caller's previous
    // array is handed to the buffered variant.
    double[] emptyresult = new double[0];
    x = emptyresult;

    minlbfgsresultsbuf(state, ref x, rep);
}
/*************************************************************************
Modification of the preconditioner: Cholesky factorization of the
approximate Hessian is used.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    P       -   triangular preconditioner, Cholesky factorization of the
                approximate Hessian. array[0..N-1,0..N-1] (if larger,
                only the leading N elements are used).
    IsUpper -   whether the upper or lower triangle of P is given (the
                other triangle is not referenced)

After a call to this function the preconditioner is changed to P (P is
copied into the internal buffer State.DenseH; a lower triangular P is
transposed while being stored).

NOTE:  the preconditioner can be changed "on the fly", during algorithm
       iterations.

NOTE 2: P should be nonsingular. An exception will be thrown otherwise.
        The check performed here only rejects a P whose diagonal is
        entirely zero.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetpreccholesky(minlbfgsstate state, double[,] p, bool isupper)
{
    alglib.ap.assert(apserv.isfinitertrmatrix(p, state.n, isupper), "MinLBFGSSetPrecCholesky: P contains infinite or NAN values!");

    // Largest absolute diagonal entry of P; zero means every diagonal
    // element is zero.
    double maxdiag = 0;
    for(int row=0; row<=state.n-1; row++)
    {
        maxdiag = Math.Max(maxdiag, Math.Abs(p[row,row]));
    }
    alglib.ap.assert(maxdiag>0, "MinLBFGSSetPrecCholesky: P is strictly singular!");

    // Grow the internal buffer only when it cannot hold an NxN matrix.
    bool buffertoosmall = alglib.ap.rows(state.denseh)<state.n || alglib.ap.cols(state.denseh)<state.n;
    if( buffertoosmall )
    {
        state.denseh = new double[state.n, state.n];
    }

    state.prectype = 1;
    if( !isupper )
    {
        // Lower triangle given: store its transpose.
        ablas.rmatrixtranspose(state.n, state.n, p, 0, 0, state.denseh, 0, 0);
    }
    else
    {
        // Upper triangle given: store it as is.
        ablas.rmatrixcopy(state.n, state.n, p, 0, 0, ref state.denseh, 0, 0);
    }
}
/*************************************************************************
Modification of the preconditioner: scale-based diagonal preconditioning.

This preconditioning mode can be useful when you don't have an
approximate diagonal of the Hessian, but you know that your variables are
badly scaled (for example, one variable is in [1,10], and another in
[1000,100000]), and most of the ill-conditioning comes from the different
scales of the variables.

In this case a simple scale-based preconditioner, with
H[i] = 1/(s[i]^2), can greatly improve convergence.

IMPORTANT: you should set the scale of your variables with a
MinLBFGSSetScale() call (before or after the MinLBFGSSetPrecScale()
call). Without knowledge of the scale of your variables the scale-based
preconditioner will be just a unit matrix.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetprecscale(minlbfgsstate state)
{
    // Preconditioner type code stored by this setter.
    const int prectypecode = 3;

    state.prectype = prectypecode;
}
/*************************************************************************
Sets the maximum step length.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    StpMax  -   maximum step length, >=0 and finite (both asserted).
                Set StpMax to 0.0 (default) if you don't want to limit
                the step length.

Use this subroutine when you optimize a target function which contains
exp() or other fast growing functions, and the optimization algorithm
makes steps large enough to overflow. It allows rejecting steps that are
too large (and therefore expose us to possible overflow) without
actually calculating the function value at x+stp*d.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetstpmax(minlbfgsstate state, double stpmax)
{
    // Finiteness first: NaN/Infinity gets its own message.
    bool limitisfinite = math.isfinite(stpmax);
    alglib.ap.assert(limitisfinite, "MinLBFGSSetStpMax: StpMax is not finite!");

    bool limitisnonnegative = stpmax>=0;
    alglib.ap.assert(limitisnonnegative, "MinLBFGSSetStpMax: StpMax<0!");

    state.stpmax = stpmax;
}
/*************************************************************************
Extended constructor, for internal use only.

Accepts additional parameters:

    Flags   -   additional settings:
                * Flags = 0     means no additional settings
                * Flags = 1     "do not allocate memory". Used when
                                solving many subsequent tasks with the
                                same N/M values. The first call MUST be
                                made without this flag bit set;
                                subsequent calls with the same
                                MinLBFGSState structure can set Flags
                                to 1.
    DiffStep-   numerical differentiation step

Besides allocating the working arrays (unless suppressed by Flags) the
routine turns gradient verification off (TestStep=0), installs default
stopping conditions / reporting / step bound, restarts the algorithm
from X, sets every scale coefficient State.S[i] to 1.0 and resets
State.PrecType to 0.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreatex(int n, int m, double[] x, int flags, double diffstep, minlbfgsstate state)
{
    alglib.ap.assert(n>=1, "MinLBFGS: N too small!");
    alglib.ap.assert(m>=1, "MinLBFGS: M too small!");
    alglib.ap.assert(m<=n, "MinLBFGS: M too large!");

    //
    // Initialize
    //
    state.teststep = 0;
    state.diffstep = diffstep;
    state.n = n;
    state.m = m;

    // Bit 0 cleared => working arrays must be (re)allocated.
    bool needalloc = (flags%2)==0;
    if( needalloc )
    {
        state.rho = new double[m];
        state.theta = new double[m];
        state.yk = new double[m, n];
        state.sk = new double[m, n];
        state.d = new double[n];
        state.xp = new double[n];
        state.x = new double[n];
        state.s = new double[n];
        state.g = new double[n];
        state.work = new double[n];
    }

    // Default tuning, then load the starting point.
    minlbfgssetcond(state, 0, 0, 0, 0);
    minlbfgssetxrep(state, false);
    minlbfgssetstpmax(state, 0);
    minlbfgsrestartfrom(state, x);

    // Unit scale for every variable.
    int last = n-1;
    for(int idx=0; idx<=last; idx++)
    {
        state.s[idx] = 1.0;
    }
    state.prectype = 0;
}
/*************************************************************************
L-BFGS iterations (reverse-communication driver).

The routine is a resumable state machine: every time it needs something
from the caller it stores its locals in State.RState, sets
State.RState.Stage to the resume point and returns True. On the next
call it restores the locals and jumps to the label matching the stage.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state between calls

RESULT:
    True    -   the caller has to act and call the routine again:
                * State.NeedFG is set   => function value State.F and
                  gradient State.G are requested at State.X
                  (stages 0 and 2)
                * State.XUpdated is set => a point is being reported;
                  only happens when State.XRep is set (stages 1 and 3)
    False   -   iterations are over; State.RepTerminationType holds the
                completion code assigned below (4, 5, 1, 2 or -2).

  -- ALGLIB --
     Copyright 20.03.2009 by Bochkanov Sergey
*************************************************************************/
public static bool minlbfgsiteration(minlbfgsstate state)
{
    bool result = new bool();
    int n = 0;
    int m = 0;
    int maxits = 0;
    double epsf = 0;
    double epsg = 0;
    double epsx = 0;
    int i = 0;
    int j = 0;
    int ic = 0;
    int mcinfo = 0;
    double v = 0;
    double vv = 0;
    int i_ = 0;

    //
    // Reverse communication preparations
    // I know it looks ugly, but it works the same way
    // anywhere from C++ to Python.
    //
    // This code initializes locals by:
    // * random values determined during code
    //   generation - on first subroutine call
    // * values from previous call - on subsequent calls
    //
    // The layout used here (ia[0..6], ra[0..4]) must match the one written
    // at lbl_rcomm below.
    //
    if( state.rstate.stage>=0 )
    {
        n = state.rstate.ia[0];
        m = state.rstate.ia[1];
        maxits = state.rstate.ia[2];
        i = state.rstate.ia[3];
        j = state.rstate.ia[4];
        ic = state.rstate.ia[5];
        mcinfo = state.rstate.ia[6];
        epsf = state.rstate.ra[0];
        epsg = state.rstate.ra[1];
        epsx = state.rstate.ra[2];
        v = state.rstate.ra[3];
        vv = state.rstate.ra[4];
    }
    else
    {
        n = -983;
        m = -989;
        maxits = -834;
        i = 900;
        j = -287;
        ic = 364;
        mcinfo = 214;
        epsf = -338;
        epsg = -686;
        epsx = 912;
        v = 585;
        vv = 497;
    }

    // Resume at the point where the previous call returned to the caller.
    if( state.rstate.stage==0 )
    {
        goto lbl_0;
    }
    if( state.rstate.stage==1 )
    {
        goto lbl_1;
    }
    if( state.rstate.stage==2 )
    {
        goto lbl_2;
    }
    if( state.rstate.stage==3 )
    {
        goto lbl_3;
    }

    //
    // Routine body
    //

    //
    // Unload frequently used variables from State structure
    // (just for typing convenience)
    //
    n = state.n;
    m = state.m;
    epsg = state.epsg;
    epsf = state.epsf;
    epsx = state.epsx;
    maxits = state.maxits;
    state.repterminationtype = 0;
    state.repiterationscount = 0;
    state.repnfev = 0;

    //
    // Calculate F/G at the initial point
    //
    clearrequestfields(state);
    state.needfg = true;
    state.rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    state.needfg = false;
    if( !state.xrep )
    {
        goto lbl_4;
    }

    // Reports are on: report the initial point as well.
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
    state.xupdated = false;
lbl_4:
    state.repnfev = 1;
    state.fold = state.f;

    // v = Euclidean norm of the initial gradient.
    v = 0.0;
    for(i_=0; i_<=n-1;i_++)
    {
        v += state.g[i_]*state.g[i_];
    }
    v = Math.Sqrt(v);
    if( (double)(v)<=(double)(epsg) )
    {
        // Gradient is already small enough at the starting point.
        // (With EpsG>=0 this also covers v==0, so 1.0/v below is safe.)
        state.repterminationtype = 4;
        result = false;
        return result;
    }

    //
    // Choose initial step
    //
    if( (double)(state.stpmax)==(double)(0) )
    {
        state.stp = Math.Min(1.0/v, 1);
    }
    else
    {
        state.stp = Math.Min(1.0/v, state.stpmax);
    }

    // First search direction is the antigradient.
    for(i_=0; i_<=n-1;i_++)
    {
        state.d[i_] = -state.g[i_];
    }

    //
    // Main cycle
    //
    // "if( false ) goto lbl_7" is the generated form of an endless loop;
    // the loop is left only through the return statements below.
    //
lbl_6:
    if( false )
    {
        goto lbl_7;
    }

    //
    // Main cycle: prepare to 1-D line search
    //
    // p - row of S/Y used for the current correction pair (circular buffer)
    // q - number of previous pairs taken into account, at most m-1
    //
    state.p = state.k%m;
    state.q = Math.Min(state.k, m-1);

    //
    // Store X[k], G[k]
    //
    // They are stored negated, so that adding X[k+1], G[k+1] after the
    // line search yields the differences S[k], Y[k].
    //
    for(i_=0; i_<=n-1;i_++)
    {
        state.s[state.p,i_] = -state.x[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.y[state.p,i_] = -state.g[i_];
    }

    //
    // Minimize F(x+alpha*d)
    // Calculate S[k], Y[k]
    //
    state.mcstage = 0;
    if( state.k!=0 )
    {
        // Every iteration except the very first one starts from unit step.
        state.stp = 1.0;
    }
    linmin.linminnormalized(ref state.d, ref state.stp, n);
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, state.lstate, ref state.mcstage);

    // Line search loop: while mcsrch keeps mcstage non-zero, F/G are
    // requested from the caller at the trial point and mcsrch is called again.
lbl_8:
    if( state.mcstage==0 )
    {
        goto lbl_9;
    }
    clearrequestfields(state);
    state.needfg = true;
    state.rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    state.needfg = false;
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, state.lstate, ref state.mcstage);
    goto lbl_8;
lbl_9:
    if( !state.xrep )
    {
        goto lbl_10;
    }

    //
    // report
    //
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
    state.xupdated = false;
lbl_10:
    state.repnfev = state.repnfev+state.nfev;
    state.repiterationscount = state.repiterationscount+1;

    // Complete S[k] = X[k+1]-X[k] and Y[k] = G[k+1]-G[k].
    for(i_=0; i_<=n-1;i_++)
    {
        state.s[state.p,i_] = state.s[state.p,i_] + state.x[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.y[state.p,i_] = state.y[state.p,i_] + state.g[i_];
    }

    //
    // Stopping conditions
    //
    if( state.repiterationscount>=maxits & maxits>0 )
    {
        //
        // Too many iterations
        //
        state.repterminationtype = 5;
        result = false;
        return result;
    }
    v = 0.0;
    for(i_=0; i_<=n-1;i_++)
    {
        v += state.g[i_]*state.g[i_];
    }
    if( (double)(Math.Sqrt(v))<=(double)(epsg) )
    {
        //
        // Gradient is small enough
        //
        state.repterminationtype = 4;
        result = false;
        return result;
    }

    // NOTE(review): the signed decrease fold-f is tested here, not its
    // absolute value, so a step which does not decrease F also ends the
    // iterations with code 1 (even when EpsF=0).
    if( (double)(state.fold-state.f)<=(double)(epsf*Math.Max(Math.Abs(state.fold), Math.Max(Math.Abs(state.f), 1.0))) )
    {
        //
        // F(k+1)-F(k) is small enough
        //
        state.repterminationtype = 1;
        result = false;
        return result;
    }
    v = 0.0;
    for(i_=0; i_<=n-1;i_++)
    {
        v += state.s[state.p,i_]*state.s[state.p,i_];
    }
    if( (double)(Math.Sqrt(v))<=(double)(epsx) )
    {
        //
        // X(k+1)-X(k) is small enough
        //
        state.repterminationtype = 2;
        result = false;
        return result;
    }

    //
    // If Wolfe conditions are satisfied, we can update
    // limited memory model.
    //
    // However, if conditions are not satisfied (NFEV limit is met,
    // function is too wild, ...), we'll skip L-BFGS update
    //
    if( mcinfo!=1 )
    {
        //
        // Skip update.
        //
        // In such cases we'll initialize search direction by
        // antigradient vector, because it leads to more
        // transparent code with less number of special cases
        //
        // k is not advanced in this branch, so the same row p is
        // overwritten on the next pass.
        //
        state.fold = state.f;
        for(i_=0; i_<=n-1;i_++)
        {
            state.d[i_] = -state.g[i_];
        }
    }
    else
    {
        //
        // Calculate Rho[k], GammaK
        //
        // v = Y[k]'*S[k], vv = Y[k]'*Y[k]
        //
        v = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            v += state.y[state.p,i_]*state.s[state.p,i_];
        }
        vv = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            vv += state.y[state.p,i_]*state.y[state.p,i_];
        }
        if( (double)(v)==(double)(0) | (double)(vv)==(double)(0) )
        {
            //
            // Rounding errors make further iterations impossible.
            //
            state.repterminationtype = -2;
            result = false;
            return result;
        }
        state.rho[state.p] = 1/v;
        state.gammak = v/vv;

        //
        //  Calculate d(k+1) = -H(k+1)*g(k+1)
        //
        //  for I:=K downto K-Q do
        //      V = s(i)^T * work(iteration:I)
        //      theta(i) = V
        //      work(iteration:I+1) = work(iteration:I) - V*Rho(i)*y(i)
        //  work(last iteration) = H0*work(last iteration)
        //  for I:=K-Q to K do
        //      V = y(i)^T*work(iteration:I)
        //      work(iteration:I+1) = work(iteration:I) +(-V+theta(i))*Rho(i)*s(i)
        //
        //  NOW WORK CONTAINS d(k+1)
        //
        for(i_=0; i_<=n-1;i_++)
        {
            state.work[i_] = state.g[i_];
        }

        // Backward pass over the stored pairs, newest first.
        for(i=state.k; i>=state.k-state.q; i--)
        {
            ic = i%m;
            v = 0.0;
            for(i_=0; i_<=n-1;i_++)
            {
                v += state.s[ic,i_]*state.work[i_];
            }
            state.theta[ic] = v;
            vv = v*state.rho[ic];
            for(i_=0; i_<=n-1;i_++)
            {
                state.work[i_] = state.work[i_] - vv*state.y[ic,i_];
            }
        }

        // Initial inverse Hessian approximation H0 = GammaK*I.
        v = state.gammak;
        for(i_=0; i_<=n-1;i_++)
        {
            state.work[i_] = v*state.work[i_];
        }

        // Forward pass over the stored pairs, oldest first.
        for(i=state.k-state.q; i<=state.k; i++)
        {
            ic = i%m;
            v = 0.0;
            for(i_=0; i_<=n-1;i_++)
            {
                v += state.y[ic,i_]*state.work[i_];
            }
            vv = state.rho[ic]*(-v+state.theta[ic]);
            for(i_=0; i_<=n-1;i_++)
            {
                state.work[i_] = state.work[i_] + vv*state.s[ic,i_];
            }
        }
        for(i_=0; i_<=n-1;i_++)
        {
            state.d[i_] = -state.work[i_];
        }

        //
        // Next step
        //
        state.fold = state.f;
        state.k = state.k+1;
    }
    goto lbl_6;
lbl_7:
    result = false;
    return result;

    //
    // Saving state
    //
    // Every "return to caller" goes through here: locals are written to
    // RState so that the next call can restore them.
    //
lbl_rcomm:
    result = true;
    state.rstate.ia[0] = n;
    state.rstate.ia[1] = m;
    state.rstate.ia[2] = maxits;
    state.rstate.ia[3] = i;
    state.rstate.ia[4] = j;
    state.rstate.ia[5] = ic;
    state.rstate.ia[6] = mcinfo;
    state.rstate.ra[0] = epsf;
    state.rstate.ra[1] = epsg;
    state.rstate.ra[2] = epsx;
    state.rstate.ra[3] = v;
    state.rstate.ra[4] = vv;
    return result;
}
/*************************************************************************
Sets stopping conditions for the L-BFGS optimization algorithm.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    EpsG    -   >=0
                The subroutine finishes its work if the condition
                |v|<EpsG is satisfied, where:
                * |.| means Euclidean norm
                * v - scaled gradient vector, v[i]=g[i]*s[i]
                * g - gradient
                * s - scaling coefficients set by MinLBFGSSetScale()
    EpsF    -   >=0
                The subroutine finishes its work if on k+1-th iteration
                the condition |F(k+1)-F(k)|<=EpsF*max{|F(k)|,|F(k+1)|,1}
                is satisfied.
    EpsX    -   >=0
                The subroutine finishes its work if on k+1-th iteration
                the condition |v|<=EpsX is fulfilled, where:
                * |.| means Euclidean norm
                * v - scaled step vector, v[i]=dx[i]/s[i]
                * dx - step vector, dx=X(k+1)-X(k)
                * s - scaling coefficients set by MinLBFGSSetScale()
    MaxIts  -   maximum number of iterations. If MaxIts=0, the number of
                iterations is unlimited.

EpsG, EpsF and EpsX must be finite and non-negative, MaxIts must be
non-negative (all asserted).

Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) selects the
stopping criterion automatically (EpsX is replaced by 1.0E-6).

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetcond(minlbfgsstate state, double epsg, double epsf, double epsx, int maxits)
{
    // Each tolerance: finiteness first, then sign.
    alglib.ap.assert(math.isfinite(epsg), "MinLBFGSSetCond: EpsG is not finite number!");
    alglib.ap.assert(epsg>=0, "MinLBFGSSetCond: negative EpsG!");
    alglib.ap.assert(math.isfinite(epsf), "MinLBFGSSetCond: EpsF is not finite number!");
    alglib.ap.assert(epsf>=0, "MinLBFGSSetCond: negative EpsF!");
    alglib.ap.assert(math.isfinite(epsx), "MinLBFGSSetCond: EpsX is not finite number!");
    alglib.ap.assert(epsx>=0, "MinLBFGSSetCond: negative EpsX!");
    alglib.ap.assert(maxits>=0, "MinLBFGSSetCond: negative MaxIts!");

    // Nothing requested at all => fall back to a small step-size criterion.
    bool nothingrequested = epsg==0 && epsf==0 && epsx==0 && maxits==0;

    state.epsg = epsg;
    state.epsf = epsf;
    state.epsx = nothingrequested ? 1.0E-6 : epsx;
    state.maxits = maxits;
}
/*************************************************************************
Extended constructor, for internal use only.

Accepts additional parameters:

    Flags   -   additional settings:
                * Flags = 0     means no additional settings
                * Flags = 1     "do not allocate memory". Used when
                                solving many subsequent tasks with the
                                same N/M values. The first call MUST be
                                made without this flag bit set;
                                subsequent calls with the same
                                MinLBFGSState structure can set Flags
                                to 1.

Only the lowest bit of Flags is examined here; the raw value is stored
in State.Flags. After the (optional) allocation the routine installs
default stopping conditions / reporting / step bound and restarts the
algorithm from X.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreatex(int n, int m, double[] x, int flags, minlbfgsstate state)
{
    ap.assert(n>=1, "MinLBFGS: N too small!");
    ap.assert(m>=1, "MinLBFGS: M too small!");
    ap.assert(m<=n, "MinLBFGS: M too large!");

    //
    // Initialize
    //
    state.n = n;
    state.m = m;
    state.flags = flags;

    // Bit 0 cleared => working arrays must be (re)allocated.
    bool needalloc = (flags%2)==0;
    if( needalloc )
    {
        state.rho = new double[m];
        state.theta = new double[m];
        state.y = new double[m, n];
        state.s = new double[m, n];
        state.d = new double[n];
        state.x = new double[n];
        state.g = new double[n];
        state.work = new double[n];
    }

    // Default tuning, then load the starting point.
    minlbfgssetcond(state, 0, 0, 0, 0);
    minlbfgssetxrep(state, false);
    minlbfgssetstpmax(state, 0);
    minlbfgsrestartfrom(state, x);
}
/*************************************************************************
Clears the request fields (to be sure that we don't forget to clear
something): both State.NeedFG and State.XUpdated are reset to False.
*************************************************************************/
private static void clearrequestfields(ref minlbfgsstate state)
{
    // No request of either kind is pending after this call.
    state.xupdated = false;
    state.needfg = false;
}
/*************************************************************************
Turns iteration reporting on or off.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state between calls and
                which is used for reverse communication. Must be
                initialized with MinLBFGSCreate()
    NeedXRep-   whether iteration reports are needed or not

Usually the algorithm returns from MinLBFGSIteration() only when it
needs the function/gradient (which is indicated by the NeedFG field).
With this function it can also be made to stop after each iteration (one
iteration may include more than one function evaluation), which is
indicated by the XUpdated field.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetxrep(ref minlbfgsstate state, bool needxrep)
{
    // The flag is only recorded here; it is consulted by the iteration routine.
    bool reportsenabled = needxrep;

    state.xrep = reportsenabled;
}
/*************************************************************************
 * L-BFGS iterations (reverse communication driver)
 *
 * Called after initialization with MinLBFGSCreate() function. Each call
 * either asks the caller for something (returns True) or reports that the
 * optimization has finished (returns False).
 *
 * INPUT PARAMETERS:
 *     State   -   structure which stores algorithm state between calls and
 *                 which is used for reverse communication. Must be initialized
 *                 with MinLBFGSCreate()
 *
 * RESULT:
 *     if function returned False, iterative process has converged.
 *     Use MinLBFGSResults() to obtain optimization results.
 *     if subroutine returned True, then, depending on structure fields, we
 *     have one of the following situations
 *
 *
 * === FUNC/GRAD REQUEST ===
 * State.NeedFG is True => function value/gradient are needed.
 * Caller should calculate function value State.F and gradient
 * State.G[0..N-1] at State.X[0..N-1] and call MinLBFGSIteration() again.
 *
 * === NEW ITERATION IS REPORTED ===
 * State.XUpdated is True => one more iteration was made.
 * State.X contains current position, State.F contains function value at X.
 * You can read info from these fields, but never modify them because they
 * contain the only copy of optimization algorithm state.
 *
 *
 * One and only one of these fields (NeedFG, XUpdated) is true on return. New
 * iterations are reported only when reports are explicitly turned on by
 * MinLBFGSSetXRep() function, so if you never called it, you can expect that
 * NeedFG is always True.
 *
 * TERMINATION CODES written to State.RepTerminationType by this routine:
 *     -2   y'*s or y'*y became exactly zero (rounding errors)
 *      1   F decrease is no more than EpsF*max(|Fold|,|F|,1)
 *      2   step norm is no more than EpsX
 *      4   gradient norm is no more than EpsG
 *      5   MaxIts iterations were made (only checked when MaxIts>0)
 *
 *
 *   -- ALGLIB --
 *      Copyright 20.03.2009 by Bochkanov Sergey
 *************************************************************************/
public static bool minlbfgsiteration(ref minlbfgsstate state)
{
    bool result = new bool();
    int n = 0;
    int m = 0;
    int maxits = 0;
    double epsf = 0;
    double epsg = 0;
    double epsx = 0;
    int i = 0;
    int j = 0;
    int ic = 0;
    int mcinfo = 0;
    double v = 0;
    double vv = 0;
    int i_ = 0;

    //
    // Reverse communication preparations
    // I know it looks ugly, but it works the same way
    // anywhere from C++ to Python.
    //
    // This code initializes locals by:
    // * random values determined during code
    //   generation - on first subroutine call
    // * values from previous call - on subsequent calls
    //
    // NOTE: a negative stage is treated as "first call"; the code that sets
    // it is outside this routine (presumably the Create function - confirm).
    //
    if (state.rstate.stage >= 0)
    {
        n = state.rstate.ia[0];
        m = state.rstate.ia[1];
        maxits = state.rstate.ia[2];
        i = state.rstate.ia[3];
        j = state.rstate.ia[4];
        ic = state.rstate.ia[5];
        mcinfo = state.rstate.ia[6];
        epsf = state.rstate.ra[0];
        epsg = state.rstate.ra[1];
        epsx = state.rstate.ra[2];
        v = state.rstate.ra[3];
        vv = state.rstate.ra[4];
    }
    else
    {
        n = -983;
        m = -989;
        maxits = -834;
        i = 900;
        j = -287;
        ic = 364;
        mcinfo = 214;
        epsf = -338;
        epsg = -686;
        epsx = 912;
        v = 585;
        vv = 497;
    }

    // Resume at the point where the previous call returned to the caller.
    if (state.rstate.stage == 0)
    {
        goto lbl_0;
    }
    if (state.rstate.stage == 1)
    {
        goto lbl_1;
    }
    if (state.rstate.stage == 2)
    {
        goto lbl_2;
    }
    if (state.rstate.stage == 3)
    {
        goto lbl_3;
    }

    //
    // Routine body
    //

    //
    // Unload frequently used variables from State structure
    // (just for typing convenience)
    //
    n = state.n;
    m = state.m;
    epsg = state.epsg;
    epsf = state.epsf;
    epsx = state.epsx;
    maxits = state.maxits;
    state.repterminationtype = 0;
    state.repiterationscount = 0;
    state.repnfev = 0;

    //
    // Calculate F/G at the initial point
    //
    clearrequestfields(ref state);
    state.needfg = true;
    state.rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    // Stage 0 resumes here with F/G at the initial point supplied by the caller.
    if (!state.xrep)
    {
        goto lbl_4;
    }
    // Reports are on: hand the initial point to the caller as an XUpdated event.
    clearrequestfields(ref state);
    state.xupdated = true;
    state.rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
lbl_4:
    state.repnfev = 1;
    state.fold = state.f;
    // v = Euclidean norm of the initial gradient.
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.g[i_] * state.g[i_];
    }
    v = Math.Sqrt(v);
    if ((double)(v) <= (double)(epsg))
    {
        // Gradient at the starting point is already small enough.
        state.repterminationtype = 4;
        result = false;
        return(result);
    }

    //
    // Choose initial step
    //
    // The first trial step is 1/|g|, capped by 1 when StpMax is zero and
    // by StpMax otherwise.
    if ((double)(state.stpmax) == (double)(0))
    {
        state.stp = Math.Min(1.0 / v, 1);
    }
    else
    {
        state.stp = Math.Min(1.0 / v, state.stpmax);
    }
    // Initial search direction is the antigradient.
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.d[i_] = -state.g[i_];
    }

    //
    // Main cycle
    //
    // The condition is the literal false, so this loop is left only through
    // the return statements below or through lbl_rcomm.
lbl_6:
    if (false)
    {
        goto lbl_7;
    }

    //
    // Main cycle: prepare to 1-D line search
    //
    // p is the row of the S/Y/Rho/Theta storage used for correction k
    // (rows are reused modulo m); q bounds how many older corrections are
    // visited by the two loops further below.
    state.p = state.k % m;
    state.q = Math.Min(state.k, m - 1);

    //
    // Store X[k], G[k]
    //
    // Rows are filled with -X and -G now; adding the new X and G after the
    // line search turns them into the differences S[k] and Y[k].
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.s[state.p, i_] = -state.x[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.y[state.p, i_] = -state.g[i_];
    }

    //
    // Minimize F(x+alpha*d)
    // Calculate S[k], Y[k]
    //
    state.mcstage = 0;
    if (state.k != 0)
    {
        // Every iteration except the one with k=0 starts from a unit step.
        state.stp = 1.0;
    }
    linmin.linminnormalized(ref state.d, ref state.stp, n);
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, ref state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, ref state.lstate, ref state.mcstage);
lbl_8:
    // mcstage==0 means the line search is finished; otherwise it needs
    // F/G at the trial point it has written into State.X.
    if (state.mcstage == 0)
    {
        goto lbl_9;
    }
    clearrequestfields(ref state);
    state.needfg = true;
    state.rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, ref state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, ref state.lstate, ref state.mcstage);
    goto lbl_8;
lbl_9:
    if (!state.xrep)
    {
        goto lbl_10;
    }

    //
    // report
    //
    clearrequestfields(ref state);
    state.xupdated = true;
    state.rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
lbl_10:
    state.repnfev = state.repnfev + state.nfev;
    state.repiterationscount = state.repiterationscount + 1;
    // Complete S[k] = X[k+1]-X[k] and Y[k] = G[k+1]-G[k].
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.s[state.p, i_] = state.s[state.p, i_] + state.x[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.y[state.p, i_] = state.y[state.p, i_] + state.g[i_];
    }

    //
    // Stopping conditions
    //
    // NOTE: '&' is the non-short-circuit operator; both operands are plain
    // comparisons, so evaluating both has no side effects.
    if (state.repiterationscount >= maxits & maxits > 0)
    {
        //
        // Too many iterations
        //
        state.repterminationtype = 5;
        result = false;
        return(result);
    }
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.g[i_] * state.g[i_];
    }
    if ((double)(Math.Sqrt(v)) <= (double)(epsg))
    {
        //
        // Gradient is small enough
        //
        state.repterminationtype = 4;
        result = false;
        return(result);
    }
    if ((double)(state.fold - state.f) <= (double)(epsf * Math.Max(Math.Abs(state.fold), Math.Max(Math.Abs(state.f), 1.0))))
    {
        //
        // F(k+1)-F(k) is small enough
        //
        state.repterminationtype = 1;
        result = false;
        return(result);
    }
    // v = squared length of the step just taken (row p of S).
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.s[state.p, i_] * state.s[state.p, i_];
    }
    if ((double)(Math.Sqrt(v)) <= (double)(epsx))
    {
        //
        // X(k+1)-X(k) is small enough
        //
        state.repterminationtype = 2;
        result = false;
        return(result);
    }

    //
    // If Wolfe conditions are satisfied, we can update
    // limited memory model.
    //
    // However, if conditions are not satisfied (NFEV limit is met,
    // function is too wild, ...), we'll skip L-BFGS update
    //
    if (mcinfo != 1)
    {
        //
        // Skip update.
        //
        // In such cases we'll initialize search direction by
        // antigradient vector, because it leads to more
        // transparent code with less number of special cases
        //
        // Note that State.K is not advanced on this path.
        state.fold = state.f;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            state.d[i_] = -state.g[i_];
        }
    }
    else
    {
        //
        // Calculate Rho[k], GammaK
        //
        // v = Y[k]'*S[k], vv = Y[k]'*Y[k]
        v = 0.0;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            v += state.y[state.p, i_] * state.s[state.p, i_];
        }
        vv = 0.0;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            vv += state.y[state.p, i_] * state.y[state.p, i_];
        }
        // '|' is the non-short-circuit OR; both comparisons are side-effect free.
        if ((double)(v) == (double)(0) | (double)(vv) == (double)(0))
        {
            //
            // Rounding errors make further iterations impossible.
            //
            state.repterminationtype = -2;
            result = false;
            return(result);
        }
        state.rho[state.p] = 1 / v;
        state.gammak = v / vv;

        //
        //  Calculate d(k+1) = -H(k+1)*g(k+1)
        //
        //  for I:=K downto K-Q do
        //      V = s(i)^T * work(iteration:I)
        //      theta(i) = V
        //      work(iteration:I+1) = work(iteration:I) - V*Rho(i)*y(i)
        //  work(last iteration) = H0*work(last iteration)
        //  for I:=K-Q to K do
        //      V = y(i)^T*work(iteration:I)
        //      work(iteration:I+1) = work(iteration:I) +(-V+theta(i))*Rho(i)*s(i)
        //
        //  NOW WORK CONTAINS d(k+1)
        //
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            state.work[i_] = state.g[i_];
        }
        // First loop: newest correction to oldest; ic is the storage row.
        for (i = state.k; i >= state.k - state.q; i--)
        {
            ic = i % m;
            v = 0.0;
            for (i_ = 0; i_ <= n - 1; i_++)
            {
                v += state.s[ic, i_] * state.work[i_];
            }
            state.theta[ic] = v;
            vv = v * state.rho[ic];
            for (i_ = 0; i_ <= n - 1; i_++)
            {
                state.work[i_] = state.work[i_] - vv * state.y[ic, i_];
            }
        }
        // H0 is the scalar GammaK times identity.
        v = state.gammak;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            state.work[i_] = v * state.work[i_];
        }
        // Second loop: oldest correction to newest.
        for (i = state.k - state.q; i <= state.k; i++)
        {
            ic = i % m;
            v = 0.0;
            for (i_ = 0; i_ <= n - 1; i_++)
            {
                v += state.y[ic, i_] * state.work[i_];
            }
            vv = state.rho[ic] * (-v + state.theta[ic]);
            for (i_ = 0; i_ <= n - 1; i_++)
            {
                state.work[i_] = state.work[i_] + vv * state.s[ic, i_];
            }
        }
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            state.d[i_] = -state.work[i_];
        }

        //
        // Next step
        //
        state.fold = state.f;
        state.k = state.k + 1;
    }
    goto lbl_6;
lbl_7:
    result = false;
    return(result);

    //
    // Saving state
    //
    // Every "return True" path goes through here so that the locals can be
    // restored at the top of the next call.
lbl_rcomm:
    result = true;
    state.rstate.ia[0] = n;
    state.rstate.ia[1] = m;
    state.rstate.ia[2] = maxits;
    state.rstate.ia[3] = i;
    state.rstate.ia[4] = j;
    state.rstate.ia[5] = ic;
    state.rstate.ia[6] = mcinfo;
    state.rstate.ra[0] = epsf;
    state.rstate.ra[1] = epsg;
    state.rstate.ra[2] = epsx;
    state.rstate.ra[3] = v;
    state.rstate.ra[4] = vv;
    return(result);
}
/*************************************************************************
 * Sets the maximum step length.
 *
 * INPUT PARAMETERS:
 *     State   -   structure which stores algorithm state between calls and
 *                 which is used for reverse communication. Must be
 *                 initialized with MinLBFGSCreate()
 *     StpMax  -   maximum step length, >=0. Set StpMax to 0.0 if you don't
 *                 want to limit step length.
 *
 * Use this subroutine when the target function contains exp() or other fast
 * growing functions and the optimizer makes steps so large that they lead
 * to overflow. It lets the optimizer reject steps that are too large (and
 * therefore expose us to possible overflow) without actually calculating
 * the function value at x+stp*d.
 *
 *   -- ALGLIB --
 *      Copyright 02.04.2010 by Bochkanov Sergey
 *************************************************************************/
public static void minlbfgssetstpmax(ref minlbfgsstate state, double stpmax)
{
    // Debug-only sanity check: Debug.Assert is compiled out of release builds.
    bool stepLimitIsValid = stpmax >= 0.0;
    System.Diagnostics.Debug.Assert(stepLimitIsValid, "MinLBFGSSetStpMax: StpMax<0!");

    state.stpmax = stpmax;
}
/*************************************************************************
Switches per-iteration reporting on or off.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    NeedXRep-   whether iteration reports are needed or not

If NeedXRep is True, the algorithm will call the rep() callback function,
provided one was passed to MinLBFGSOptimize().

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetxrep(minlbfgsstate state, bool needxrep)
{
    // Only the flag is recorded here; nothing else in the state is touched.
    state.xrep = needxrep;
}
/*************************************************************************
Sets the maximum step length.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    StpMax  -   maximum step length, >=0. Set StpMax to 0.0 (default) if
                you don't want to limit step length.

Use this subroutine when the target function contains exp() or other fast
growing functions and the optimizer makes steps so large that they lead to
overflow. It lets the optimizer reject steps that are too large (and
therefore expose us to possible overflow) without actually calculating the
function value at x+stp*d.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetstpmax(minlbfgsstate state, double stpmax)
{
    // Thin wrapper: forwards to the computational core on the wrapped state.
    minlbfgs.minlbfgssetstpmax(state.innerobj, stpmax);
}
/*************************************************************************
This function sets scaling coefficients for LBFGS optimizer.

ALGLIB optimizers use scaling matrices to test stopping conditions (step
size and gradient are scaled before comparison with tolerances). Scale of
the I-th variable is a translation invariant measure of:
a) "how large" the variable is
b) how large the step should be to make significant changes in the function

Scaling is also used by finite difference variant of the optimizer - step
along I-th axis is equal to DiffStep*S[I].

In most optimizers (and in the LBFGS too) scaling is NOT a form of
preconditioning. It just affects stopping conditions. You should set
preconditioner by separate call to one of the MinLBFGSSetPrec...()
functions.

There is a special preconditioning mode, however, which uses scaling
coefficients to form diagonal preconditioning matrix. You can turn this
mode on, if you want. But you should understand that scaling is not the
same thing as preconditioning - these are two different, although related
forms of tuning solver.

INPUT PARAMETERS:
    State   -   structure stores algorithm state
    S       -   array[N], non-zero scaling coefficients
                S[i] may be negative, sign doesn't matter
                (absolute values are stored). If S is longer than N, only
                the leading N elements are used.

ERRORS:
    alglib.ap.assert() fails when S is shorter than N, or when any of the
    leading N elements is infinite, NAN or zero. The whole array is checked
    BEFORE anything is stored, so a rejected call leaves State.S exactly
    as it was.

  -- ALGLIB --
     Copyright 14.01.2011 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetscale(minlbfgsstate state, double[] s)
{
    int i = 0;
    int n = state.n;

    alglib.ap.assert(alglib.ap.len(s)>=n, "MinLBFGSSetScale: Length(S)<N");

    //
    // Pass 1: validate every coefficient without touching the state, so
    // that invalid input cannot leave State.S half-updated.
    //
    for(i=0; i<=n-1; i++)
    {
        alglib.ap.assert(math.isfinite(s[i]), "MinLBFGSSetScale: S contains infinite or NAN elements");
        alglib.ap.assert((double)(s[i])!=(double)(0), "MinLBFGSSetScale: S contains zero elements");
    }

    //
    // Pass 2: commit. Only magnitudes are kept.
    //
    for(i=0; i<=n-1; i++)
    {
        state.s[i] = Math.Abs(s[i]);
    }
}
/*************************************************************************
Reverse communication entry point.

The reverse communication interface is not documented or recommended for
direct use. See below for functions which provide a better documented API.

Returns whatever the computational core returns for the wrapped state.
*************************************************************************/
public static bool minlbfgsiteration(minlbfgsstate state)
{
    // Thin wrapper: delegate to the core and pass its answer straight back.
    return minlbfgs.minlbfgsiteration(state.innerobj);
}
/*************************************************************************
Modification of the preconditioner: selects the default preconditioner
(simple scaling, same for all elements of X).

INPUT PARAMETERS:
    State   -   structure which stores algorithm state

NOTE:  you can change preconditioner "on the fly", during algorithm
iterations.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetprecdefault(minlbfgsstate state)
{
    // Preconditioner type 0 is the code the iteration routine treats as
    // "default preconditioner".
    const int defaultPrecType = 0;

    state.prectype = defaultPrecType;
}
/*************************************************************************
This family of functions is used to launch iterations of nonlinear optimizer

These functions accept following parameters:
    grad    -   callback which calculates function (or merit function)
                value func and gradient grad at given point x.
                Must not be null.
    rep     -   optional callback which is called after each iteration
                can be null
    obj     -   optional object which is passed to func/grad/hess/jac/rep
                can be null

The loop keeps calling the reverse communication routine until it returns
False. An alglibexception is thrown when grad is null, or when the
optimizer asks for something other than function+gradient or an iteration
report.

  -- ALGLIB --
     Copyright 20.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsoptimize(minlbfgsstate state, ndimensional_grad grad, ndimensional_rep rep, object obj)
{
    if( grad==null )
    {
        throw new alglibexception("ALGLIB: error in 'minlbfgsoptimize()' (grad is null)");
    }

    while( alglib.minlbfgsiteration(state) )
    {
        if( state.needfg )
        {
            // Optimizer wants F and G at the current point.
            grad(state.x, ref state.innerobj.f, state.innerobj.g, obj);
        }
        else if( state.innerobj.xupdated )
        {
            // Iteration report; silently skipped when no callback was given.
            if( rep!=null )
            {
                rep(state.innerobj.x, state.innerobj.f, obj);
            }
        }
        else
        {
            // A request this driver cannot serve (e.g. function-only).
            throw new alglibexception("ALGLIB: error in 'minlbfgsoptimize' (some derivatives were not provided?)");
        }
    }
}
/*************************************************************************
Modification of the preconditioner: diagonal of approximate Hessian is
used.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    D       -   diagonal of the approximate Hessian, array[0..N-1],
                (if larger, only leading N elements are used).

NOTE:  you can change preconditioner "on the fly", during algorithm
iterations.

NOTE 2: D[i] should be positive and finite. The assertion fails otherwise,
and it fails before any part of the state is modified.

NOTE 3: you should pass diagonal of approximate Hessian - NOT ITS INVERSE.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetprecdiag(minlbfgsstate state, double[] d)
{
    int k;

    alglib.ap.assert(alglib.ap.len(d)>=state.n, "MinLBFGSSetPrecDiag: D is too short");

    // Check all leading N entries first.
    for(k=0; k<state.n; k++)
    {
        double entry = d[k];
        alglib.ap.assert(math.isfinite(entry), "MinLBFGSSetPrecDiag: D contains infinite or NAN elements");
        alglib.ap.assert(entry>0, "MinLBFGSSetPrecDiag: D contains non-positive elements");
    }

    // Input is good: make room, select preconditioner type 2, copy D.
    apserv.rvectorsetlengthatleast(ref state.diagh, state.n);
    state.prectype = 2;
    for(k=0; k<state.n; k++)
    {
        state.diagh[k] = d[k];
    }
}
/*************************************************************************
L-BFGS algorithm results

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    X       -   array[0..N-1], solution
    Rep     -   optimization report:
                * Rep.TerminationType completion code:
                    * -2    rounding errors prevent further improvement.
                            X contains best point found.
                    * -1    incorrect parameters were specified
                    *  1    relative function improvement is no more than
                            EpsF.
                    *  2    relative step is no more than EpsX.
                    *  4    gradient norm is no more than EpsG
                    *  5    MaxIts steps was taken
                    *  7    stopping conditions are too stringent,
                            further improvement is impossible
                * Rep.IterationsCount contains iterations count
                * NFEV contains number of function calculations

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsresults(minlbfgsstate state, out double[] x, out minlbfgsreport rep)
{
    // 'out' parameters must be assigned before they can be handed to the
    // core, which fills them in.
    rep = new minlbfgsreport();
    x = new double[0];

    minlbfgs.minlbfgsresults(state.innerobj, ref x, rep.innerobj);
}
/*************************************************************************
This function sets low-rank preconditioner for Hessian matrix  H=D+W'*C*W,
where:
* H is a Hessian matrix, which is approximated by D/W/C
* D is a NxN diagonal positive definite matrix
* W is a KxN low-rank correction
* C is a KxK positive definite diagonal factor of low-rank correction

This preconditioner is inexact but fast - it requires O(N*K)  time  to  be
applied. Preconditioner P is calculated by artificially constructing a set
of BFGS updates which tries to reproduce behavior of H:
* Sk = Wk (k-th row of W)
* Yk = (D+Wk'*Ck*Wk)*Sk
* Yk/Sk are reordered by ascending of C[k]*norm(Wk)^2

Here we assume that rows of Wk are orthogonal or nearly orthogonal,  which
allows us to have O(N*K+K^2) update instead of O(N*K^2) one. Reordering of
updates is essential for having good performance on non-orthogonal problems
(updates which do not add much of curvature are added first,  and  updates
which add very large eigenvalues are added last and override effect of the
first updates).

In practice, this preconditioner is perfect when orthogonal  correction is
applied; on non-orthogonal problems sometimes  it  allows  to  achieve  5x
speedup (when compared to non-preconditioned solver).

This routine only stores its inputs in the state: the leading N elements
of d, the leading cnt elements of c and the leading cnt-by-N part of w are
copied, cnt is remembered as the number of corrections, and preconditioner
type 4 is selected. No validation of the inputs is performed here.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgssetprecrankklbfgsfast(minlbfgsstate state, double[] d, double[] c, double[,] w, int cnt)
{
    int dim = state.n;

    state.prectype = 4;
    state.preck = cnt;

    // Grow the destination buffers if they are too small.
    apserv.rvectorsetlengthatleast(ref state.precc, cnt);
    apserv.rvectorsetlengthatleast(ref state.precd, dim);
    apserv.rmatrixsetlengthatleast(ref state.precw, cnt, dim);

    // Diagonal term.
    for(int col=0; col<dim; col++)
    {
        state.precd[col] = d[col];
    }

    // Low-rank term: factor c[row] together with row 'row' of w.
    for(int row=0; row<cnt; row++)
    {
        state.precc[row] = c[row];
        for(int col=0; col<dim; col++)
        {
            state.precw[row,col] = w[row,col];
        }
    }
}
/*************************************************************************
L-BFGS algorithm results

Buffered implementation of MinLBFGSResults which uses pre-allocated buffer
to store X[]. If buffer size is  too  small,  it  resizes  buffer.  It  is
intended to be used in the inner cycles of performance critical algorithms
where array reallocation penalty is too large to be ignored.

Unlike MinLBFGSResults(), both X and Rep must already exist when this
function is called.

  -- ALGLIB --
     Copyright 20.08.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsresultsbuf(minlbfgsstate state, ref double[] x, minlbfgsreport rep)
{
    // Thin wrapper: forwards to the computational core on the wrapped objects.
    minlbfgs.minlbfgsresultsbuf(state.innerobj, ref x, rep.innerobj);
}
/*************************************************************************
L-BFGS iterations (reverse communication core).

Each call either returns True with exactly one request raised in State
(NeedFG - function and gradient are needed at State.X; NeedF - function
value only is needed at State.X; XUpdated - an iteration is reported), or
returns False when the optimization is over and State.RepTerminationType
holds the completion code.

TERMINATION CODES written by this routine:
    -8  infinite or NAN value met in function/gradient
    -7  derivative check failed; State.RepVarIdx holds variable index
    -2  y'*s or y'*y became exactly zero (rounding errors)
     1  F decrease is no more than EpsF*max(|Fold|,|F|,1)
     2  scaled step norm is no more than EpsX
     4  scaled gradient norm is no more than EpsG
     5  MaxIts iterations were made (only checked when MaxIts>0)
     8  State.UserTerminationNeeded was set

NOTES:

1. This function has two different implementations: one which  uses  exact
   (analytical) user-supplied gradient,  and one which uses function value
   only  and  numerically  differentiates  function  in  order  to  obtain
   gradient.

   Depending  on  the  specific  function  used to create optimizer object
   (either MinLBFGSCreate() for analytical gradient  or  MinLBFGSCreateF()
   for numerical differentiation) you should choose appropriate variant of
   MinLBFGSOptimize() - one  which  accepts  function  AND gradient or one
   which accepts function ONLY.

   Be careful to choose variant of MinLBFGSOptimize() which corresponds to
   your optimization scheme! Table below lists different  combinations  of
   callback (function/gradient) passed to MinLBFGSOptimize()  and specific
   function used to create optimizer.


                     |         USER PASSED TO MinLBFGSOptimize()
   CREATED WITH      |  function only   |  function and gradient
   ------------------------------------------------------------
   MinLBFGSCreateF() |     work                FAIL
   MinLBFGSCreate()  |     FAIL                work

   Here "FAIL" denotes inappropriate combinations  of  optimizer  creation
   function  and  MinLBFGSOptimize()  version.   Attempts   to   use  such
   combination (for example, to create optimizer with MinLBFGSCreateF() and
   to pass gradient information to MinLBFGSOptimize()) will lead to
   exception being thrown. Either  you  did  not pass gradient when it WAS
   needed or you passed gradient when it was NOT needed.

  -- ALGLIB --
     Copyright 20.03.2009 by Bochkanov Sergey
*************************************************************************/
public static bool minlbfgsiteration(minlbfgsstate state)
{
    bool result = new bool();
    int n = 0;
    int m = 0;
    int i = 0;
    int j = 0;
    int ic = 0;
    int mcinfo = 0;
    double v = 0;
    double vv = 0;
    int i_ = 0;

    //
    // Reverse communication preparations
    // I know it looks ugly, but it works the same way
    // anywhere from C++ to Python.
    //
    // This code initializes locals by:
    // * random values determined during code
    //   generation - on first subroutine call
    // * values from previous call - on subsequent calls
    //
    // NOTE: a negative stage is treated as "first call"; the code that sets
    // it is outside this routine (presumably the Create/Restart functions -
    // confirm).
    //
    if( state.rstate.stage>=0 )
    {
        n = state.rstate.ia[0];
        m = state.rstate.ia[1];
        i = state.rstate.ia[2];
        j = state.rstate.ia[3];
        ic = state.rstate.ia[4];
        mcinfo = state.rstate.ia[5];
        v = state.rstate.ra[0];
        vv = state.rstate.ra[1];
    }
    else
    {
        n = -983;
        m = -989;
        i = -834;
        j = 900;
        ic = -287;
        mcinfo = 364;
        v = 214;
        vv = -338;
    }

    // Resume at the point where the previous call returned to the caller.
    // Stages 0-2: derivative check; 3: initial F/G; 4-8: initial numerical
    // gradient; 9: initial report; 10: line search F/G; 11-15: line search
    // numerical gradient; 16: iteration report.
    if( state.rstate.stage==0 )
    {
        goto lbl_0;
    }
    if( state.rstate.stage==1 )
    {
        goto lbl_1;
    }
    if( state.rstate.stage==2 )
    {
        goto lbl_2;
    }
    if( state.rstate.stage==3 )
    {
        goto lbl_3;
    }
    if( state.rstate.stage==4 )
    {
        goto lbl_4;
    }
    if( state.rstate.stage==5 )
    {
        goto lbl_5;
    }
    if( state.rstate.stage==6 )
    {
        goto lbl_6;
    }
    if( state.rstate.stage==7 )
    {
        goto lbl_7;
    }
    if( state.rstate.stage==8 )
    {
        goto lbl_8;
    }
    if( state.rstate.stage==9 )
    {
        goto lbl_9;
    }
    if( state.rstate.stage==10 )
    {
        goto lbl_10;
    }
    if( state.rstate.stage==11 )
    {
        goto lbl_11;
    }
    if( state.rstate.stage==12 )
    {
        goto lbl_12;
    }
    if( state.rstate.stage==13 )
    {
        goto lbl_13;
    }
    if( state.rstate.stage==14 )
    {
        goto lbl_14;
    }
    if( state.rstate.stage==15 )
    {
        goto lbl_15;
    }
    if( state.rstate.stage==16 )
    {
        goto lbl_16;
    }

    //
    // Routine body
    //

    //
    // Unload frequently used variables from State structure
    // (just for typing convenience)
    //
    n = state.n;
    m = state.m;
    state.userterminationneeded = false;
    state.repterminationtype = 0;
    state.repiterationscount = 0;
    state.repvaridx = -1;
    state.repnfev = 0;

    //
    //  Check, that transferred derivative value is right
    //
    // The check runs only when the analytic gradient is in use (DiffStep==0)
    // and a positive TestStep has been set.
    clearrequestfields(state);
    if( !((double)(state.diffstep)==(double)(0) && (double)(state.teststep)>(double)(0)) )
    {
        goto lbl_17;
    }
    state.needfg = true;
    i = 0;
lbl_19:
    // Loop over variables i=0..n-1, written with gotos because each
    // iteration returns to the caller three times.
    if( i>n-1 )
    {
        goto lbl_21;
    }
    // v keeps the original X[i] while the point is shifted.
    v = state.x[i];
    state.x[i] = v-state.teststep*state.s[i];
    state.rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    // F and G[i] at X[i]-TestStep*S[i]
    state.fm1 = state.f;
    state.fp1 = state.g[i];
    state.x[i] = v+state.teststep*state.s[i];
    state.rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
    // F and G[i] at X[i]+TestStep*S[i]
    state.fm2 = state.f;
    state.fp2 = state.g[i];
    state.x[i] = v;
    state.rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:

    //
    // 2*State.TestStep   -   scale parameter
    // width of segment [Xi-TestStep;Xi+TestStep]
    //
    if( !optserv.derivativecheck(state.fm1, state.fp1, state.fm2, state.fp2, state.f, state.g[i], 2*state.teststep) )
    {
        // Report which variable failed the check.
        state.repvaridx = i;
        state.repterminationtype = -7;
        result = false;
        return result;
    }
    i = i+1;
    goto lbl_19;
lbl_21:
    state.needfg = false;
lbl_17:

    //
    // Calculate F/G at the initial point
    //
    clearrequestfields(state);
    if( (double)(state.diffstep)!=(double)(0) )
    {
        goto lbl_22;
    }
    // Analytic gradient: a single F/G request.
    state.needfg = true;
    state.rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
    state.needfg = false;
    goto lbl_23;
lbl_22:
    // Numerical differentiation: F at the base point first, then four
    // shifted evaluations per variable.
    state.needf = true;
    state.rstate.stage = 4;
    goto lbl_rcomm;
lbl_4:
    state.fbase = state.f;
    i = 0;
lbl_24:
    if( i>n-1 )
    {
        goto lbl_26;
    }
    v = state.x[i];
    state.x[i] = v-state.diffstep*state.s[i];
    state.rstate.stage = 5;
    goto lbl_rcomm;
lbl_5:
    state.fm2 = state.f;
    state.x[i] = v-0.5*state.diffstep*state.s[i];
    state.rstate.stage = 6;
    goto lbl_rcomm;
lbl_6:
    state.fm1 = state.f;
    state.x[i] = v+0.5*state.diffstep*state.s[i];
    state.rstate.stage = 7;
    goto lbl_rcomm;
lbl_7:
    state.fp1 = state.f;
    state.x[i] = v+state.diffstep*state.s[i];
    state.rstate.stage = 8;
    goto lbl_rcomm;
lbl_8:
    state.fp2 = state.f;
    state.x[i] = v;
    // Four-point formula built from F at X[i]-h, X[i]-h/2, X[i]+h/2 and
    // X[i]+h, where h=DiffStep*S[i].
    state.g[i] = (8*(state.fp1-state.fm1)-(state.fp2-state.fm2))/(6*state.diffstep*state.s[i]);
    i = i+1;
    goto lbl_24;
lbl_26:
    // State.F was overwritten by the shifted evaluations; restore it.
    state.f = state.fbase;
    state.needf = false;
lbl_23:
    optserv.trimprepare(state.f, ref state.trimthreshold);
    if( !state.xrep )
    {
        goto lbl_27;
    }
    // Reports are on: hand the initial point to the caller.
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 9;
    goto lbl_rcomm;
lbl_9:
    state.xupdated = false;
lbl_27:
    if( state.userterminationneeded )
    {

        //
        // User requested termination
        //
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    state.repnfev = 1;
    state.fold = state.f;
    // v = squared norm of the scaled gradient G[i]*S[i].
    v = 0;
    for(i=0; i<=n-1; i++)
    {
        v = v+math.sqr(state.g[i]*state.s[i]);
    }
    if( (double)(Math.Sqrt(v))<=(double)(state.epsg) )
    {
        // Scaled gradient at the starting point is already small enough.
        state.repterminationtype = 4;
        result = false;
        return result;
    }

    //
    // Choose initial step and direction.
    // Apply preconditioner, if we have something other than default.
    //
    for(i_=0; i_<=n-1;i_++)
    {
        state.d[i_] = -state.g[i_];
    }
    if( state.prectype==0 )
    {

        //
        // Default preconditioner is used, but we can't use it before iterations will start
        //
        // Instead the first trial step is 1/|g|, capped by 1 when StpMax is
        // zero and by StpMax otherwise.
        v = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            v += state.g[i_]*state.g[i_];
        }
        v = Math.Sqrt(v);
        if( (double)(state.stpmax)==(double)(0) )
        {
            state.stp = Math.Min(1.0/v, 1);
        }
        else
        {
            state.stp = Math.Min(1.0/v, state.stpmax);
        }
    }
    if( state.prectype==1 )
    {

        //
        // Cholesky preconditioner is used
        //
        fbls.fblscholeskysolve(state.denseh, 1.0, n, true, state.d, ref state.autobuf);
        state.stp = 1;
    }
    if( state.prectype==2 )
    {

        //
        // diagonal approximation is used
        //
        for(i=0; i<=n-1; i++)
        {
            state.d[i] = state.d[i]/state.diagh[i];
        }
        state.stp = 1;
    }
    if( state.prectype==3 )
    {

        //
        // scale-based preconditioner is used
        //
        for(i=0; i<=n-1; i++)
        {
            state.d[i] = state.d[i]*state.s[i]*state.s[i];
        }
        state.stp = 1;
    }
    if( state.prectype==4 )
    {

        //
        // rank-k BFGS-based preconditioner is used
        //
        optserv.inexactlbfgspreconditioner(state.d, n, state.precd, state.precc, state.precw, state.preck, state.precbuf);
        state.stp = 1;
    }
    if( state.prectype==5 )
    {

        //
        // exact low-rank preconditioner is used
        //
        optserv.applylowrankpreconditioner(state.d, state.lowrankbuf);
        state.stp = 1;
    }

    //
    // Main cycle
    //
    // The condition below is the literal false, so this loop is left only
    // through the return statements or through lbl_rcomm.
    state.k = 0;
lbl_29:
    if( false )
    {
        goto lbl_30;
    }

    //
    // Main cycle: prepare to 1-D line search
    //
    // p is the row of the SK/YK/Rho/Theta storage used for correction k
    // (rows are reused modulo m); q bounds how many older corrections are
    // visited by the two loops further below.
    state.p = state.k%m;
    state.q = Math.Min(state.k, m-1);

    //
    // Store X[k], G[k]
    //
    // XP keeps a copy of X for restoring on user termination. Rows of SK/YK
    // are filled with -X and -G now; adding the new X and G after the line
    // search turns them into the differences S[k] and Y[k].
    for(i_=0; i_<=n-1;i_++)
    {
        state.xp[i_] = state.x[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.sk[state.p,i_] = -state.x[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.yk[state.p,i_] = -state.g[i_];
    }

    //
    // Minimize F(x+alpha*d)
    // Calculate S[k], Y[k]
    //
    state.mcstage = 0;
    if( state.k!=0 )
    {
        // Every iteration except the one with k=0 starts from a unit step.
        state.stp = 1.0;
    }
    linmin.linminnormalized(ref state.d, ref state.stp, n);
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.stpmax, gtol, ref mcinfo, ref state.nfev, ref state.work, state.lstate, ref state.mcstage);
lbl_31:
    // mcstage==0 means the line search is finished; otherwise it needs
    // F/G at the trial point it has written into State.X.
    if( state.mcstage==0 )
    {
        goto lbl_32;
    }
    clearrequestfields(state);
    if( (double)(state.diffstep)!=(double)(0) )
    {
        goto lbl_33;
    }
    state.needfg = true;
    state.rstate.stage = 10;
    goto lbl_rcomm;
lbl_10:
    state.needfg = false;
    goto lbl_34;
lbl_33:
    // Numerical differentiation at the trial point; same scheme as the one
    // used for the initial point (stages 4-8).
    state.needf = true;
    state.rstate.stage = 11;
    goto lbl_rcomm;
lbl_11:
    state.fbase = state.f;
    i = 0;
lbl_35:
    if( i>n-1 )
    {
        goto lbl_37;
    }
    v = state.x[i];
    state.x[i] = v-state.diffstep*state.s[i];
    state.rstate.stage = 12;
    goto lbl_rcomm;
lbl_12:
    state.fm2 = state.f;
    state.x[i] = v-0.5*state.diffstep*state.s[i];
    state.rstate.stage = 13;
    goto lbl_rcomm;
lbl_13:
    state.fm1 = state.f;
    state.x[i] = v+0.5*state.diffstep*state.s[i];
    state.rstate.stage = 14;
    goto lbl_rcomm;
lbl_14:
    state.fp1 = state.f;
    state.x[i] = v+state.diffstep*state.s[i];
    state.rstate.stage = 15;
    goto lbl_rcomm;
lbl_15:
    state.fp2 = state.f;
    state.x[i] = v;
    state.g[i] = (8*(state.fp1-state.fm1)-(state.fp2-state.fm2))/(6*state.diffstep*state.s[i]);
    i = i+1;
    goto lbl_35;
lbl_37:
    state.f = state.fbase;
    state.needf = false;
lbl_34:
    optserv.trimfunction(ref state.f, ref state.g, n, state.trimthreshold);
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.stpmax, gtol, ref mcinfo, ref state.nfev, ref state.work, state.lstate, ref state.mcstage);
    goto lbl_31;
lbl_32:
    if( state.userterminationneeded )
    {

        //
        // User requested termination.
        // Restore previous point and return.
        //
        for(i_=0; i_<=n-1;i_++)
        {
            state.x[i_] = state.xp[i_];
        }
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    if( !state.xrep )
    {
        goto lbl_38;
    }

    //
    // report
    //
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 16;
    goto lbl_rcomm;
lbl_16:
    state.xupdated = false;
lbl_38:
    state.repnfev = state.repnfev+state.nfev;
    state.repiterationscount = state.repiterationscount+1;
    // Complete S[k] = X[k+1]-X[k] and Y[k] = G[k+1]-G[k].
    for(i_=0; i_<=n-1;i_++)
    {
        state.sk[state.p,i_] = state.sk[state.p,i_] + state.x[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.yk[state.p,i_] = state.yk[state.p,i_] + state.g[i_];
    }

    //
    // Stopping conditions
    //
    // v = squared norm of the scaled gradient; it is reused by the
    // finiteness test and by the EpsG test below.
    v = 0;
    for(i=0; i<=n-1; i++)
    {
        v = v+math.sqr(state.g[i]*state.s[i]);
    }
    if( !math.isfinite(v) || !math.isfinite(state.f) )
    {

        //
        // Abnormal termination - infinities in function/gradient
        //
        state.repterminationtype = -8;
        result = false;
        return result;
    }
    if( state.repiterationscount>=state.maxits && state.maxits>0 )
    {

        //
        // Too many iterations
        //
        state.repterminationtype = 5;
        result = false;
        return result;
    }
    if( (double)(Math.Sqrt(v))<=(double)(state.epsg) )
    {

        //
        // Gradient is small enough
        //
        state.repterminationtype = 4;
        result = false;
        return result;
    }
    if( (double)(state.fold-state.f)<=(double)(state.epsf*Math.Max(Math.Abs(state.fold), Math.Max(Math.Abs(state.f), 1.0))) )
    {

        //
        // F(k+1)-F(k) is small enough
        //
        state.repterminationtype = 1;
        result = false;
        return result;
    }
    // v = squared norm of the step just taken, each component divided by
    // its scale S[i].
    v = 0;
    for(i=0; i<=n-1; i++)
    {
        v = v+math.sqr(state.sk[state.p,i]/state.s[i]);
    }
    if( (double)(Math.Sqrt(v))<=(double)(state.epsx) )
    {

        //
        // X(k+1)-X(k) is small enough
        //
        state.repterminationtype = 2;
        result = false;
        return result;
    }

    //
    // If Wolfe conditions are satisfied, we can update
    // limited memory model.
    //
    // However, if conditions are not satisfied (NFEV limit is met,
    // function is too wild, ...), we'll skip L-BFGS update
    //
    if( mcinfo!=1 )
    {

        //
        // Skip update.
        //
        // In such cases we'll initialize search direction by
        // antigradient vector, because it leads to more
        // transparent code with less number of special cases
        //
        // Note that State.K is not advanced on this path and no
        // preconditioner is applied to the direction.
        state.fold = state.f;
        for(i_=0; i_<=n-1;i_++)
        {
            state.d[i_] = -state.g[i_];
        }
    }
    else
    {

        //
        // Calculate Rho[k], GammaK
        //
        // v = Y[k]'*S[k], vv = Y[k]'*Y[k]
        v = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            v += state.yk[state.p,i_]*state.sk[state.p,i_];
        }
        vv = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            vv += state.yk[state.p,i_]*state.yk[state.p,i_];
        }
        if( (double)(v)==(double)(0) || (double)(vv)==(double)(0) )
        {

            //
            // Rounding errors make further iterations impossible.
            //
            state.repterminationtype = -2;
            result = false;
            return result;
        }
        state.rho[state.p] = 1/v;
        state.gammak = v/vv;

        //
        //  Calculate d(k+1) = -H(k+1)*g(k+1)
        //
        //  for I:=K downto K-Q do
        //      V = s(i)^T * work(iteration:I)
        //      theta(i) = V
        //      work(iteration:I+1) = work(iteration:I) - V*Rho(i)*y(i)
        //  work(last iteration) = H0*work(last iteration) - preconditioner
        //  for I:=K-Q to K do
        //      V = y(i)^T*work(iteration:I)
        //      work(iteration:I+1) = work(iteration:I) +(-V+theta(i))*Rho(i)*s(i)
        //
        //  NOW WORK CONTAINS d(k+1)
        //
        for(i_=0; i_<=n-1;i_++)
        {
            state.work[i_] = state.g[i_];
        }
        // First loop: newest correction to oldest; ic is the storage row.
        for(i=state.k; i>=state.k-state.q; i--)
        {
            ic = i%m;
            v = 0.0;
            for(i_=0; i_<=n-1;i_++)
            {
                v += state.sk[ic,i_]*state.work[i_];
            }
            state.theta[ic] = v;
            vv = v*state.rho[ic];
            for(i_=0; i_<=n-1;i_++)
            {
                state.work[i_] = state.work[i_] - vv*state.yk[ic,i_];
            }
        }
        // Middle step: apply H0, i.e. whichever preconditioner is selected.
        // The loops below reuse i; the second loop re-initializes it.
        if( state.prectype==0 )
        {

            //
            // Simple preconditioner is used
            //
            v = state.gammak;
            for(i_=0; i_<=n-1;i_++)
            {
                state.work[i_] = v*state.work[i_];
            }
        }
        if( state.prectype==1 )
        {

            //
            // Cholesky preconditioner is used
            //
            fbls.fblscholeskysolve(state.denseh, 1, n, true, state.work, ref state.autobuf);
        }
        if( state.prectype==2 )
        {

            //
            // diagonal approximation is used
            //
            for(i=0; i<=n-1; i++)
            {
                state.work[i] = state.work[i]/state.diagh[i];
            }
        }
        if( state.prectype==3 )
        {

            //
            // scale-based preconditioner is used
            //
            for(i=0; i<=n-1; i++)
            {
                state.work[i] = state.work[i]*state.s[i]*state.s[i];
            }
        }
        if( state.prectype==4 )
        {

            //
            // Rank-K BFGS-based preconditioner is used
            //
            optserv.inexactlbfgspreconditioner(state.work, n, state.precd, state.precc, state.precw, state.preck, state.precbuf);
        }
        if( state.prectype==5 )
        {

            //
            // Exact low-rank preconditioner is used
            //
            optserv.applylowrankpreconditioner(state.work, state.lowrankbuf);
        }
        // Second loop: oldest correction to newest.
        for(i=state.k-state.q; i<=state.k; i++)
        {
            ic = i%m;
            v = 0.0;
            for(i_=0; i_<=n-1;i_++)
            {
                v += state.yk[ic,i_]*state.work[i_];
            }
            vv = state.rho[ic]*(-v+state.theta[ic]);
            for(i_=0; i_<=n-1;i_++)
            {
                state.work[i_] = state.work[i_] + vv*state.sk[ic,i_];
            }
        }
        for(i_=0; i_<=n-1;i_++)
        {
            state.d[i_] = -state.work[i_];
        }

        //
        // Next step
        //
        state.fold = state.f;
        state.k = state.k+1;
    }
    goto lbl_29;
lbl_30:
    result = false;
    return result;

    //
    // Saving state
    //
    // Every "return True" path goes through here so that the locals can be
    // restored at the top of the next call.
lbl_rcomm:
    result = true;
    state.rstate.ia[0] = n;
    state.rstate.ia[1] = m;
    state.rstate.ia[2] = i;
    state.rstate.ia[3] = j;
    state.rstate.ia[4] = ic;
    state.rstate.ia[5] = mcinfo;
    state.rstate.ra[0] = v;
    state.rstate.ra[1] = vv;
    return result;
}
/*************************************************************************
Restarts the L-BFGS optimizer from a new starting point.

All optimization parameters already stored in State are left unchanged,
which makes it possible to solve several optimization problems (they must
have the same number of dimensions) without paying for object reallocation.

This overload is a thin wrapper: it forwards State.innerobj and X to
minlbfgs.minlbfgsrestartfrom().

INPUT PARAMETERS:
    State   -   structure used to store algorithm state
    X       -   new starting point.

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsrestartfrom(minlbfgsstate state, double[] x)
{
    minlbfgs.minlbfgsrestartfrom(state.innerobj, x);
}
/*************************************************************************
L-BFGS algorithm results (buffered variant)

Buffered implementation of MinLBFGSResults() which stores the solution in
a caller-supplied buffer X[]. The buffer is reallocated only when it holds
fewer than State.N elements; a larger buffer is reused as is and only its
leading State.N elements are overwritten.

It is intended to be used in the inner cycles of performance critical
algorithms where array reallocation penalty is too large to be ignored.

INPUT PARAMETERS:
    State   -   algorithm state
    X       -   preallocated buffer (resized when too short)
    Rep     -   report structure to fill

OUTPUT PARAMETERS:
    X       -   first State.N elements hold a copy of State.X
    Rep     -   IterationsCount, NFEV, VarIdx and TerminationType are
                copied from the corresponding State.Rep* fields

  -- ALGLIB --
     Copyright 20.08.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsresultsbuf(minlbfgsstate state, ref double[] x, minlbfgsreport rep)
{
    int count = state.n;

    // Grow the output buffer only when it cannot hold the whole solution.
    if( alglib.ap.len(x)<count )
    {
        x = new double[count];
    }

    // Copy the current point into the caller's buffer.
    for(int idx=0; idx<count; idx++)
    {
        x[idx] = state.x[idx];
    }

    // Transfer the accumulated report counters.
    rep.iterationscount = state.repiterationscount;
    rep.nfev = state.repnfev;
    rep.varidx = state.repvaridx;
    rep.terminationtype = state.repterminationtype;
}
/*************************************************************************
Creates a copy of this optimizer state.

Scalar fields are copied by value, array fields are duplicated with
Clone(), and nested state objects (PrecBuf, LowRankBuf, RState, LState)
are duplicated through their own make_copy() methods.
*************************************************************************/
public override alglib.apobject make_copy()
{
    minlbfgsstate copy = new minlbfgsstate();

    // Problem size, stopping criteria and general settings
    copy.n = this.n;
    copy.m = this.m;
    copy.epsg = this.epsg;
    copy.epsf = this.epsf;
    copy.epsx = this.epsx;
    copy.maxits = this.maxits;
    copy.xrep = this.xrep;
    copy.stpmax = this.stpmax;
    copy.s = (double[])this.s.Clone();
    copy.diffstep = this.diffstep;

    // Iteration counters and limited-memory model storage
    copy.nfev = this.nfev;
    copy.mcstage = this.mcstage;
    copy.k = this.k;
    copy.q = this.q;
    copy.p = this.p;
    copy.rho = (double[])this.rho.Clone();
    copy.yk = (double[,])this.yk.Clone();
    copy.sk = (double[,])this.sk.Clone();
    copy.xp = (double[])this.xp.Clone();
    copy.theta = (double[])this.theta.Clone();
    copy.d = (double[])this.d.Clone();
    copy.stp = this.stp;
    copy.work = (double[])this.work.Clone();
    copy.fold = this.fold;
    copy.trimthreshold = this.trimthreshold;

    // Preconditioner data
    copy.prectype = this.prectype;
    copy.gammak = this.gammak;
    copy.denseh = (double[,])this.denseh.Clone();
    copy.diagh = (double[])this.diagh.Clone();
    copy.precc = (double[])this.precc.Clone();
    copy.precd = (double[])this.precd.Clone();
    copy.precw = (double[,])this.precw.Clone();
    copy.preck = this.preck;
    copy.precbuf = (optserv.precbuflbfgs)this.precbuf.make_copy();
    copy.lowrankbuf = (optserv.precbuflowrank)this.lowrankbuf.make_copy();

    // Function values stored during numerical differentiation
    copy.fbase = this.fbase;
    copy.fm2 = this.fm2;
    copy.fm1 = this.fm1;
    copy.fp1 = this.fp1;
    copy.fp2 = this.fp2;
    copy.autobuf = (double[])this.autobuf.Clone();

    // Reverse-communication fields
    copy.x = (double[])this.x.Clone();
    copy.f = this.f;
    copy.g = (double[])this.g.Clone();
    copy.needf = this.needf;
    copy.needfg = this.needfg;
    copy.xupdated = this.xupdated;
    copy.userterminationneeded = this.userterminationneeded;
    copy.teststep = this.teststep;
    copy.rstate = (rcommstate)this.rstate.make_copy();

    // Report fields and line-search state
    copy.repiterationscount = this.repiterationscount;
    copy.repnfev = this.repnfev;
    copy.repvaridx = this.repvaridx;
    copy.repterminationtype = this.repterminationtype;
    copy.lstate = (linmin.linminstate)this.lstate.make_copy();

    return copy;
}
/*************************************************************************
Submits a request for termination of the running optimizer.

It should be called from a user-supplied callback when the user decides
that it is time to "smoothly" terminate the optimization process. As a
result, the optimizer stops at the point which was "current accepted" when
the termination request was submitted and returns completion code 8
(successful termination).

The function itself only raises the State.UserTerminationNeeded flag.

INPUT PARAMETERS:
    State   -   optimizer structure

NOTE: after a request for termination the optimizer may perform several
      additional calls to user-supplied callbacks. It is NOT guaranteed to
      stop immediately - it is only guaranteed that the results of these
      additional calls will be discarded later.

NOTE: calling this function on an optimizer which is NOT running will have
      no effect.

NOTE: multiple calls to this function are possible. The first call is
      counted, subsequent calls are silently ignored.

  -- ALGLIB --
     Copyright 08.10.2014 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgsrequesttermination(minlbfgsstate state)
{
    state.userterminationneeded = true;
}
/*************************************************************************
        LIMITED MEMORY BFGS METHOD FOR LARGE SCALE OPTIMIZATION

DESCRIPTION:
Minimizes a function F(x) of N arguments with a quasi-Newton method (LBFGS
scheme) which is optimized to use a minimum amount of memory.
The approximation of the inverse Hessian matrix is built from information
about the last M steps of the algorithm (instead of N), which reduces the
required amount of memory from a value of order N^2 to a value of order
2*N*M.


REQUIREMENTS:
The algorithm will request the following information during its operation:
* function value F and its gradient G (simultaneously) at given point X


USAGE:
1. User initializes algorithm state with MinLBFGSCreate() call
2. User tunes solver parameters with MinLBFGSSetCond(), MinLBFGSSetStpMax()
   and other functions
3. User calls MinLBFGSOptimize() function which takes algorithm state and
   pointer (delegate, etc.) to callback function which calculates F/G.
4. User calls MinLBFGSResults() to get the solution
5. Optionally the user may call MinLBFGSRestartFrom() to solve another
   problem with the same N/M but another starting point and/or another
   function. MinLBFGSRestartFrom() allows reusing an already initialized
   structure.


INPUT PARAMETERS:
    N       -   problem dimension. N>0
    M       -   number of corrections in the BFGS scheme of Hessian
                approximation update. Recommended value: 3<=M<=7. A smaller
                value causes worse convergence, a bigger one will not cause
                considerably better convergence, but will reduce
                performance. 1<=M<=N.
    X       -   initial solution approximation, array[0..N-1]; it must
                hold at least N finite values.

OUTPUT PARAMETERS:
    State   -   structure which stores algorithm state


NOTES:
1. you may tune stopping conditions with MinLBFGSSetCond() function
2. if the target function contains exp() or other fast growing functions,
   and the optimization algorithm makes too large steps which lead to
   overflow, use MinLBFGSSetStpMax() function to bound algorithm's steps.
   However, L-BFGS rarely needs such tuning.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreate(int n, int m, double[] x, minlbfgsstate state)
{
    // Validate arguments; the checks run in this order, so the first
    // violated condition is the one that gets reported.
    alglib.ap.assert(1<=n, "MinLBFGSCreate: N<1!");
    alglib.ap.assert(1<=m, "MinLBFGSCreate: M<1");
    alglib.ap.assert(n>=m, "MinLBFGSCreate: M>N");
    alglib.ap.assert(n<=alglib.ap.len(x), "MinLBFGSCreate: Length(X)<N!");
    alglib.ap.assert(apserv.isfinitevector(x, n), "MinLBFGSCreate: X contains infinite or NaN values!");

    // Delegate to the common constructor with a zero differentiation step
    // (fifth argument); the fourth argument is passed as 0.
    minlbfgscreatex(n, m, x, 0, 0.0, state);
}
/*************************************************************************
Clears the request fields NeedF, NeedFG and XUpdated of the state structure
(to be sure that we did not forget to clear something).
*************************************************************************/
private static void clearrequestfields(minlbfgsstate state)
{
    state.needf = state.needfg = state.xupdated = false;
}
/*************************************************************************
Finite difference variant of MinLBFGSCreate(). It uses finite differences
in order to differentiate the target function.

The description below contains information which is specific to this
function only. We recommend reading the comments on MinLBFGSCreate() in
order to get more information about creation of the LBFGS optimizer.

INPUT PARAMETERS:
    N       -   problem dimension, N>0:
                * if given, only leading N elements of X are used
                * if not given, automatically determined from size of X
                (this overload always receives N explicitly)
    M       -   number of corrections in the BFGS scheme of Hessian
                approximation update. Recommended value: 3<=M<=7. A smaller
                value causes worse convergence, a bigger one will not cause
                considerably better convergence, but will reduce
                performance. 1<=M<=N.
    X       -   starting point, array[0..N-1]; it must hold at least N
                finite values.
    DiffStep-   differentiation step, finite and >0

OUTPUT PARAMETERS:
    State   -   structure which stores algorithm state

NOTES:
1. the algorithm uses a 4-point central formula for differentiation.
2. the differentiation step along the I-th axis is equal to DiffStep*S[I]
   where S[] is a scaling vector which can be set by MinLBFGSSetScale()
   call.
3. we recommend using moderate values of the differentiation step. Too
   large a step will result in too large truncation errors, while too
   small a step will result in too large numerical errors. 1.0E-6 can be a
   good value to start with.
4. Numerical differentiation is very inefficient - one gradient
   calculation needs 4*N function evaluations. This function will work for
   any N - either small (1...10), moderate (10...100) or large (100...).
   However, the performance penalty will be too severe for any N's except
   for small ones.
   We should also say that code which relies on numerical differentiation
   is less robust and precise. LBFGS needs exact gradient values. An
   imprecise gradient may slow down convergence, especially on highly
   nonlinear problems.
   Thus we recommend using this function for fast prototyping on small-
   dimensional problems only, and implementing an analytical gradient as
   soon as possible.

  -- ALGLIB --
     Copyright 16.05.2011 by Bochkanov Sergey
*************************************************************************/
public static void minlbfgscreatef(int n, int m, double[] x, double diffstep, minlbfgsstate state)
{
    // Validate arguments; the checks run in this order, so the first
    // violated condition is the one that gets reported.
    alglib.ap.assert(1<=n, "MinLBFGSCreateF: N too small!");
    alglib.ap.assert(1<=m, "MinLBFGSCreateF: M<1");
    alglib.ap.assert(n>=m, "MinLBFGSCreateF: M>N");
    alglib.ap.assert(n<=alglib.ap.len(x), "MinLBFGSCreateF: Length(X)<N!");
    alglib.ap.assert(apserv.isfinitevector(x, n), "MinLBFGSCreateF: X contains infinite or NaN values!");
    alglib.ap.assert(math.isfinite(diffstep), "MinLBFGSCreateF: DiffStep is infinite or NaN!");
    alglib.ap.assert(diffstep>0.0, "MinLBFGSCreateF: DiffStep is non-positive!");

    // Delegate to the common constructor, passing the caller's
    // differentiation step; the fourth argument is passed as 0.
    minlbfgscreatex(n, m, x, 0, diffstep, state);
}