/*************************************************************************
 * Selects the conjugate gradient update formula used by the optimizer.
 *
 * INPUT PARAMETERS:
 *     State   -   optimizer state (initialized with MinCGCreate()); its
 *                 CGType field is overwritten by this call.
 *     CGType  -   formula selector:
 *                 * -1  automatic selection (stored as 1)
 *                 *  0  DY (Dai and Yuan) algorithm
 *                 *  1  hybrid DY-HS algorithm
 *
 * A value outside [-1, 1] trips a debug assertion.
 *
 *   -- ALGLIB --
 *      Copyright 02.04.2010 by Bochkanov Sergey
 *************************************************************************/
public static void mincgsetcgtype(ref mincgstate state, int cgtype)
{
    System.Diagnostics.Debug.Assert(cgtype >= -1 & cgtype <= 1, "MinCGSetCGType: incorrect CGType!");

    // "automatic" (-1) is resolved to the hybrid DY-HS variant (1)
    state.cgtype = (cgtype == -1) ? 1 : cgtype;
}
/*************************************************************************
 * Retrieves the results of a finished conjugate gradient run.
 *
 * Intended to be called after the iteration routine returned False.
 *
 * INPUT PARAMETERS:
 *     State   -   algorithm state (as used by MinCGIteration).
 *
 * OUTPUT PARAMETERS:
 *     X       -   freshly allocated array[0..N-1] holding a copy of the
 *                 solution stored in State.XN
 *     Rep     -   optimization report:
 *                 * Rep.TerminationType - completion code:
 *                     * -2  rounding errors prevent further improvement,
 *                           X contains best point found
 *                     * -1  incorrect parameters were specified
 *                     *  1  relative function improvement is no more
 *                           than EpsF
 *                     *  2  relative step is no more than EpsX
 *                     *  4  gradient norm is no more than EpsG
 *                     *  5  MaxIts steps were taken
 *                     *  7  stopping conditions are too stringent,
 *                           further improvement is impossible
 *                 * Rep.IterationsCount - number of iterations
 *                 * Rep.NFEV            - number of function evaluations
 *
 *   -- ALGLIB --
 *      Copyright 20.04.2009 by Bochkanov Sergey
 *************************************************************************/
public static void mincgresults(ref mincgstate state, ref double[] x, ref mincgreport rep)
{
    // the output buffer is (re)allocated first, exactly N elements long
    x = new double[state.n - 1 + 1];
    for (int idx = 0; idx < state.n; idx++)
    {
        x[idx] = state.xn[idx];
    }

    // report fields are plain copies of the counters kept in the state
    rep.terminationtype = state.repterminationtype;
    rep.nfev = state.repnfev;
    rep.iterationscount = state.repiterationscount;
}
/*************************************************************************
 * Sets the stopping conditions of the CG optimizer.
 *
 * INPUT PARAMETERS:
 *     State   -   optimizer state (initialized with MinCGCreate()); the
 *                 EpsG/EpsF/EpsX/MaxIts fields are overwritten.
 *     EpsG    -   >=0, threshold on the Euclidean norm of the gradient
 *     EpsF    -   >=0, threshold on the function change between two
 *                 iterations, relative to max{|F(k)|,|F(k+1)|,1}
 *     EpsX    -   >=0, threshold on the step length between iterations
 *     MaxIts  -   >=0, maximum number of iterations; 0 means unlimited
 *
 * When EpsG, EpsF, EpsX and MaxIts are all zero at the same time, the
 * stopping criterion is chosen automatically: EpsX is stored as 1.0E-6.
 *
 * Negative arguments trip debug assertions.
 *
 *   -- ALGLIB --
 *      Copyright 02.04.2010 by Bochkanov Sergey
 *************************************************************************/
public static void mincgsetcond(ref mincgstate state, double epsg, double epsf, double epsx, int maxits)
{
    System.Diagnostics.Debug.Assert((double)(epsg) >= 0.0, "MinCGSetCond: negative EpsG!");
    System.Diagnostics.Debug.Assert((double)(epsf) >= 0.0, "MinCGSetCond: negative EpsF!");
    System.Diagnostics.Debug.Assert((double)(epsx) >= 0.0, "MinCGSetCond: negative EpsX!");
    System.Diagnostics.Debug.Assert(maxits >= 0, "MinCGSetCond: negative MaxIts!");

    // nothing specified at all => fall back to a small step-size criterion
    bool nothingspecified = (double)(epsg) == 0.0
        && (double)(epsf) == 0.0
        && (double)(epsx) == 0.0
        && maxits == 0;

    state.epsg = epsg;
    state.epsf = epsf;
    state.epsx = nothingspecified ? 1.0E-6 : epsx;
    state.maxits = maxits;
}
/*************************************************************************
 * NONLINEAR CONJUGATE GRADIENT METHOD
 *
 * Prepares an optimizer state for minimizing a function F(x) of N
 * arguments with one of the nonlinear conjugate gradient methods.
 *
 * These CG methods are globally convergent (even on non-convex functions)
 * as long as grad(f) is Lipschitz continuous in some neighborhood of
 * L = { x : f(x)<=f(x0) }.
 *
 * INPUT PARAMETERS:
 *     N       -   problem dimension, N>0 (checked by a debug assertion)
 *     X       -   initial solution approximation, array[0..N-1]; its
 *                 first N elements are copied into State.X
 *
 * OUTPUT PARAMETERS:
 *     State   -   structure used for reverse communication. On return:
 *                 * stopping conditions are set via
 *                   MinCGSetCond(State,0,0,0,0), i.e. automatic selection
 *                 * iteration reports are off, step length is unlimited,
 *                   CG type is "automatic"
 *                 * all work arrays are allocated with N elements
 *                 * the reverse communication stage is reset to -1
 *
 * See also MinCGIteration, MinCGResults
 *
 *   -- ALGLIB --
 *      Copyright 25.03.2010 by Bochkanov Sergey
 *************************************************************************/
public static void mincgcreate(int n, ref double[] x, ref mincgstate state)
{
    System.Diagnostics.Debug.Assert(n >= 1, "MinCGCreate: N too small!");

    //
    // Default settings
    //
    state.n = n;
    mincgsetcond(ref state, 0, 0, 0, 0);
    mincgsetxrep(ref state, false);
    mincgsetstpmax(ref state, 0);
    mincgsetcgtype(ref state, -1);

    //
    // Work arrays, N elements each
    //
    state.xk = new double[n];
    state.dk = new double[n];
    state.xn = new double[n];
    state.dn = new double[n];
    state.x = new double[n];
    state.d = new double[n];
    state.g = new double[n];
    state.work = new double[n];
    state.yk = new double[n];

    //
    // Starting point and reverse communication bookkeeping:
    // three integer and three real slots, stage -1 = "not started"
    //
    for (int idx = 0; idx < n; idx++)
    {
        state.x[idx] = x[idx];
    }
    state.rstate.ia = new int[3];
    state.rstate.ra = new double[3];
    state.rstate.stage = -1;
}
/*************************************************************************
Restarts the CG algorithm from a new point, leaving all optimization
parameters unchanged.

This makes it possible to solve several optimization problems (which must
have the same number of dimensions) without reallocating the optimizer
object.

This overload only forwards to the core implementation
mincg.mincgrestartfrom(), passing the wrapped State.innerobj.

INPUT PARAMETERS:
    State   -   structure used to store algorithm state.
    X       -   new starting point.

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgrestartfrom(mincgstate state, double[] x)
{
    mincg.mincgrestartfrom(state.innerobj, x);
}
/*************************************************************************
Resets every request/notification flag of the state to false, so that no
stale flag survives into the next reverse communication request.

Flags cleared: NeedF, NeedFG, XUpdated, LSStart, LSEnd, AlgPowerUp.
*************************************************************************/
private static void clearrequestfields(mincgstate state)
{
    // function / gradient requests
    state.needf = state.needfg = false;

    // progress notifications
    state.xupdated = state.lsstart = state.lsend = state.algpowerup = false;
}
/*************************************************************************
Computes the preconditioned product x'*H^(-1)*y for the preconditioner
currently selected in State.PrecType:
* 0 - no preconditioning, plain dot product x'*y
* 3 - scale-based preconditioner, sum of x[i]*s[i]^2*y[i]
* 2 - diagonal (DiagH+DiagHL2) preconditioner with an optional low-rank
      correction made of State.VCnt rows of State.VCorr
Any other PrecType triggers an internal-error assertion.

Work0[] and Work1[] are used as temporaries in the low-rank branch (each
must hold at least N elements; this function does not allocate arrays).

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
private static double preconditionedmultiply2(mincgstate state, ref double[] x, ref double[] y, ref double[] work0, ref double[] work1)
{
    int dim = state.n;
    int rank = state.vcnt;
    double acc = 0;

    //
    // no preconditioning: ordinary dot product
    //
    if( state.prectype==0 )
    {
        double dot = 0.0;
        for(int k=0; k<dim; k++)
        {
            dot += x[k]*y[k];
        }
        return dot;
    }

    //
    // scale-based preconditioning
    //
    if( state.prectype==3 )
    {
        for(int k=0; k<dim; k++)
        {
            acc = acc+x[k]*state.s[k]*state.s[k]*y[k];
        }
        return acc;
    }

    alglib.ap.assert(state.prectype==2, "MinCG: internal error (unexpected PrecType)");

    //
    // low rank preconditioning: diagonal part first ...
    //
    acc = 0.0;
    for(int k=0; k<dim; k++)
    {
        acc = acc+x[k]*y[k]/(state.diagh[k]+state.diaghl2[k]);
    }
    if( rank<=0 )
    {
        return acc;
    }

    //
    // ... then the correction: subtract (work0'*v)*(work1'*v) for every
    // stored correction row v, where work0/work1 are x and y divided by
    // the diagonal
    //
    for(int k=0; k<dim; k++)
    {
        work0[k] = x[k]/(state.diagh[k]+state.diaghl2[k]);
        work1[k] = y[k]/(state.diagh[k]+state.diaghl2[k]);
    }
    for(int row=0; row<rank; row++)
    {
        double px = 0.0;
        for(int k=0; k<dim; k++)
        {
            px += work0[k]*state.vcorr[row,k];
        }
        double py = 0.0;
        for(int k=0; k<dim; k++)
        {
            py += work1[k]*state.vcorr[row,k];
        }
        acc = acc-px*py;
    }
    return acc;
}
/*************************************************************************
Fast variant of MinCGSetPrecDiag() for time-critical code: installs D as
the diagonal preconditioner without any safety checks on D.

Effects on State:
* DiagH and DiagHL2 are grown to at least N elements if needed
* PrecType is set to 2, VCnt to 0, InnerResetNeeded to true
* DiagH[i] = D[i] and DiagHL2[i] = 0 for i in [0, N)

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetprecdiagfast(mincgstate state, double[] d)
{
    apserv.rvectorsetlengthatleast(ref state.diagh, state.n);
    apserv.rvectorsetlengthatleast(ref state.diaghl2, state.n);

    // switch to the diagonal preconditioner without low-rank correction
    state.prectype = 2;
    state.vcnt = 0;
    state.innerresetneeded = true;

    int count = state.n;
    for(int k=0; k<count; k++)
    {
        state.diagh[k] = d[k];
        state.diaghl2[k] = 0.0;
    }
}
/*************************************************************************
Updates the variable part (diagonal matrix D2) of the low-rank
preconditioner by copying the first N elements of D2 into State.DiagHL2.

The update is very cheap, just O(N) time. It has no effect with the
default preconditioner.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetprecvarpart(mincgstate state, double[] d2)
{
    int count = state.n;
    for(int k=0; k<count; k++)
    {
        state.diaghl2[k] = d2[k];
    }
}
/*************************************************************************
Conjugate gradient results.

Resets X to an empty array and delegates to MinCGResultsBuf(), which
fills X and Rep.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    X       -   array[0..N-1], solution
    Rep     -   optimization report:
                * Rep.TerminationType - completion code:
                    * -8  internal integrity control detected infinite
                          or NAN values in function/gradient; abnormal
                          termination signalled
                    * -7  gradient verification failed, see
                          MinCGSetGradientCheck() for more information
                    *  1  relative function improvement is no more than
                          EpsF
                    *  2  relative step is no more than EpsX
                    *  4  gradient norm is no more than EpsG
                    *  5  MaxIts steps were taken
                    *  7  stopping conditions are too stringent, further
                          improvement is impossible; best X found so far
                          is returned
                    *  8  terminated by user
                * Rep.IterationsCount - number of iterations
                * NFEV - number of function calculations

  -- ALGLIB --
     Copyright 20.04.2009 by Bochkanov Sergey
*************************************************************************/
public static void mincgresults(mincgstate state, ref double[] x, mincgreport rep)
{
    // start from an empty buffer; the buffered variant takes it from there
    x = new double[0];

    mincgresultsbuf(state, ref x, rep);
}
/*************************************************************************
This subroutine restarts CG algorithm from new point. All optimization
parameters are left unchanged.

This function allows to solve multiple optimization problems (which must
have same number of dimensions) without object reallocation penalty.

INPUT PARAMETERS:
    State   -   structure used to store algorithm state.
    X       -   new starting point, at least State.N elements, all of
                the leading State.N elements finite. Both conditions are
                asserted; a failed assertion is reported under the
                "MinCGRestartFrom" prefix.

Effects on State:
* the leading State.N elements of X are copied into State.X
* the suggested step is reset to 0.0 (no estimate)
* reverse communication storage is reallocated and its stage is set to -1
* all request fields are cleared

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgrestartfrom(mincgstate state, double[] x)
{
    int i_ = 0;

    alglib.ap.assert(alglib.ap.len(x)>=state.n, "MinCGRestartFrom: Length(X)<N!");
    // message used to say "MinCGCreate", which blamed the wrong function
    alglib.ap.assert(apserv.isfinitevector(x, state.n), "MinCGRestartFrom: X contains infinite or NaN values!");
    for(i_=0; i_<=state.n-1;i_++)
    {
        state.x[i_] = x[i_];
    }
    mincgsuggeststep(state, 0.0);

    // fresh reverse communication storage (2 integer / 3 real slots);
    // stage -1 marks "iteration not started yet"
    state.rstate.ia = new int[1+1];
    state.rstate.ra = new double[2+1];
    state.rstate.stage = -1;
    clearrequestfields(state);
}
/*************************************************************************
Modification of the preconditioner: preconditioning is turned off.

Sets State.PrecType to 0 and raises State.InnerResetNeeded.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state

NOTE: the preconditioner can be changed "on the fly", during algorithm
iterations.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetprecdefault(mincgstate state)
{
    state.innerresetneeded = true;
    state.prectype = 0;
}
/*************************************************************************
Modification of the preconditioner: scale-based diagonal preconditioning.

This mode can be useful when no approximate diagonal of the Hessian is
available, but the variables are known to be badly scaled (for example,
one variable is in [1,10] and another in [1000,100000]) and most of the
ill-conditioning comes from the different scales. In this case a simple
scale-based preconditioner, with H[i] = 1/(s[i]^2), can greatly improve
convergence.

IMPORTANT: the scale of the variables should be set with a
MinCGSetScale() call (before or after the MinCGSetPrecScale() call).
Without knowledge of the scale the scale-based preconditioner is just
the unit matrix.

Sets State.PrecType to 3 and raises State.InnerResetNeeded.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state

NOTE: the preconditioner can be changed "on the fly", during algorithm
iterations.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetprecscale(mincgstate state)
{
    state.innerresetneeded = true;
    state.prectype = 3;
}
/*************************************************************************
Conjugate gradient results, buffered variant.

Buffered implementation of MinCGResults() which stores X[] in a
pre-allocated buffer; if the buffer is too small, it is resized. Intended
for the inner cycles of performance critical algorithms where the array
reallocation penalty is too large to be ignored.

This overload only forwards to the core implementation
mincg.mincgresultsbuf(), passing the wrapped State.innerobj and
Rep.innerobj.

  -- ALGLIB --
     Copyright 20.04.2009 by Bochkanov Sergey
*************************************************************************/
public static void mincgresultsbuf(mincgstate state, ref double[] x, mincgreport rep)
{
    mincg.mincgresultsbuf(state.innerobj, ref x, rep.innerobj);
}
/*************************************************************************
Suggests an initial step length to the CG algorithm.

The suggested step is used as the starting point of the line search. It
helps on badly scaled problems, i.e. when ||grad|| (which is used as the
initial estimate for the first step) is many orders of magnitude away
from the desired step; the line search may fail on such problems without
a good initial estimate.

For example, with ||grad||=10^50 and a desired step of 0.1 the line
search starts from 10^50, halves the step up to 20 times and ends at
about 10^44, which is still far too large. Suggesting a moderate step
such as 1.0 lets the algorithm find the desired step in a few searches.

Default behavior (no step suggested) is to use the preconditioner, if
available, to generate the initial estimate of the step length.

The suggestion influences only the first iteration. Call this function
between MinCGCreate()/MinCGRestartFrom() and MinCGOptimize(). The
suggested step is ignored when a preconditioner is set.

INPUT PARAMETERS:
    State   -   structure used to store algorithm state.
    Stp     -   initial estimate of the step length; must be finite and
                non-negative (both asserted). Zero means "no estimate".

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsuggeststep(mincgstate state, double stp)
{
    bool finite = math.isfinite(stp);
    alglib.ap.assert(finite, "MinCGSuggestStep: Stp is infinite or NAN");

    alglib.ap.assert((double)(stp)>=0.0, "MinCGSuggestStep: Stp<0");

    state.suggestedstep = stp;
}
/*************************************************************************
Conjugate gradient results.

Allocates an empty X and a new report object, then forwards to the core
implementation mincg.mincgresults() with the wrapped inner objects.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    X       -   array[0..N-1], solution
    Rep     -   optimization report:
                * Rep.TerminationType - completion code:
                    *  1  relative function improvement is no more than
                          EpsF
                    *  2  relative step is no more than EpsX
                    *  4  gradient norm is no more than EpsG
                    *  5  MaxIts steps were taken
                    *  7  stopping conditions are too stringent, further
                          improvement is impossible; best X found so far
                          is returned
                    *  8  terminated by user
                * Rep.IterationsCount - number of iterations
                * NFEV - number of function calculations

  -- ALGLIB --
     Copyright 20.04.2009 by Bochkanov Sergey
*************************************************************************/
public static void mincgresults(mincgstate state, out double[] x, out mincgreport rep)
{
    // out parameters must be assigned before they can be handed on
    x = new double[0];
    rep = new mincgreport();

    mincg.mincgresults(state.innerobj, ref x, rep.innerobj);
}
/*************************************************************************
 * Resets the request flags NeedFG and XUpdated of the state to false, so
 * that no stale flag survives into the next reverse communication
 * request.
 *************************************************************************/
private static void clearrequestfields(ref mincgstate state)
{
    state.needfg = state.xupdated = false;
}
/*************************************************************************
 * Turns per-iteration reporting on or off.
 *
 * INPUT PARAMETERS:
 *     State    -   optimizer state (initialized with MinCGCreate()); its
 *                  XRep field is overwritten.
 *     NeedXRep -   whether iteration reports are needed or not
 *
 * Usually the algorithm returns from MinCGIteration() only when it needs
 * the function/gradient. With reporting turned on it additionally stops
 * after each iteration (one iteration may include more than one function
 * evaluation), which is indicated by the XUpdated field.
 *
 *   -- ALGLIB --
 *      Copyright 02.04.2010 by Bochkanov Sergey
 *************************************************************************/
public static void mincgsetxrep(ref mincgstate state, bool needxrep)
{
    // the flag is only stored here; it is consulted by the iteration routine
    state.xrep = needxrep;
}
/*************************************************************************
 * One conjugate gradient iteration (reverse communication driver).
 *
 * Called after initialization with MinCGCreate().
 * See HTML documentation for examples.
 *
 * INPUT PARAMETERS:
 *     State   -   structure which stores algorithm state between calls
 *                 and which is used for reverse communication. Must be
 *                 initialized with MinCGCreate().
 *
 * RESULT:
 * * if the function returned False, the iterative process has finished
 *   (State.RepTerminationType holds the reason). Use MinCGResults() to
 *   obtain optimization results.
 * * if the function returned True, then, depending on structure fields,
 *   we have one of the following situations:
 *
 * === FUNC/GRAD REQUEST ===
 * State.NeedFG is True => function value/gradient are needed.
 * Caller should calculate function value State.F and gradient
 * State.G[0..N-1] at State.X[0..N-1] and call MinCGIteration() again.
 *
 * === NEW ITERATION IS REPORTED ===
 * State.XUpdated is True => one more iteration was made.
 * State.X contains current position, State.F contains function value at
 * X. You can read info from these fields, but never modify them because
 * they contain the only copy of optimization algorithm state.
 *
 * One and only one of these fields (NeedFG, XUpdated) is true on return.
 * New iterations are reported only when reports are explicitly turned on
 * by MinCGSetXRep(), so if you never called it, you can expect that
 * NeedFG is always True.
 *
 * TERMINATION CODES SET BY THIS ROUTINE (State.RepTerminationType):
 * * 1  F(k)-F(k+1) <= EpsF*max{|F(k)|,|F(k+1)|,1}
 * * 2  ||D||*Stp <= EpsX
 * * 4  gradient norm is exactly zero at the starting point, or
 *      ||G|| <= EpsG after an iteration
 * * 5  MaxIts iterations were made (only when MaxIts>0)
 *
 * REVERSE COMMUNICATION STAGES (State.RState.Stage):
 * * -1 fresh start
 * *  0 resume after the initial function/gradient request
 * *  1 resume after the report of the starting point
 * *  2 resume after a function/gradient request made by the line search
 * *  3 resume after the report of a completed iteration
 *
 *   -- ALGLIB --
 *      Copyright 20.04.2009 by Bochkanov Sergey
 *************************************************************************/
public static bool mincgiteration(ref mincgstate state)
{
    bool result = new bool();
    int n = 0;
    int i = 0;
    double betak = 0;
    double v = 0;
    double vv = 0;
    int mcinfo = 0;
    int i_ = 0;

    //
    // Reverse communication preparations
    // I know it looks ugly, but it works the same way
    // anywhere from C++ to Python.
    //
    // This code initializes locals by:
    // * random values determined during code
    //   generation - on first subroutine call
    // * values from previous call - on subsequent calls
    //
    // NOTE: local I is only saved and restored; nothing else in this
    //       routine reads it.
    //
    if (state.rstate.stage >= 0)
    {
        n = state.rstate.ia[0];
        i = state.rstate.ia[1];
        mcinfo = state.rstate.ia[2];
        betak = state.rstate.ra[0];
        v = state.rstate.ra[1];
        vv = state.rstate.ra[2];
    }
    else
    {
        n = -983;
        i = -989;
        mcinfo = -834;
        betak = 900;
        v = -287;
        vv = 364;
    }

    //
    // Jump back to the point where the previous call left off
    //
    if (state.rstate.stage == 0)
    {
        goto lbl_0;
    }
    if (state.rstate.stage == 1)
    {
        goto lbl_1;
    }
    if (state.rstate.stage == 2)
    {
        goto lbl_2;
    }
    if (state.rstate.stage == 3)
    {
        goto lbl_3;
    }

    //
    // Routine body
    //

    //
    // Prepare: reset report counters
    //
    n = state.n;
    state.repterminationtype = 0;
    state.repiterationscount = 0;
    state.repnfev = 0;
    state.debugrestartscount = 0;

    //
    // Calculate F/G, initialize algorithm
    //
    clearrequestfields(ref state);
    state.needfg = true;
    state.rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    //
    // Report the starting point (only when reports are turned on)
    //
    if (!state.xrep)
    {
        goto lbl_4;
    }
    clearrequestfields(ref state);
    state.xupdated = true;
    state.rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
lbl_4:
    //
    // Gradient norm at the starting point; an exactly zero gradient
    // means there is nothing to do (termination type 4)
    //
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.g[i_] * state.g[i_];
    }
    v = Math.Sqrt(v);
    if ((double)(v) == (double)(0))
    {
        state.repterminationtype = 4;
        result = false;
        return(result);
    }

    //
    // First point: XK = X, DK = -G (steepest descent direction)
    //
    state.repnfev = 1;
    state.k = 0;
    state.fold = state.f;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.xk[i_] = state.x[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.dk[i_] = -state.g[i_];
    }

    //
    // Main cycle
    //
    // NOTE: the condition below is constant false, so the loop is left
    //       only through the "return" statements in its body and lbl_7
    //       is never jumped to.
    //
lbl_6:
    if (false)
    {
        goto lbl_7;
    }

    //
    // Store G[k] for later calculation of Y[k]
    // (YK holds -G[k] here; G[k+1] is added after the line search)
    //
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.yk[i_] = -state.g[i_];
    }

    //
    // Calculate X(k+1): minimize F(x+alpha*d)
    //
    // The line search itself needs F/G values, so it is driven through
    // stage 2: MCSrch is re-entered after every function/gradient request
    // until it reports MCStage==0.
    //
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.d[i_] = state.dk[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.x[i_] = state.xk[i_];
    }
    state.mcstage = 0;
    state.stp = 1.0;
    linmin.linminnormalized(ref state.d, ref state.stp, n);
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, ref state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, ref state.lstate, ref state.mcstage);
lbl_8:
    if (state.mcstage == 0)
    {
        goto lbl_9;
    }
    clearrequestfields(ref state);
    state.needfg = true;
    state.rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, ref state.d, ref state.stp, state.stpmax, ref mcinfo, ref state.nfev, ref state.work, ref state.lstate, ref state.mcstage);
    goto lbl_8;
lbl_9:
    //
    // Report the new point (only when reports are turned on)
    //
    if (!state.xrep)
    {
        goto lbl_10;
    }
    clearrequestfields(ref state);
    state.xupdated = true;
    state.rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
lbl_10:
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.xn[i_] = state.x[i_];
    }
    if (mcinfo == 1)
    {
        //
        // Standard Wolfe conditions hold
        // Calculate Y[K] and BetaK
        //
        // YK = G[k+1]-G[k], VV = YK'*DK,
        // BetaDY = G[k+1]'*G[k+1]/VV, BetaHS = G[k+1]'*YK/VV
        //
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            state.yk[i_] = state.yk[i_] + state.g[i_];
        }
        vv = 0.0;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            vv += state.yk[i_] * state.dk[i_];
        }
        v = 0.0;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            v += state.g[i_] * state.g[i_];
        }
        state.betady = v / vv;
        v = 0.0;
        for (i_ = 0; i_ <= n - 1; i_++)
        {
            v += state.g[i_] * state.yk[i_];
        }
        state.betahs = v / vv;

        //
        // CGType 0: pure DY; CGType 1: hybrid, min(DY,HS) clipped at zero
        //
        if (state.cgtype == 0)
        {
            betak = state.betady;
        }
        if (state.cgtype == 1)
        {
            betak = Math.Max(0, Math.Min(state.betady, state.betahs));
        }
    }
    else
    {
        //
        // Something is wrong (may be function is too wild or too flat).
        //
        // We'll set BetaK=0, which will restart CG algorithm.
        // We can stop later (during normal checks) if stopping conditions are met.
        //
        betak = 0;
        state.debugrestartscount = state.debugrestartscount + 1;
    }

    //
    // Calculate D(k+1) = -G(k+1) + BetaK*D(k)
    //
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.dn[i_] = -state.g[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.dn[i_] = state.dn[i_] + betak * state.dk[i_];
    }

    //
    // Update information and Hessian.
    // Check stopping conditions.
    //
    // The checks are made in a fixed order (MaxIts, EpsG, EpsF, EpsX);
    // the first one satisfied determines the termination type.
    //
    state.repnfev = state.repnfev + state.nfev;
    state.repiterationscount = state.repiterationscount + 1;
    if (state.repiterationscount >= state.maxits & state.maxits > 0)
    {
        //
        // Too many iterations
        //
        state.repterminationtype = 5;
        result = false;
        return(result);
    }
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.g[i_] * state.g[i_];
    }
    if ((double)(Math.Sqrt(v)) <= (double)(state.epsg))
    {
        //
        // Gradient is small enough
        //
        state.repterminationtype = 4;
        result = false;
        return(result);
    }
    if ((double)(state.fold - state.f) <= (double)(state.epsf * Math.Max(Math.Abs(state.fold), Math.Max(Math.Abs(state.f), 1.0))))
    {
        //
        // F(k+1)-F(k) is small enough
        //
        // NOTE: the difference FOld-F is signed (no absolute value), so
        //       a step which does not decrease F satisfies this test too,
        //       even with EpsF=0.
        //
        state.repterminationtype = 1;
        result = false;
        return(result);
    }
    v = 0.0;
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        v += state.d[i_] * state.d[i_];
    }
    if ((double)(Math.Sqrt(v) * state.stp) <= (double)(state.epsx))
    {
        //
        // X(k+1)-X(k) is small enough
        //
        state.repterminationtype = 2;
        result = false;
        return(result);
    }

    //
    // Shift Xk/Dk, update other information
    //
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.xk[i_] = state.xn[i_];
    }
    for (i_ = 0; i_ <= n - 1; i_++)
    {
        state.dk[i_] = state.dn[i_];
    }
    state.fold = state.f;
    state.k = state.k + 1;
    goto lbl_6;
lbl_7:
    result = false;
    return(result);

    //
    // Saving state: locals are stored in RState so that the next call
    // can restore them and continue from the recorded stage
    //
lbl_rcomm:
    result = true;
    state.rstate.ia[0] = n;
    state.rstate.ia[1] = i;
    state.rstate.ia[2] = mcinfo;
    state.rstate.ra[0] = betak;
    state.rstate.ra[1] = v;
    state.rstate.ra[2] = vv;
    return(result);
}
/*************************************************************************
 * Sets the maximum step length.
 *
 * INPUT PARAMETERS:
 *     State   -   optimizer state (initialized with MinCGCreate()); its
 *                 StpMax field is overwritten.
 *     StpMax  -   maximum step length, >=0 (checked by a debug
 *                 assertion). Pass 0.0 to leave the step unlimited.
 *
 * Use this subroutine when the target function contains exp() or other
 * fast growing functions and the optimizer makes steps so large that
 * they lead to overflow. It allows to reject steps that are too large
 * (and therefore expose us to possible overflow) without actually
 * calculating the function value at x+stp*d.
 *
 *   -- ALGLIB --
 *      Copyright 02.04.2010 by Bochkanov Sergey
 *************************************************************************/
public static void mincgsetstpmax(ref mincgstate state, double stpmax)
{
    System.Diagnostics.Debug.Assert((double)(stpmax) >= 0.0, "MinCGSetStpMax: StpMax<0!");

    state.stpmax = stpmax;
}
/*************************************************************************
Developer-only function which retrieves the unscaled (!) length of the
last good step (i.e. a step which resulted in sufficient decrease of the
target function).

It can be used for the solution of sequential optimization subproblems,
where MinCGSuggestStep() is called with the length of the previous step
as parameter.

INPUT PARAMETERS:
    State   -   structure used to store algorithm state.

RESULT:
    value of State.LastGoodStep, the length of the last good step being
    accepted

NOTE(review): the original note here was cut off ("result of this
function is undefined if you called it before"); presumably it refers to
calling the function before any optimization was performed - confirm.

  -- ALGLIB --
     Copyright 30.07.2010 by Bochkanov Sergey
*************************************************************************/
public static double mincglastgoodstep(mincgstate state)
{
    return state.lastgoodstep;
}
/*************************************************************************
Creates an independent copy of this optimizer state.

Scalar fields are copied by value, arrays are duplicated with Clone(),
and the nested RState/LState objects are duplicated with their own
make_copy() methods.
*************************************************************************/
public override alglib.apobject make_copy()
{
    mincgstate copy = new mincgstate();

    copy.n = this.n;
    copy.epsg = this.epsg;
    copy.epsf = this.epsf;
    copy.epsx = this.epsx;
    copy.maxits = this.maxits;
    copy.stpmax = this.stpmax;
    copy.suggestedstep = this.suggestedstep;
    copy.xrep = this.xrep;
    copy.drep = this.drep;
    copy.cgtype = this.cgtype;
    copy.prectype = this.prectype;
    copy.diagh = (double[])this.diagh.Clone();
    copy.diaghl2 = (double[])this.diaghl2.Clone();
    copy.vcorr = (double[,])this.vcorr.Clone();
    copy.vcnt = this.vcnt;
    copy.s = (double[])this.s.Clone();
    copy.diffstep = this.diffstep;
    copy.nfev = this.nfev;
    copy.mcstage = this.mcstage;
    copy.k = this.k;
    copy.xk = (double[])this.xk.Clone();
    copy.dk = (double[])this.dk.Clone();
    copy.xn = (double[])this.xn.Clone();
    copy.dn = (double[])this.dn.Clone();
    copy.d = (double[])this.d.Clone();
    copy.fold = this.fold;
    copy.stp = this.stp;
    copy.curstpmax = this.curstpmax;
    copy.yk = (double[])this.yk.Clone();
    copy.lastgoodstep = this.lastgoodstep;
    copy.lastscaledstep = this.lastscaledstep;
    copy.mcinfo = this.mcinfo;
    copy.innerresetneeded = this.innerresetneeded;
    copy.terminationneeded = this.terminationneeded;
    copy.trimthreshold = this.trimthreshold;
    copy.rstimer = this.rstimer;
    copy.x = (double[])this.x.Clone();
    copy.f = this.f;
    copy.g = (double[])this.g.Clone();
    copy.needf = this.needf;
    copy.needfg = this.needfg;
    copy.xupdated = this.xupdated;
    copy.algpowerup = this.algpowerup;
    copy.lsstart = this.lsstart;
    copy.lsend = this.lsend;
    copy.userterminationneeded = this.userterminationneeded;
    copy.teststep = this.teststep;
    copy.rstate = (rcommstate)this.rstate.make_copy();
    copy.repiterationscount = this.repiterationscount;
    copy.repnfev = this.repnfev;
    copy.repvaridx = this.repvaridx;
    copy.repterminationtype = this.repterminationtype;
    copy.debugrestartscount = this.debugrestartscount;
    copy.lstate = (linmin.linminstate)this.lstate.make_copy();
    copy.fbase = this.fbase;
    copy.fm2 = this.fm2;
    copy.fm1 = this.fm1;
    copy.fp1 = this.fp1;
    copy.fp2 = this.fp2;
    copy.betahs = this.betahs;
    copy.betady = this.betady;
    copy.work0 = (double[])this.work0.Clone();
    copy.work1 = (double[])this.work1.Clone();

    return copy;
}
/*************************************************************************
Modification of the preconditioner: diagonal of approximate Hessian is
used.

Validates D and then hands it over to MinCGSetPrecDiagFast().

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    D       -   diagonal of the approximate Hessian, array[0..N-1]
                (if larger, only the leading N elements are used).

NOTE:   the preconditioner can be changed "on the fly", during algorithm
        iterations.

NOTE 2: every D[i] must be finite and positive; the assertion fails
        otherwise, as it does when D has fewer than N elements.

NOTE 3: pass the diagonal of the approximate Hessian - NOT ITS INVERSE.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetprecdiag(mincgstate state, double[] d)
{
    alglib.ap.assert(alglib.ap.len(d)>=state.n, "MinCGSetPrecDiag: D is too short");

    // element-wise validation: finiteness is checked before the sign
    for(int k=0; k<state.n; k++)
    {
        alglib.ap.assert(math.isfinite(d[k]), "MinCGSetPrecDiag: D contains infinite or NAN elements");
        alglib.ap.assert((double)(d[k])>0.0, "MinCGSetPrecDiag: D contains non-positive elements");
    }

    mincgsetprecdiagfast(state, d);
}
/*************************************************************************
        NONLINEAR CONJUGATE GRADIENT METHOD

DESCRIPTION:
The subroutine minimizes function F(x) of N arguments by using one of the
nonlinear conjugate gradient methods.

These CG methods are globally convergent (even on non-convex functions)
as long as grad(f) is Lipschitz continuous in some neighborhood of
L = { x : f(x)<=f(x0) }.

REQUIREMENTS:
Algorithm will request following information during its operation:
* function value F and its gradient G (simultaneously) at given point X

USAGE:
1. User initializes algorithm state with MinCGCreate() call
2. User tunes solver parameters with MinCGSetCond(), MinCGSetStpMax() and
   other functions
3. User calls MinCGOptimize() function which takes algorithm state and
   pointer (delegate, etc.) to callback function which calculates F/G.
4. User calls MinCGResults() to get solution
5. Optionally, user may call MinCGRestartFrom() to solve another problem
   with same N but another starting point and/or another function.
   MinCGRestartFrom() allows to reuse already initialized structure.

INPUT PARAMETERS:
    N       -   problem dimension, N>0; only the leading N elements of X
                are used
    X       -   starting point, array[0..N-1]; must hold at least N
                elements, all of the leading N finite

OUTPUT PARAMETERS:
    State   -   structure which stores algorithm state; initialized by
                MinCGInitInternal(N, 0.0, State) and then positioned at
                X by MinCGRestartFrom()

All three preconditions on N and X are asserted.

  -- ALGLIB --
     Copyright 25.03.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgcreate(int n, double[] x, mincgstate state)
{
    //
    // Argument checks
    //
    alglib.ap.assert(n>=1, "MinCGCreate: N too small!");
    alglib.ap.assert(alglib.ap.len(x)>=n, "MinCGCreate: Length(X)<N!");
    alglib.ap.assert(apserv.isfinitevector(x, n), "MinCGCreate: X contains infinite or NaN values!");

    //
    // Set up the state, then start from X
    //
    mincginitinternal(n, 0.0, state);
    mincgrestartfrom(state, x);
}
/*************************************************************************
Reverse-communication core of the CG optimizer.

The function returns True when it needs the caller to service a  request
(one of the request flags of State is set: NeedF, NeedFG, XUpdated,
AlgPowerUp, LSStart, LSEnd) and to call it again afterwards. It  returns
False when optimization is over; State.RepTerminationType then holds the
completion code and State.XN holds the resulting point.

Completion codes assigned by this function:
    -8  infinite or NaN function value/gradient was met
    -7  gradient verification failed, State.RepVarIdx is  the  index  of
        the offending variable
     1  function decrease is no more than EpsF (relative)
     2  scaled step is no more than EpsX
     4  scaled gradient norm is no more than EpsG
     5  MaxIts iterations were performed
     7  too many subsequent restarts
     8  termination was requested (internally or by user)

NOTES:

1. This function has two different implementations: one which  uses  exact
   (analytical) user-supplied  gradient, and one which uses function value
   only  and  numerically  differentiates  function  in  order  to  obtain
   gradient.

   Depending  on  the  specific  function  used to create optimizer object
   (either MinCGCreate()  for analytical gradient  or  MinCGCreateF()  for
   numerical differentiation) you should  choose  appropriate  variant  of
   MinCGOptimize() - one which accepts function AND gradient or one  which
   accepts function ONLY.

   Be careful to choose variant of MinCGOptimize()  which  corresponds  to
   your optimization scheme! Table below lists different  combinations  of
   callback (function/gradient) passed  to  MinCGOptimize()  and  specific
   function used to create optimizer.


                  |         USER PASSED TO MinCGOptimize()
   CREATED WITH   |  function only   |  function and gradient
   ------------------------------------------------------------
   MinCGCreateF() |     work                FAIL
   MinCGCreate()  |     FAIL                work

   Here "FAIL" denotes inappropriate combinations  of  optimizer  creation
   function and MinCGOptimize() version. Attempts to use such  combination
   (for  example,  to create optimizer with  MinCGCreateF()  and  to  pass
   gradient information to MinCGOptimize()) will lead to  exception  being
   thrown. Either  you  did  not  pass  gradient when it WAS needed or you
   passed gradient when it was NOT needed.

  -- ALGLIB --
     Copyright 20.04.2009 by Bochkanov Sergey
*************************************************************************/
public static bool mincgiteration(mincgstate state)
{
    bool result = new bool();
    int n = 0;
    int i = 0;
    double betak = 0;
    double v = 0;
    double vv = 0;
    int i_ = 0;

    
    //
    // Reverse communication preparations
    // I know it looks ugly, but it works the same way
    // anywhere from C++ to Python.
    //
    // This code initializes locals by:
    // * random values determined during code
    //   generation - on first subroutine call
    // * values from previous call - on subsequent calls
    //
    if( state.rstate.stage>=0 )
    {
        n = state.rstate.ia[0];
        i = state.rstate.ia[1];
        betak = state.rstate.ra[0];
        v = state.rstate.ra[1];
        vv = state.rstate.ra[2];
    }
    else
    {
        n = -983;
        i = -989;
        betak = -834;
        v = 900;
        vv = -287;
    }
    //
    // Resume execution right after the request which was issued by the
    // previous call (stage index identifies the resume label).
    //
    if( state.rstate.stage==0 )
    {
        goto lbl_0;
    }
    if( state.rstate.stage==1 )
    {
        goto lbl_1;
    }
    if( state.rstate.stage==2 )
    {
        goto lbl_2;
    }
    if( state.rstate.stage==3 )
    {
        goto lbl_3;
    }
    if( state.rstate.stage==4 )
    {
        goto lbl_4;
    }
    if( state.rstate.stage==5 )
    {
        goto lbl_5;
    }
    if( state.rstate.stage==6 )
    {
        goto lbl_6;
    }
    if( state.rstate.stage==7 )
    {
        goto lbl_7;
    }
    if( state.rstate.stage==8 )
    {
        goto lbl_8;
    }
    if( state.rstate.stage==9 )
    {
        goto lbl_9;
    }
    if( state.rstate.stage==10 )
    {
        goto lbl_10;
    }
    if( state.rstate.stage==11 )
    {
        goto lbl_11;
    }
    if( state.rstate.stage==12 )
    {
        goto lbl_12;
    }
    if( state.rstate.stage==13 )
    {
        goto lbl_13;
    }
    if( state.rstate.stage==14 )
    {
        goto lbl_14;
    }
    if( state.rstate.stage==15 )
    {
        goto lbl_15;
    }
    if( state.rstate.stage==16 )
    {
        goto lbl_16;
    }
    if( state.rstate.stage==17 )
    {
        goto lbl_17;
    }
    if( state.rstate.stage==18 )
    {
        goto lbl_18;
    }
    if( state.rstate.stage==19 )
    {
        goto lbl_19;
    }
    
    //
    // Routine body
    //
    
    //
    // Prepare
    //
    n = state.n;
    state.terminationneeded = false;
    state.userterminationneeded = false;
    state.repterminationtype = 0;
    state.repiterationscount = 0;
    state.repvaridx = -1;
    state.repnfev = 0;
    state.debugrestartscount = 0;
    
    //
    //  Check, that transferred derivative value is right
    //
    //  The check is performed only in the analytic gradient mode
    //  (DiffStep=0) and only when TestStep>0. For each variable three
    //  F/G requests are issued (stages 0, 1 and 2): at X[i]-TestStep*S[i],
    //  at X[i]+TestStep*S[i] and at the original X[i].
    //
    clearrequestfields(state);
    if( !((double)(state.diffstep)==(double)(0) && (double)(state.teststep)>(double)(0)) )
    {
        goto lbl_20;
    }
    state.needfg = true;
    i = 0;
lbl_22:
    if( i>n-1 )
    {
        goto lbl_24;
    }
    v = state.x[i];
    state.x[i] = v-state.teststep*state.s[i];
    state.rstate.stage = 0;
    goto lbl_rcomm;
lbl_0:
    state.fm1 = state.f;
    state.fp1 = state.g[i];
    state.x[i] = v+state.teststep*state.s[i];
    state.rstate.stage = 1;
    goto lbl_rcomm;
lbl_1:
    state.fm2 = state.f;
    state.fp2 = state.g[i];
    state.x[i] = v;
    state.rstate.stage = 2;
    goto lbl_rcomm;
lbl_2:
    
    //
    // 2*State.TestStep   -   scale parameter
    // width of segment [Xi-TestStep;Xi+TestStep]
    //
    if( !optserv.derivativecheck(state.fm1, state.fp1, state.fm2, state.fp2, state.f, state.g[i], 2*state.teststep) )
    {
        state.repvaridx = i;
        state.repterminationtype = -7;
        result = false;
        return result;
    }
    i = i+1;
    goto lbl_22;
lbl_24:
    state.needfg = false;
lbl_20:
    
    //
    // Preparations continue:
    // * set XK
    // * calculate F/G
    // * set DK to -G
    // * powerup algo (it may change preconditioner)
    // * apply preconditioner to DK
    // * report update of X
    // * check stopping conditions for G
    //
    for(i_=0; i_<=n-1;i_++)
    {
        state.xk[i_] = state.x[i_];
    }
    clearrequestfields(state);
    if( (double)(state.diffstep)!=(double)(0) )
    {
        goto lbl_25;
    }
    // Analytic gradient mode: one F/G request (stage 3)
    state.needfg = true;
    state.rstate.stage = 3;
    goto lbl_rcomm;
lbl_3:
    state.needfg = false;
    goto lbl_26;
lbl_25:
    // Numerical differentiation mode: F at the base point (stage 4), then
    // four F requests per variable (stages 5-8) at X[i]-h, X[i]-h/2,
    // X[i]+h/2 and X[i]+h, where h=DiffStep*S[i].
    state.needf = true;
    state.rstate.stage = 4;
    goto lbl_rcomm;
lbl_4:
    state.fbase = state.f;
    i = 0;
lbl_27:
    if( i>n-1 )
    {
        goto lbl_29;
    }
    v = state.x[i];
    state.x[i] = v-state.diffstep*state.s[i];
    state.rstate.stage = 5;
    goto lbl_rcomm;
lbl_5:
    state.fm2 = state.f;
    state.x[i] = v-0.5*state.diffstep*state.s[i];
    state.rstate.stage = 6;
    goto lbl_rcomm;
lbl_6:
    state.fm1 = state.f;
    state.x[i] = v+0.5*state.diffstep*state.s[i];
    state.rstate.stage = 7;
    goto lbl_rcomm;
lbl_7:
    state.fp1 = state.f;
    state.x[i] = v+state.diffstep*state.s[i];
    state.rstate.stage = 8;
    goto lbl_rcomm;
lbl_8:
    state.fp2 = state.f;
    state.x[i] = v;
    // 4-point central difference estimate of dF/dX[i]
    state.g[i] = (8*(state.fp1-state.fm1)-(state.fp2-state.fm2))/(6*state.diffstep*state.s[i]);
    i = i+1;
    goto lbl_27;
lbl_29:
    // restore function value at the base point
    state.f = state.fbase;
    state.needf = false;
lbl_26:
    if( !state.drep )
    {
        goto lbl_30;
    }
    
    //
    // Report algorithm powerup (if needed)
    //
    clearrequestfields(state);
    state.algpowerup = true;
    state.rstate.stage = 9;
    goto lbl_rcomm;
lbl_9:
    state.algpowerup = false;
lbl_30:
    optserv.trimprepare(state.f, ref state.trimthreshold);
    for(i_=0; i_<=n-1;i_++)
    {
        state.dk[i_] = -state.g[i_];
    }
    preconditionedmultiply(state, ref state.dk, ref state.work0, ref state.work1);
    if( !state.xrep )
    {
        goto lbl_32;
    }
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 10;
    goto lbl_rcomm;
lbl_10:
    state.xupdated = false;
lbl_32:
    if( state.terminationneeded || state.userterminationneeded )
    {
        
        //
        // Combined termination point for "internal" termination by TerminationNeeded flag
        // and for "user" termination by MinCGRequestTermination() (UserTerminationNeeded flag).
        // In this location rules for both of methods are same, thus only one exit point is needed.
        //
        for(i_=0; i_<=n-1;i_++)
        {
            state.xn[i_] = state.xk[i_];
        }
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    // Scaled gradient norm at the initial point is tested against EpsG
    v = 0;
    for(i=0; i<=n-1; i++)
    {
        v = v+math.sqr(state.g[i]*state.s[i]);
    }
    if( (double)(Math.Sqrt(v))<=(double)(state.epsg) )
    {
        for(i_=0; i_<=n-1;i_++)
        {
            state.xn[i_] = state.xk[i_];
        }
        state.repterminationtype = 4;
        result = false;
        return result;
    }
    state.repnfev = 1;
    state.k = 0;
    state.fold = state.f;
    
    //
    // Choose initial step.
    // Apply preconditioner, if we have something other than default.
    //
    if( state.prectype==2 || state.prectype==3 )
    {
        
        //
        // because we use preconditioner, step length must be equal
        // to the norm of DK
        //
        v = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            v += state.dk[i_]*state.dk[i_];
        }
        state.lastgoodstep = Math.Sqrt(v);
    }
    else
    {
        
        //
        // No preconditioner is used, we try to use suggested step
        //
        if( (double)(state.suggestedstep)>(double)(0) )
        {
            state.lastgoodstep = state.suggestedstep;
        }
        else
        {
            state.lastgoodstep = 1.0;
        }
    }
    
    //
    // Main cycle
    //
    // The loop has no exit condition of its own ("if( false )" below);
    // it is left only through the "return" statements inside its body.
    //
    state.rstimer = rscountdownlen;
lbl_34:
    if( false )
    {
        goto lbl_35;
    }
    
    //
    // * clear reset flag
    // * clear termination flag
    // * store G[k] for later calculation of Y[k]
    // * prepare starting point and direction and step length for line search
    //
    state.innerresetneeded = false;
    state.terminationneeded = false;
    for(i_=0; i_<=n-1;i_++)
    {
        state.yk[i_] = -state.g[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.d[i_] = state.dk[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.x[i_] = state.xk[i_];
    }
    state.mcstage = 0;
    state.stp = 1.0;
    linmin.linminnormalized(ref state.d, ref state.stp, n);
    if( (double)(state.lastgoodstep)!=(double)(0) )
    {
        state.stp = state.lastgoodstep;
    }
    state.curstpmax = state.stpmax;
    
    //
    // Report beginning of line search (if needed)
    // Terminate algorithm, if user request was detected
    //
    if( !state.drep )
    {
        goto lbl_36;
    }
    clearrequestfields(state);
    state.lsstart = true;
    state.rstate.stage = 11;
    goto lbl_rcomm;
lbl_11:
    state.lsstart = false;
lbl_36:
    if( state.terminationneeded )
    {
        for(i_=0; i_<=n-1;i_++)
        {
            state.xn[i_] = state.x[i_];
        }
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    
    //
    // Minimization along D
    //
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.curstpmax, gtol, ref state.mcinfo, ref state.nfev, ref state.work0, state.lstate, ref state.mcstage);
lbl_38:
    // Line search is over when MCSRCH sets MCStage to zero
    if( state.mcstage==0 )
    {
        goto lbl_39;
    }
    
    //
    // Calculate function/gradient using either
    // analytical gradient supplied by user
    // or finite difference approximation.
    //
    // "Trim" function in order to handle near-singularity points.
    //
    clearrequestfields(state);
    if( (double)(state.diffstep)!=(double)(0) )
    {
        goto lbl_40;
    }
    // Analytic gradient mode: one F/G request (stage 12)
    state.needfg = true;
    state.rstate.stage = 12;
    goto lbl_rcomm;
lbl_12:
    state.needfg = false;
    goto lbl_41;
lbl_40:
    // Numerical differentiation mode: same 4-point scheme as during the
    // preparation phase (stage 13 - base point, stages 14-17 - trial points)
    state.needf = true;
    state.rstate.stage = 13;
    goto lbl_rcomm;
lbl_13:
    state.fbase = state.f;
    i = 0;
lbl_42:
    if( i>n-1 )
    {
        goto lbl_44;
    }
    v = state.x[i];
    state.x[i] = v-state.diffstep*state.s[i];
    state.rstate.stage = 14;
    goto lbl_rcomm;
lbl_14:
    state.fm2 = state.f;
    state.x[i] = v-0.5*state.diffstep*state.s[i];
    state.rstate.stage = 15;
    goto lbl_rcomm;
lbl_15:
    state.fm1 = state.f;
    state.x[i] = v+0.5*state.diffstep*state.s[i];
    state.rstate.stage = 16;
    goto lbl_rcomm;
lbl_16:
    state.fp1 = state.f;
    state.x[i] = v+state.diffstep*state.s[i];
    state.rstate.stage = 17;
    goto lbl_rcomm;
lbl_17:
    state.fp2 = state.f;
    state.x[i] = v;
    state.g[i] = (8*(state.fp1-state.fm1)-(state.fp2-state.fm2))/(6*state.diffstep*state.s[i]);
    i = i+1;
    goto lbl_42;
lbl_44:
    state.f = state.fbase;
    state.needf = false;
lbl_41:
    optserv.trimfunction(ref state.f, ref state.g, n, state.trimthreshold);
    
    //
    // Call MCSRCH again
    //
    linmin.mcsrch(n, ref state.x, ref state.f, ref state.g, state.d, ref state.stp, state.curstpmax, gtol, ref state.mcinfo, ref state.nfev, ref state.work0, state.lstate, ref state.mcstage);
    goto lbl_38;
lbl_39:
    
    //
    // * terminate algorithm if "user" request for detected
    // * report end of line search
    // * store current point to XN
    // * report iteration
    // * terminate algorithm if "internal" request was detected
    //
    // On "user" termination the point from the beginning of the current
    // iteration (XK) is returned, results of the line search are discarded.
    //
    if( state.userterminationneeded )
    {
        for(i_=0; i_<=n-1;i_++)
        {
            state.xn[i_] = state.xk[i_];
        }
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    if( !state.drep )
    {
        goto lbl_45;
    }
    
    //
    // Report end of line search (if needed)
    //
    clearrequestfields(state);
    state.lsend = true;
    state.rstate.stage = 18;
    goto lbl_rcomm;
lbl_18:
    state.lsend = false;
lbl_45:
    for(i_=0; i_<=n-1;i_++)
    {
        state.xn[i_] = state.x[i_];
    }
    if( !state.xrep )
    {
        goto lbl_47;
    }
    clearrequestfields(state);
    state.xupdated = true;
    state.rstate.stage = 19;
    goto lbl_rcomm;
lbl_19:
    state.xupdated = false;
lbl_47:
    if( state.terminationneeded )
    {
        for(i_=0; i_<=n-1;i_++)
        {
            state.xn[i_] = state.x[i_];
        }
        state.repterminationtype = 8;
        result = false;
        return result;
    }
    
    //
    // Line search is finished.
    // * calculate BetaK
    // * calculate DN
    // * update timers
    // * calculate step length:
    //   * LastScaledStep is ALWAYS calculated because it is used in the stopping criteria
    //   * LastGoodStep is updated only when MCINFO is equal to 1 (Wolfe conditions hold).
    //     See below for more explanation.
    //
    if( state.mcinfo==1 && !state.innerresetneeded )
    {
        
        //
        // Standard Wolfe conditions hold
        // Calculate Y[K] and D[K]'*Y[K]
        //
        // YK was set to -G[k] before the line search, so adding the new
        // gradient gives Y[k]=G[k+1]-G[k].
        //
        for(i_=0; i_<=n-1;i_++)
        {
            state.yk[i_] = state.yk[i_] + state.g[i_];
        }
        vv = 0.0;
        for(i_=0; i_<=n-1;i_++)
        {
            vv += state.yk[i_]*state.dk[i_];
        }
        
        //
        // Calculate BetaK according to DY formula
        //
        // NOTE(review): VV is used as denominator without a check for zero
        //
        v = preconditionedmultiply2(state, ref state.g, ref state.g, ref state.work0, ref state.work1);
        state.betady = v/vv;
        
        //
        // Calculate BetaK according to HS formula
        //
        v = preconditionedmultiply2(state, ref state.g, ref state.yk, ref state.work0, ref state.work1);
        state.betahs = v/vv;
        
        //
        // Choose BetaK
        //
        if( state.cgtype==0 )
        {
            betak = state.betady;
        }
        if( state.cgtype==1 )
        {
            betak = Math.Max(0, Math.Min(state.betady, state.betahs));
        }
    }
    else
    {
        
        //
        // Something is wrong (may be function is too wild or too flat)
        // or we just have to restart algo.
        //
        // We'll set BetaK=0, which will restart CG algorithm.
        // We can stop later (during normal checks) if stopping conditions are met.
        //
        betak = 0;
        state.debugrestartscount = state.debugrestartscount+1;
    }
    if( state.repiterationscount>0 && state.repiterationscount%(3+n)==0 )
    {
        
        //
        // clear Beta every N+3 iterations
        //
        betak = 0;
    }
    // Restart countdown is reloaded when line search returned MCINFO=1 or
    // MCINFO=5, and decreased otherwise
    if( state.mcinfo==1 || state.mcinfo==5 )
    {
        state.rstimer = rscountdownlen;
    }
    else
    {
        state.rstimer = state.rstimer-1;
    }
    // DN = -H^(-1)*G + BetaK*DK
    for(i_=0; i_<=n-1;i_++)
    {
        state.dn[i_] = -state.g[i_];
    }
    preconditionedmultiply(state, ref state.dn, ref state.work0, ref state.work1);
    for(i_=0; i_<=n-1;i_++)
    {
        state.dn[i_] = state.dn[i_] + betak*state.dk[i_];
    }
    state.lastscaledstep = 0.0;
    for(i=0; i<=n-1; i++)
    {
        state.lastscaledstep = state.lastscaledstep+math.sqr(state.d[i]/state.s[i]);
    }
    state.lastscaledstep = state.stp*Math.Sqrt(state.lastscaledstep);
    if( state.mcinfo==1 )
    {
        
        //
        // Step is good (Wolfe conditions hold), update LastGoodStep.
        //
        // This check for MCINFO=1 is essential because sometimes in the
        // constrained optimization setting we may take very short steps
        // (like 1E-15) because we were very close to boundary of the
        // feasible area. Such short step does not mean that we've converged
        // to the solution - it was so short because we were close to the
        // boundary and there was a limit on step length.
        //
        // So having such short step is quite normal situation. However, we
        // should NOT start next iteration from step whose initial length is
        // estimated as 1E-15 because it may lead to the failure of the
        // linear minimizer (step is too short, function does not changes,
        // line search stagnates).
        //
        state.lastgoodstep = 0;
        for(i=0; i<=n-1; i++)
        {
            state.lastgoodstep = state.lastgoodstep+math.sqr(state.d[i]);
        }
        state.lastgoodstep = state.stp*Math.Sqrt(state.lastgoodstep);
    }
    
    //
    // Update information.
    // Check stopping conditions.
    //
    v = 0;
    for(i=0; i<=n-1; i++)
    {
        v = v+math.sqr(state.g[i]*state.s[i]);
    }
    if( !math.isfinite(v) || !math.isfinite(state.f) )
    {
        
        //
        // Abnormal termination - infinities in function/gradient
        //
        state.repterminationtype = -8;
        result = false;
        return result;
    }
    state.repnfev = state.repnfev+state.nfev;
    state.repiterationscount = state.repiterationscount+1;
    if( state.repiterationscount>=state.maxits && state.maxits>0 )
    {
        
        //
        // Too many iterations
        //
        state.repterminationtype = 5;
        result = false;
        return result;
    }
    if( (double)(Math.Sqrt(v))<=(double)(state.epsg) )
    {
        
        //
        // Gradient is small enough
        //
        state.repterminationtype = 4;
        result = false;
        return result;
    }
    if( !state.innerresetneeded )
    {
        
        //
        // These conditions are checked only when no inner reset was requested by user
        //
        if( (double)(state.fold-state.f)<=(double)(state.epsf*Math.Max(Math.Abs(state.fold), Math.Max(Math.Abs(state.f), 1.0))) )
        {
            
            //
            // F(k+1)-F(k) is small enough
            //
            state.repterminationtype = 1;
            result = false;
            return result;
        }
        if( (double)(state.lastscaledstep)<=(double)(state.epsx) )
        {
            
            //
            // X(k+1)-X(k) is small enough
            //
            state.repterminationtype = 2;
            result = false;
            return result;
        }
    }
    if( state.rstimer<=0 )
    {
        
        //
        // Too many subsequent restarts
        //
        state.repterminationtype = 7;
        result = false;
        return result;
    }
    
    //
    // Shift Xk/Dk, update other information
    //
    for(i_=0; i_<=n-1;i_++)
    {
        state.xk[i_] = state.xn[i_];
    }
    for(i_=0; i_<=n-1;i_++)
    {
        state.dk[i_] = state.dn[i_];
    }
    state.fold = state.f;
    state.k = state.k+1;
    goto lbl_34;
lbl_35:
    result = false;
    return result;
    
    //
    // Saving state
    //
    // Locals are stored in RState so that they can be restored at the
    // beginning of the next call; True is returned to signal that a
    // request is pending.
    //
lbl_rcomm:
    result = true;
    state.rstate.ia[0] = n;
    state.rstate.ia[1] = i;
    state.rstate.ra[0] = betak;
    state.rstate.ra[1] = v;
    state.rstate.ra[2] = vv;
    return result;
}
/*************************************************************************
Finite difference variant of MinCGCreate(): creates a CG optimizer which
requests function values only and differentiates the target numerically.

Description below contains information which is specific to this function
only. We recommend to read comments on MinCGCreate() in order to get more
information about creation of CG optimizer.

INPUT PARAMETERS:
    N       -   problem dimension, N>=1 (checked by assertion).
                Only leading N elements of X are used.
    X       -   starting point, array with at least N elements, all  of
                the leading N elements must be finite.
    DiffStep-   differentiation step, finite, >0

OUTPUT PARAMETERS:
    State   -   structure which stores algorithm state

NOTES:
1. algorithm uses 4-point central formula for differentiation.
2. differentiation step along I-th axis is equal to DiffStep*S[I] where
   S[] is scaling vector which can be set by MinCGSetScale() call.
3. we recommend you to use moderate values of  differentiation  step.  Too
   large step will result in too large truncation  errors, while too small
   step will result in too large numerical  errors.  1.0E-6  can  be  good
   value to start with.
4. Numerical  differentiation  is   very   inefficient  -   one   gradient
   calculation needs 4*N function evaluations. This function will work for
   any N - either small (1...10), moderate (10...100) or  large  (100...).
   However, performance penalty will be too severe for any N's except  for
   small ones.
   We should also say that code which relies on numerical  differentiation
   is  less  robust  and  precise.  Imprecise gradient  may   slow    down
   convergence, especially on highly nonlinear problems.
   Thus  we  recommend to use this function for fast prototyping on small-
   dimensional problems only, and to implement analytical gradient as soon
   as possible.

  -- ALGLIB --
     Copyright 16.05.2011 by Bochkanov Sergey
*************************************************************************/
public static void mincgcreatef(int n, double[] x, double diffstep, mincgstate state)
{
    // Each precondition is evaluated only after the previous one passed,
    // so the first violated condition determines the reported message.
    bool dimensionok = n >= 1;
    alglib.ap.assert(dimensionok, "MinCGCreateF: N too small!");

    bool lengthok = alglib.ap.len(x) >= n;
    alglib.ap.assert(lengthok, "MinCGCreateF: Length(X)<N!");

    bool valuesok = apserv.isfinitevector(x, n);
    alglib.ap.assert(valuesok, "MinCGCreateF: X contains infinite or NaN values!");

    bool stepfinite = math.isfinite(diffstep);
    alglib.ap.assert(stepfinite, "MinCGCreateF: DiffStep is infinite or NaN!");

    bool steppositive = diffstep > 0.0;
    alglib.ap.assert(steppositive, "MinCGCreateF: DiffStep is non-positive!");

    // Non-zero DiffStep selects the numerical differentiation mode.
    mincginitinternal(n, diffstep, state);
    mincgrestartfrom(state, x);
}
/*************************************************************************
Conjugate gradient results (buffered version)

Copies the resulting point and the optimization report out of State.
Unlike MinCGResults(), it reuses the buffer passed in X: a new  array  is
allocated only when X is shorter than N. It is intended to be used in the
inner cycles of performance critical algorithms where array  reallocation
penalty is too large to be ignored.

INPUT PARAMETERS:
    State   -   algorithm state

OUTPUT PARAMETERS:
    X       -   leading N elements receive State.XN; elements beyond  N
                (if the buffer is longer) are left untouched
    Rep     -   IterationsCount, NFEV, VarIdx and TerminationType fields
                are filled from State

  -- ALGLIB --
     Copyright 20.04.2009 by Bochkanov Sergey
*************************************************************************/
public static void mincgresultsbuf(mincgstate state, ref double[] x, mincgreport rep)
{
    int n = state.n;

    // Grow the buffer only when it cannot hold N values
    if( alglib.ap.len(x) < n )
    {
        x = new double[n];
    }

    for(int idx = 0; idx < n; idx++)
    {
        x[idx] = state.xn[idx];
    }

    rep.terminationtype = state.repterminationtype;
    rep.varidx = state.repvaridx;
    rep.iterationscount = state.repiterationscount;
    rep.nfev = state.repnfev;
}
/*************************************************************************
This function sets stopping conditions for CG optimization algorithm.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    EpsG    -   >=0, finite
                The  subroutine  finishes  its  work   if   the  condition
                |v|<=EpsG is satisfied, where:
                * |.| means Euclidean norm
                * v - scaled gradient vector, v[i]=g[i]*s[i]
                * g - gradient
                * s - scaling coefficients set by MinCGSetScale()
    EpsF    -   >=0, finite
                The  subroutine  finishes  its work if on k+1-th iteration
                the  condition  F(k)-F(k+1)<=EpsF*max{|F(k)|,|F(k+1)|,1}
                is satisfied.
    EpsX    -   >=0, finite
                The subroutine finishes its work if  on  k+1-th  iteration
                the condition |v|<=EpsX is fulfilled, where:
                * |.| means Euclidean norm
                * v - scaled step vector, v[i]=dx[i]/s[i]
                * dx - step vector, dx=X(k+1)-X(k)
                * s - scaling coefficients set by MinCGSetScale()
    MaxIts  -   maximum number of iterations, >=0.
                If MaxIts=0, the number of iterations is unlimited.

Passing EpsG=0, EpsF=0, EpsX=0 and MaxIts=0 (simultaneously) will lead to
automatic stopping criterion selection: EpsX is replaced by 1.0E-6.

Every violated requirement above results in a failed assertion.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetcond(mincgstate state, double epsg, double epsf, double epsx, int maxits)
{
    alglib.ap.assert(math.isfinite(epsg), "MinCGSetCond: EpsG is not finite number!");
    alglib.ap.assert(epsg >= 0.0, "MinCGSetCond: negative EpsG!");
    alglib.ap.assert(math.isfinite(epsf), "MinCGSetCond: EpsF is not finite number!");
    alglib.ap.assert(epsf >= 0.0, "MinCGSetCond: negative EpsF!");
    alglib.ap.assert(math.isfinite(epsx), "MinCGSetCond: EpsX is not finite number!");
    alglib.ap.assert(epsx >= 0.0, "MinCGSetCond: negative EpsX!");
    alglib.ap.assert(maxits >= 0, "MinCGSetCond: negative MaxIts!");

    // No criterion given at all - fall back to a small step tolerance
    bool nocriteria = epsg == 0.0 && epsf == 0.0 && epsx == 0.0 && maxits == 0;
    double effectiveepsx = nocriteria ? 1.0E-6 : epsx;

    state.epsg = epsg;
    state.epsf = epsf;
    state.epsx = effectiveepsx;
    state.maxits = maxits;
}
/*************************************************************************
This subroutine submits request for termination of running optimizer.

It is meant to be called from user-supplied callback when user decides that
it is time to "smoothly" terminate optimization process. The request  is
recorded by raising the UserTerminationNeeded flag of State; the optimizer
checks this flag at its own pace, stops at the last accepted point  and
reports completion code 8.

INPUT PARAMETERS:
    State   -   optimizer structure

NOTE: after request for termination the optimizer  may   perform   several
      additional calls to user-supplied callbacks. It does  NOT  guarantee
      to stop immediately - results of these additional  evaluations   are
      discarded.

NOTE: calling this function on optimizer which is NOT running has no lasting
      effect, because the flag is cleared when optimization session starts.

NOTE: multiple calls to this function are possible; the flag is simply set
      again, so calls after the first one change nothing.

  -- ALGLIB --
     Copyright 08.10.2014 by Bochkanov Sergey
*************************************************************************/
public static void mincgrequesttermination(mincgstate state)
{
    // Only raise the flag; the iteration routine reacts to it later
    state.userterminationneeded = true;
}
/*************************************************************************
This function sets scaling coefficients for CG optimizer.

ALGLIB optimizers use scaling matrices to test stopping  conditions  (step
size and gradient are scaled before comparison with tolerances).  Scale of
the I-th variable is a translation invariant measure of:
a) "how large" the variable is
b) how large the step should be to make significant changes in the function

Scaling is also used by finite difference variant of CG optimizer  -  step
along I-th axis is equal to DiffStep*S[I].

In   most   optimizers  (and  in  the  CG  too)  scaling is NOT a form  of
preconditioning. It just  affects  stopping  conditions.  You  should  set
preconditioner by separate call to one of the MinCGSetPrec...() functions.

There  is  special  preconditioning  mode, however,  which  uses   scaling
coefficients to form diagonal preconditioning matrix. You  can  turn  this
mode on, if you want.   But  you should understand that scaling is not the
same thing as preconditioning - these are two different, although  related
forms of tuning solver.

INPUT PARAMETERS:
    State   -   structure stores algorithm state
    S       -   array with at least N elements, finite non-zero scaling
                coefficients. S[i] may be negative, sign doesn't matter:
                absolute values are stored.

NOTE: coefficients are validated and stored one by one, so when assertion
      fails on S[i], elements 0..i-1 have already been stored.

  -- ALGLIB --
     Copyright 14.01.2011 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetscale(mincgstate state, double[] s)
{
    int n = state.n;
    alglib.ap.assert(alglib.ap.len(s) >= n, "MinCGSetScale: Length(S)<N");

    for(int idx = 0; idx < n; idx++)
    {
        double coeff = s[idx];
        alglib.ap.assert(math.isfinite(coeff), "MinCGSetScale: S contains infinite or NAN elements");
        alglib.ap.assert(coeff != 0.0, "MinCGSetScale: S contains zero elements");
        state.s[idx] = Math.Abs(coeff);
    }
}
/*************************************************************************
This function sets low-rank preconditioner for Hessian matrix  H=D+V'*C*V,
where:
* H is a Hessian matrix, which is approximated by D/V/C
* D=D1+D2 is a diagonal matrix, which includes two positive definite terms:
  * constant term D1 (is not updated or infrequently updated)
  * variable term D2 (can be cheaply updated from iteration to iteration)
* V is a low-rank correction
* C is a diagonal factor of low-rank correction

Preconditioner P is calculated using approximate Woodbury formula:
    P  = D^(-1) - D^(-1)*V'*(C^(-1)+V*D1^(-1)*V')^(-1)*V*D^(-1)
       = D^(-1) - D^(-1)*VC'*VC*D^(-1),
where
    VC = sqrt(B)*V
    B  = (C^(-1)+V*D1^(-1)*V')^(-1)

Note that B is calculated using constant term (D1) only,  which  allows us
to update D2 without recalculation of B or   VC.  Such  preconditioner  is
exact when D2 is zero. When D2 is non-zero, it is only approximation,  but
very good and cheap one.

This function accepts D1, V, C.
D2 is set to zero by default.

Cost of this update is O(N*VCnt*VCnt), but D2 can be updated in just O(N)
by MinCGSetPrecVarPart.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    D1      -   array[N], constant diagonal term
    C       -   array[VCnt], diagonal factor of the correction
    V       -   array[VCnt,N], low-rank correction
    VCnt    -   rank of the correction; VCnt=0 means that the  call   is
                forwarded to MinCGSetPrecDiagFast(State,D1)

NOTE: when Cholesky factorization of C^(-1)+V*D1^(-1)*V' fails, low-rank
      part is dropped (State.VCnt is set to 0) and only diagonal part of
      the preconditioner remains active.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetpreclowrankfast(mincgstate state, double[] d1, double[] c, double[,] v, int vcnt)
{
    // Degenerate case: no correction vectors, purely diagonal preconditioner
    if( vcnt==0 )
    {
        mincgsetprecdiagfast(state, d1);
        return;
    }

    int n = state.n;
    double[,] gram = new double[vcnt, vcnt];
    apserv.rvectorsetlengthatleast(ref state.diagh, n);
    apserv.rvectorsetlengthatleast(ref state.diaghl2, n);
    apserv.rmatrixsetlengthatleast(ref state.vcorr, vcnt, n);
    state.prectype = 2;
    state.vcnt = vcnt;
    state.innerresetneeded = true;

    // Diagonal part: D1 is stored, D2 is reset to zero
    for(int col = 0; col < n; col++)
    {
        state.diagh[col] = d1[col];
        state.diaghl2[col] = 0.0;
    }

    // Upper triangle of C^(-1)+V*D1^(-1)*V'
    for(int row = 0; row < vcnt; row++)
    {
        for(int other = row; other < vcnt; other++)
        {
            double acc = 0;
            for(int col = 0; col < n; col++)
            {
                acc += v[row,col]*v[other,col]/d1[col];
            }
            gram[row,other] = acc;
        }
        gram[row,row] += 1.0/c[row];
    }

    // Upper triangular Cholesky factor replaces the matrix in place
    if( !trfac.spdmatrixcholeskyrec(ref gram, 0, vcnt, true, ref state.work0) )
    {
        state.vcnt = 0;
        return;
    }

    // Row-by-row forward substitution with the factor: every row of V is
    // reduced by the already processed rows and divided by the diagonal
    // element of the factor.
    double[,] vc = state.vcorr;
    for(int row = 0; row < vcnt; row++)
    {
        for(int col = 0; col < n; col++)
        {
            vc[row,col] = v[row,col];
        }
        for(int prev = 0; prev < row; prev++)
        {
            double coeff = gram[prev,row];
            for(int col = 0; col < n; col++)
            {
                vc[row,col] -= coeff*vc[prev,col];
            }
        }
        double scale = 1/gram[row,row];
        for(int col = 0; col < n; col++)
        {
            vc[row,col] = scale*vc[row,col];
        }
    }
}
/*************************************************************************
This function turns on/off reporting of iterations.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    NeedXRep-   whether iteration reports are needed or not

When NeedXRep is True, the optimizer raises XUpdated request  after   the
initial point is evaluated and after every iteration; MinCGOptimize() is
expected to forward these requests to rep() callback, if it was provided.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetxrep(mincgstate state, bool needxrep)
{
    // The flag is only stored here; it is consulted by the iteration routine
    state.xrep = needxrep;
}
/*************************************************************************
This  subroutine  turns  on  verification  of  the  user-supplied analytic
gradient:
* user calls this subroutine before optimization begins
* MinCGOptimize() is called
* prior to  actual  optimization, for each component  of  parameters being
  optimized X[i] algorithm performs following steps:
  * two trial steps are made to X[i]-TestStep*S[i] and X[i]+TestStep*S[i],
    where X[i] is i-th component of the initial point and S[i] is a  scale
    of i-th parameter
  * F(X) and its gradient are evaluated at these trial points
  * we perform one more evaluation in the middle point of the interval
  * we  build  cubic  model using function values and derivatives at trial
    points and we compare its prediction with actual value in  the  middle
    point
  * in case difference between prediction and actual value is higher  than
    some predetermined threshold, algorithm stops with completion code -7;
    Rep.VarIdx is set to index of the parameter with incorrect derivative.
* after verification is over, algorithm proceeds to the actual optimization.

NOTE 1: verification needs three function/gradient evaluations per variable
        (3*N in total). It is very costly and you should  use  it only for
        low dimensional problems, when you want to be sure that you've
        correctly calculated analytic derivatives. You should not  use  it
        in the production code (unless you want to check derivatives
        provided by some third party).

NOTE 2: you  should  carefully  choose  TestStep. Value which is too large
        (so large that function behaviour is significantly non-cubic) will
        lead to false alarms. You may use  different  step  for  different
        parameters by means of setting scale with MinCGSetScale().

NOTE 3: this function may lead to false positives. In case it reports that
        I-th  derivative was calculated incorrectly, you may decrease test
        step  and  try  one  more  time  - maybe your function changes too
        sharply  and  your  step  is  too  large for such rapidly changing
        function.

NOTE 4: verification is performed only by optimizers  which   work   with
        analytic gradient (zero differentiation step).

INPUT PARAMETERS:
    State       -   structure used to store algorithm state
    TestStep    -   verification step, finite:
                    * TestStep=0 turns verification off
                    * TestStep>0 activates verification

  -- ALGLIB --
     Copyright 31.05.2012 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetgradientcheck(mincgstate state, double teststep)
{
    bool stepfinite = math.isfinite(teststep);
    alglib.ap.assert(stepfinite, "MinCGSetGradientCheck: TestStep contains NaN or Infinite");

    bool stepnonnegative = teststep >= 0.0;
    alglib.ap.assert(stepnonnegative, "MinCGSetGradientCheck: invalid argument TestStep(TestStep<0)");

    state.teststep = teststep;
}
/*************************************************************************
This function turns on/off line search reports.
These reports are described in more details in developer-only  comments on
MinCGState object.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    NeedDRep-   whether line search reports are needed or not

When NeedDRep is True, the optimizer additionally raises  AlgPowerUp,
LSStart and LSEnd requests.

This function is intended for private use only. Turning it on artificially
may cause program failure.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetdrep(mincgstate state, bool needdrep)
{
    // The flag is only stored here; it is consulted by the iteration routine
    state.drep = needdrep;
}
/*************************************************************************
This function calculates preconditioned product H^(-1)*x and stores result
back into X.

Depending on State.PrecType:
* 0 - no preconditioner, X is left unchanged
* 3 - scale-based diagonal preconditioner, X[i] is multiplied by S[i]^2
* 2 - diagonal (DiagH+DiagHL2) preconditioner with optional low-rank
      correction stored in VCorr (used when State.VCnt>0)
Any other value results in a failed assertion.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    X       -   array[N], vector to be multiplied
    Work0   -   temporary, at least VCnt elements are used
    Work1   -   temporary, at least N elements are used

OUTPUT PARAMETERS:
    X       -   array[N], preconditioned vector

This function doesn't allocate arrays. Temporaries are accessed only  via
Work0/Work1 parameters (State.Work0/State.Work1 are never touched behind
the caller's back), so any sufficiently large buffers may be passed.

  -- ALGLIB --
     Copyright 13.10.2010 by Bochkanov Sergey
*************************************************************************/
private static void preconditionedmultiply(mincgstate state, ref double[] x, ref double[] work0, ref double[] work1)
{
    int i = 0;
    int n = 0;
    int vcnt = 0;
    double v = 0;
    int i_ = 0;

    n = state.n;
    vcnt = state.vcnt;
    if( state.prectype==0 )
    {
        return;
    }
    if( state.prectype==3 )
    {
        for(i=0; i<=n-1; i++)
        {
            x[i] = x[i]*state.s[i]*state.s[i];
        }
        return;
    }
    alglib.ap.assert(state.prectype==2, "MinCG: internal error (unexpected PrecType)");
    
    //
    // handle part common for VCnt=0 and VCnt<>0
    //
    for(i=0; i<=n-1; i++)
    {
        x[i] = x[i]/(state.diagh[i]+state.diaghl2[i]);
    }
    
    //
    // if VCnt>0
    //
    if( vcnt>0 )
    {
        
        //
        // Work0 = VCorr*X
        //
        for(i=0; i<=vcnt-1; i++)
        {
            v = 0.0;
            for(i_=0; i_<=n-1;i_++)
            {
                v += state.vcorr[i,i_]*x[i_];
            }
            work0[i] = v;
        }
        
        //
        // Work1 = VCorr'*Work0
        //
        // The accumulator is the Work1 PARAMETER: the very buffer which is
        // zeroed here must be the one which is accumulated into and read
        // from below.
        //
        for(i=0; i<=n-1; i++)
        {
            work1[i] = 0;
        }
        for(i=0; i<=vcnt-1; i++)
        {
            v = work0[i];
            for(i_=0; i_<=n-1;i_++)
            {
                work1[i_] = work1[i_] + v*state.vcorr[i,i_];
            }
        }
        
        //
        // X = X - D^(-1)*Work1
        //
        for(i=0; i<=n-1; i++)
        {
            x[i] = x[i]-work1[i]/(state.diagh[i]+state.diaghl2[i]);
        }
    }
}
/*************************************************************************
This function sets CG algorithm.

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    CGType  -   algorithm type:
                * -1    automatic selection of the best algorithm
                        (currently resolved to 1)
                * 0     DY (Dai and Yuan) algorithm
                * 1     Hybrid DY-HS algorithm
                Any other value results in a failed assertion.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetcgtype(mincgstate state, int cgtype)
{
    bool knowntype = cgtype >= -1 && cgtype <= 1;
    alglib.ap.assert(knowntype, "MinCGSetCGType: incorrect CGType!");

    // "Automatic" is an alias for the hybrid DY-HS algorithm
    state.cgtype = (cgtype == -1) ? 1 : cgtype;
}
/*************************************************************************
Internal initialization subroutine shared by MinCGCreate() and
MinCGCreateF().

It stores problem size and differentiation step, resets optimizer settings
to their defaults (automatic stopping criteria, no reports, unlimited step
length, automatic CG type, default preconditioner, gradient  verification
turned off), allocates working arrays of length N and sets unit scale.

INPUT PARAMETERS:
    N       -   problem dimension
    DiffStep-   differentiation step; zero means that analytic gradient
                is expected
    State   -   structure to be initialized

  -- ALGLIB --
     Copyright 16.05.2011 by Bochkanov Sergey
*************************************************************************/
private static void mincginitinternal(int n, double diffstep, mincgstate state)
{
    //
    // Scalar fields
    //
    state.teststep = 0;
    state.n = n;
    state.diffstep = diffstep;
    state.lastgoodstep = 0;

    //
    // Default settings
    //
    mincgsetcond(state, 0, 0, 0, 0);
    mincgsetxrep(state, false);
    mincgsetdrep(state, false);
    mincgsetstpmax(state, 0);
    mincgsetcgtype(state, -1);
    mincgsetprecdefault(state);

    //
    // Working arrays
    //
    state.xk = new double[n];
    state.dk = new double[n];
    state.xn = new double[n];
    state.dn = new double[n];
    state.x = new double[n];
    state.d = new double[n];
    state.g = new double[n];
    state.work0 = new double[n];
    state.work1 = new double[n];
    state.yk = new double[n];

    //
    // Unit scale for every variable
    //
    double[] unitscale = new double[n];
    for(int idx = 0; idx < n; idx++)
    {
        unitscale[idx] = 1.0;
    }
    state.s = unitscale;
}
/*************************************************************************
This function sets maximum step length

INPUT PARAMETERS:
    State   -   structure which stores algorithm state
    StpMax  -   maximum step length, finite, >=0. Set StpMax to 0.0,  if
                you don't want to limit step length.

Use this subroutine when you optimize target function which contains exp()
or  other  fast  growing  functions,  and optimization algorithm makes too
large  steps  which  leads  to overflow. This function allows us to reject
steps  that  are  too  large  (and  therefore  expose  us  to the possible
overflow) without actually calculating function value at the x+stp*d.

  -- ALGLIB --
     Copyright 02.04.2010 by Bochkanov Sergey
*************************************************************************/
public static void mincgsetstpmax(mincgstate state, double stpmax)
{
    bool limitfinite = math.isfinite(stpmax);
    alglib.ap.assert(limitfinite, "MinCGSetStpMax: StpMax is not finite!");

    bool limitnonnegative = stpmax >= 0.0;
    alglib.ap.assert(limitnonnegative, "MinCGSetStpMax: StpMax<0!");

    state.stpmax = stpmax;
}