/*************************************************************************
 * Test function #2: evaluates the target value and its gradient at State.X.
 *
 * A more strongly nonlinear variation of test function #1, which makes
 * premature convergence of the optimizer unlikely.
 *
 * For X[0] < 100:
 *     F = (exp(X[0])-2)^2 + X[1]^4 + (X[2]-X[0])^2
 * and State.G receives the three partial derivatives of F.
 *
 * Otherwise (this includes X[0] being NaN, because the comparison is then
 * false) F and G[0] are both set to sqrt(MaxRealNumber) and the remaining
 * gradient components are zeroed.
 *************************************************************************/
private static void testfunc2(ref minlbfgs.minlbfgsstate state)
{
    double x0 = state.x[0];

    // Out-of-range branch first: exp(x0) is not evaluated at all here.
    if (!(x0 < 100))
    {
        double huge = Math.Sqrt(AP.Math.MaxRealNumber);
        state.f = huge;
        state.g[0] = huge;
        state.g[1] = 0;
        state.g[2] = 0;
        return;
    }

    double x1 = state.x[1];
    double x2 = state.x[2];
    double expX0 = Math.Exp(x0);

    state.f = AP.Math.Sqr(expX0 - 2) + AP.Math.Sqr(AP.Math.Sqr(x1)) + AP.Math.Sqr(x2 - x0);

    // dF/dx0, dF/dx1 (= 4*x1^3), dF/dx2
    state.g[0] = 2 * (expX0 - 2) * expX0 + 2 * (x0 - x2);
    state.g[1] = 4 * x1 * AP.Math.Sqr(x1);
    state.g[2] = 2 * (x2 - x0);
}
/*************************************************************************
Neural network training using modified Levenberg-Marquardt with exact
Hessian calculation and regularization. Subroutine trains neural network
with restarts from random positions. Algorithm is well suited for small
and medium scale problems (hundreds of weights).

Each restart runs a short L-BFGS warm-up followed by Levenberg-Marquardt
iterations; every accepted LM step is refined by a few L-BFGS iterations
preconditioned with the inverse Cholesky factor of the damped Hessian.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001.
                    Values below MinDecay are raised to MinDecay.
                    Decay term '0.5*Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network (weights of the restart with
                    the smallest regularized error).
    Info        -   return code:
                    * -9, if internal matrix inverse subroutine failed
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<=0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report (NGrad, NHess, NCholesky counters)

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlptrainlm(ref mlpbase.multilayerperceptron network, ref double[,] xy, int npoints, double decay, int restarts, ref int info, ref mlpreport rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double lmsteptol = 0;
    int i = 0;
    int k = 0;
    double v = 0;
    double e = 0;
    double enew = 0;
    double xnorm2 = 0;
    double stepnorm = 0;
    double[] g = new double[0];
    double[,] h = new double[0, 0];
    double[,] hmod = new double[0, 0];
    bool spd = new bool();
    double nu = 0;
    double lambda = 0;
    double lambdaup = 0;
    double lambdadown = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double[] wbase = new double[0];
    double[] wdir = new double[0];
    double[] wt = new double[0];
    int pass = 0;
    double[] wbest = new double[0];
    double ebest = 0;
    int invinfo = 0;
    matinv.matinvreport invrep = new matinv.matinvreport();
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;

    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    lambdaup = 10;
    lambdadown = 0.3;
    lmsteptol = 0.001;

    //
    // Test for inputs
    //
    if( npoints<=0 || restarts<1 )
    {
        info = -1;
        return;
    }
    if( mlpbase.mlpissoftmax(ref network) )
    {
        // For softmax networks column NIn of XY is read as the class number.
        for(i=0; i<=npoints-1; i++)
        {
            if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    g = new double[wcount];
    h = new double[wcount, wcount];
    hmod = new double[wcount, wcount];
    wbase = new double[wcount];
    wdir = new double[wcount];
    wbest = new double[wcount];
    wt = new double[wcount];
    ebest = AP.Math.MaxRealNumber;

    //
    // Multiple passes
    //
    for(pass=1; pass<=restarts; pass++)
    {
        //
        // Initialize weights
        //
        mlpbase.mlprandomize(ref network);

        //
        // First stage of the hybrid algorithm: LBFGS
        // (at most max(25,WCount) iterations, no other stopping condition)
        //
        for(i_=0; i_<=wcount-1; i_++)
        {
            wbase[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
        while( minlbfgs.minlbfgsiteration(ref state) )
        {
            //
            // gradient
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

            //
            // weight decay
            //
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            state.f = state.f+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                state.g[i_] = state.g[i_] + decay*network.weights[i_];
            }

            //
            // next iteration
            //
            rep.ngrad = rep.ngrad+1;
        }
        minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
        for(i_=0; i_<=wcount-1; i_++)
        {
            network.weights[i_] = wbase[i_];
        }

        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with the Hessian returned by MLPHessianBatch
        //              (Decay is then added to its diagonal),
        //            G with regularized gradient,
        //            E with regularized error.
        //
        mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1; i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;
        lambda = 0.001;
        nu = 2;
        while( true )
        {
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if left part is not positive definite.
            //
            for(i=0; i<=wcount-1; i++)
            {
                for(i_=0; i_<=wcount-1; i_++)
                {
                    hmod[i,i_] = h[i,i_];
                }
                hmod[i,i] = hmod[i,i]+lambda;
            }
            spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
            rep.ncholesky = rep.ncholesky+1;
            if( !spd )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
            if( solverinfo<0 )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            for(i_=0; i_<=wcount-1; i_++)
            {
                wdir[i_] = -1*wdir[i_];
            }

            //
            // Lambda found.
            // 1. Save old w in WBase
            // 2. Test some stopping criterions
            // 3. If error(w+wdir)>error(w), restore w and increase lambda
            //
            // Saving w is required: G, H and E describe the point w, so a
            // rejected trial step has to be undone before the next, more
            // damped step is tried from the same point.
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = network.weights[i_] + wdir[i_];
            }
            xnorm2 = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                xnorm2 += network.weights[i_]*network.weights[i_];
            }
            stepnorm = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                stepnorm += wdir[i_]*wdir[i_];
            }
            stepnorm = Math.Sqrt(stepnorm);
            enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5*decay*xnorm2;
            if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
            {
                break;
            }
            if( (double)(enew)>(double)(e) )
            {
                // Step rejected: go back to the point where G/H were computed.
                for(i_=0; i_<=wcount-1; i_++)
                {
                    network.weights[i_] = wbase[i_];
                }
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }

            //
            // Optimize using inv(cholesky(H)) as preconditioner.
            // HMod holds the upper triangular Cholesky factor; it is
            // replaced in place by its inverse.
            //
            matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
            if( invinfo<=0 )
            {
                //
                // if matrix can't be inverted then exit with errors
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }

            // WBase = accepted point; the inner problem is solved in the
            // variable t, with weights = WBase + HMod*t and t starting at 0.
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for(i=0; i<=wcount-1; i++)
            {
                wt[i] = 0;
            }
            minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
            while( minlbfgs.minlbfgsiteration(ref state) )
            {
                //
                // gradient: weights = WBase + HMod*t (upper triangle only),
                // then grad_t = HMod'*grad_w
                //
                for(i=0; i<=wcount-1; i++)
                {
                    v = 0.0;
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        v += state.x[i_]*hmod[i,i_];
                    }
                    network.weights[i] = wbase[i]+v;
                }
                mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                for(i=0; i<=wcount-1; i++)
                {
                    state.g[i] = 0;
                }
                for(i=0; i<=wcount-1; i++)
                {
                    v = g[i];
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v*hmod[i,i_];
                    }
                }

                //
                // weight decay
                // grad(x'*x) = A'*(x0+A*t)
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                state.f = state.f+0.5*decay*v;
                for(i=0; i<=wcount-1; i++)
                {
                    v = decay*network.weights[i];
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v*hmod[i,i_];
                    }
                }

                //
                // next iteration
                //
                rep.ngrad = rep.ngrad+1;
            }
            minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

            //
            // Accept new position.
            // Calculate Hessian
            //
            for(i=0; i<=wcount-1; i++)
            {
                v = 0.0;
                for(i_=i; i_<=wcount-1; i_++)
                {
                    v += wt[i_]*hmod[i,i_];
                }
                network.weights[i] = wbase[i]+v;
            }
            mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            for(k=0; k<=wcount-1; k++)
            {
                h[k,k] = h[k,k]+decay;
            }
            rep.nhess = rep.nhess+1;

            //
            // Update lambda
            //
            lambda = lambda*lambdadown;
            nu = 2;
        }

        //
        // update WBest
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = 0.5*decay*v+mlpbase.mlperror(ref network, ref xy, npoints);
        if( (double)(e)<(double)(ebest) )
        {
            ebest = e;
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
        }
    }

    //
    // copy WBest to output
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        network.weights[i_] = wbest[i_];
    }
}
/*************************************************************************
Neural network training using early stopping (base algorithm - L-BFGS with
regularization).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    TrnXY       -   training set
    TrnSize     -   training set size
    ValXY       -   validation set
    ValSize     -   validation set size
    Decay       -   weight decay constant, >=0.001 is recommended.
                    Negative values are rejected with Info=-1.
                    Decay term '0.5*Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (TrnSize<=0, ValSize<=0, Restarts<1, Decay<0).
                    *  2, task has been solved, stopping  criterion  met -
                          sufficiently small step size.  Not expected  (we
                          use  EARLY  stopping)  but  possible  and not an
                          error.
                    *  6, task has been solved, stopping  criterion  met -
                          increasing of validation set error.
    Rep         -   training report

NOTE:

Algorithm stops if validation set error increases for long enough or the
step size is small enough (there are tasks where validation set error may
decrease for eternity). In any case the solution returned corresponds to
the minimum of validation set error.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlptraines(ref mlpbase.multilayerperceptron network, ref double[,] trnxy, int trnsize, ref double[,] valxy, int valsize, double decay, int restarts, ref int info, ref mlpreport rep)
{
    int i = 0;
    int pass = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double[] w = new double[0];
    double[] wbest = new double[0];
    double e = 0;
    double v = 0;
    double ebest = 0;
    double[] wfinal = new double[0];
    double efinal = 0;
    int itbest = 0;
    int itcnt = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double wstep = 0;
    int i_ = 0;

    wstep = 0.001;

    //
    // Test inputs, parse flags, read network geometry
    //
    if( trnsize<=0 || valsize<=0 || restarts<1 || (double)(decay)<(double)(0) )
    {
        info = -1;
        return;
    }
    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    if( mlpbase.mlpissoftmax(ref network) )
    {
        // For softmax networks column NIn is read as the class number.
        for(i=0; i<=trnsize-1; i++)
        {
            if( (int)Math.Round(trnxy[i,nin])<0 || (int)Math.Round(trnxy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
        for(i=0; i<=valsize-1; i++)
        {
            if( (int)Math.Round(valxy[i,nin])<0 || (int)Math.Round(valxy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    info = 2;

    //
    // Prepare
    //
    mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
    w = new double[wcount];
    wbest = new double[wcount];
    wfinal = new double[wcount];
    efinal = AP.Math.MaxRealNumber;
    for(i=0; i<=wcount-1; i++)
    {
        wfinal[i] = 0;
    }

    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for(pass=1; pass<=restarts; pass++)
    {
        //
        // Process
        //
        mlpbase.mlprandomize(ref network);
        ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
        for(i_=0; i_<=wcount-1; i_++)
        {
            wbest[i_] = network.weights[i_];
        }
        itbest = 0;
        itcnt = 0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            w[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, 0);
        minlbfgs.minlbfgssetxrep(ref state, true);
        while( minlbfgs.minlbfgsiteration(ref state) )
        {
            //
            // Calculate gradient
            //
            // NOTE(review): this also runs on XUpdated reports, as in the
            // original code; confirm whether the optimizer needs F/G there.
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            state.f = state.f+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                state.g[i_] = state.g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;

            //
            // Validation set
            //
            if( state.xupdated )
            {
                //
                // Validate at the CURRENT iterate State.X. Neither W nor
                // InternalRep is refreshed before MinLBFGSResults() is
                // called after the loop, so they cannot be used here;
                // reports are counted in ItCnt instead.
                //
                for(i_=0; i_<=wcount-1; i_++)
                {
                    network.weights[i_] = state.x[i_];
                }
                e = mlpbase.mlperror(ref network, ref valxy, valsize);
                if( (double)(e)<(double)(ebest) )
                {
                    ebest = e;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                    itbest = itcnt;
                }
                // Stop when no improvement was seen during the last third
                // of the run (and at least 30 reports were processed).
                if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) )
                {
                    info = 6;
                    break;
                }
                itcnt = itcnt+1;
            }
        }
        minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);

        //
        // Compare with final answer
        //
        if( (double)(ebest)<(double)(efinal) )
        {
            for(i_=0; i_<=wcount-1; i_++)
            {
                wfinal[i_] = wbest[i_];
            }
            efinal = ebest;
        }
    }

    //
    // The best network
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        network.weights[i_] = wfinal[i_];
    }
}
/*************************************************************************
Trains a neural network with the L-BFGS optimizer and weight decay.

The network is re-randomized Restarts times; each time L-BFGS minimizes
the natural error function plus the decay penalty, and the weights giving
the smallest regularized error over all restarts are kept. Memory use and
cost per step are linear in the number of weights, so the routine suits
problems of any dimensionality.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001 (smaller values are
                    replaced by MinDecay). The penalty added to the error
                    function is '0.5*Decay*||Weights||^2'.
                    If in doubt, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If in doubt, use 2.
    WStep       -   stopping criterion: step size threshold passed to the
                    optimizer. Recommended value - 0.01. Zero means
                    "stop after MaxIts iterations".
    MaxIts      -   stopping criterion: iteration limit (NOT gradient
                    evaluations). Zero means "stop when step is small".

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<=0, Restarts<1, WStep<0, MaxIts<0).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network, ref double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, ref int info, ref mlpreport rep)
{
    int inputCount = 0;
    int outputCount = 0;
    int weightCount = 0;
    minlbfgs.minlbfgsreport lbfgsReport = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate lbfgsState = new minlbfgs.minlbfgsstate();

    // No stopping criterion at all.
    if( maxits==0 && wstep==0 )
    {
        info = -8;
        return;
    }

    // Plainly invalid arguments.
    if( npoints<=0 || restarts<1 || wstep<0 || maxits<0 )
    {
        info = -1;
        return;
    }

    mlpbase.mlpproperties(ref network, ref inputCount, ref outputCount, ref weightCount);

    // Softmax networks: column NIn is read as the class number.
    if( mlpbase.mlpissoftmax(ref network) )
    {
        for(int row=0; row<npoints; row++)
        {
            int label = (int)Math.Round(xy[row,inputCount]);
            if( label<0 || label>=outputCount )
            {
                info = -2;
                return;
            }
        }
    }

    decay = Math.Max(decay, mindecay);
    info = 2;

    // Workspace
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    double[] point = new double[weightCount];
    double[] bestWeights = new double[weightCount];
    double bestError = AP.Math.MaxRealNumber;

    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;

    for(int restart=1; restart<=restarts; restart++)
    {
        // Random starting point for this restart.
        mlpbase.mlprandomize(ref network);
        for(int j=0; j<weightCount; j++)
        {
            point[j] = network.weights[j];
        }

        minlbfgs.minlbfgscreate(weightCount, Math.Min(weightCount, 10), ref point, ref lbfgsState);
        minlbfgs.minlbfgssetcond(ref lbfgsState, 0.0, 0.0, wstep, maxits);

        while( minlbfgs.minlbfgsiteration(ref lbfgsState) )
        {
            // Evaluate error and gradient at the point requested by L-BFGS.
            for(int j=0; j<weightCount; j++)
            {
                network.weights[j] = lbfgsState.x[j];
            }
            mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref lbfgsState.f, ref lbfgsState.g);

            // Add the decay penalty and its gradient.
            double sumSquares = 0.0;
            for(int j=0; j<weightCount; j++)
            {
                sumSquares += network.weights[j]*network.weights[j];
            }
            lbfgsState.f += 0.5*decay*sumSquares;
            for(int j=0; j<weightCount; j++)
            {
                lbfgsState.g[j] += decay*network.weights[j];
            }

            rep.ngrad += 1;
        }

        minlbfgs.minlbfgsresults(ref lbfgsState, ref point, ref lbfgsReport);
        for(int j=0; j<weightCount; j++)
        {
            network.weights[j] = point[j];
        }

        // Regularized error of this restart; keep the best one seen so far.
        double penalty = 0.0;
        for(int j=0; j<weightCount; j++)
        {
            penalty += network.weights[j]*network.weights[j];
        }
        double restartError = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5*decay*penalty;
        if( restartError<bestError )
        {
            for(int j=0; j<weightCount; j++)
            {
                bestWeights[j] = network.weights[j];
            }
            bestError = restartError;
        }
    }

    // Hand the best weights back to the caller.
    for(int j=0; j<weightCount; j++)
    {
        network.weights[j] = bestWeights[j];
    }
}
/*************************************************************************
 * Neural network training using modified Levenberg-Marquardt with exact
 * Hessian calculation and regularization. Subroutine trains neural network
 * with restarts from random positions. Algorithm is well suited for small
 * and medium scale problems (hundreds of weights).
 *
 * Each restart runs a short L-BFGS warm-up followed by Levenberg-Marquardt
 * iterations; every accepted LM step is refined by a few L-BFGS iterations
 * preconditioned with the inverse Cholesky factor of the damped Hessian.
 *
 * INPUT PARAMETERS:
 *     Network     -   neural network with initialized geometry
 *     XY          -   training set
 *     NPoints     -   training set size
 *     Decay       -   weight decay constant, >=0.001.
 *                     Values below MinDecay are raised to MinDecay.
 *                     Decay term '0.5*Decay*||Weights||^2' is added to
 *                     error function.
 *                     If you don't know what Decay to choose, use 0.001.
 *     Restarts    -   number of restarts from random position, >0.
 *                     If you don't know what Restarts to choose, use 2.
 *
 * OUTPUT PARAMETERS:
 *     Network     -   trained neural network (weights of the restart with
 *                     the smallest regularized error).
 *     Info        -   return code:
 *                     * -9, if internal matrix inverse subroutine failed
 *                     * -2, if there is a point with class number
 *                           outside of [0..NOut-1].
 *                     * -1, if wrong parameters specified
 *                           (NPoints<=0, Restarts<1).
 *                     *  2, if task has been solved.
 *     Rep         -   training report (NGrad, NHess, NCholesky counters)
 *
 *   -- ALGLIB --
 *      Copyright 10.03.2009 by Bochkanov Sergey
 *************************************************************************/
public static void mlptrainlm(ref mlpbase.multilayerperceptron network, ref double[,] xy, int npoints, double decay, int restarts, ref int info, ref mlpreport rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double lmsteptol = 0;
    int i = 0;
    int k = 0;
    double v = 0;
    double e = 0;
    double enew = 0;
    double xnorm2 = 0;
    double stepnorm = 0;
    double[] g = new double[0];
    double[,] h = new double[0, 0];
    double[,] hmod = new double[0, 0];
    bool spd = new bool();
    double nu = 0;
    double lambda = 0;
    double lambdaup = 0;
    double lambdadown = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double[] wbase = new double[0];
    double[] wdir = new double[0];
    double[] wt = new double[0];
    int pass = 0;
    double[] wbest = new double[0];
    double ebest = 0;
    int invinfo = 0;
    matinv.matinvreport invrep = new matinv.matinvreport();
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;

    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    lambdaup = 10;
    lambdadown = 0.3;
    lmsteptol = 0.001;

    //
    // Test for inputs
    //
    if (npoints <= 0 || restarts < 1)
    {
        info = -1;
        return;
    }
    if (mlpbase.mlpissoftmax(ref network))
    {
        // For softmax networks column NIn of XY is read as the class number.
        for (i = 0; i <= npoints - 1; i++)
        {
            if ((int)Math.Round(xy[i, nin]) < 0 || (int)Math.Round(xy[i, nin]) >= nout)
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    g = new double[wcount];
    h = new double[wcount, wcount];
    hmod = new double[wcount, wcount];
    wbase = new double[wcount];
    wdir = new double[wcount];
    wbest = new double[wcount];
    wt = new double[wcount];
    ebest = AP.Math.MaxRealNumber;

    //
    // Multiple passes
    //
    for (pass = 1; pass <= restarts; pass++)
    {
        //
        // Initialize weights
        //
        mlpbase.mlprandomize(ref network);

        //
        // First stage of the hybrid algorithm: LBFGS
        // (at most max(25,WCount) iterations, no other stopping condition)
        //
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            wbase[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
        while (minlbfgs.minlbfgsiteration(ref state))
        {
            //
            // gradient
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

            //
            // weight decay
            //
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            state.f = state.f + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                state.g[i_] = state.g[i_] + decay * network.weights[i_];
            }

            //
            // next iteration
            //
            rep.ngrad = rep.ngrad + 1;
        }
        minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            network.weights[i_] = wbase[i_];
        }

        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with the Hessian returned by MLPHessianBatch
        //              (Decay is then added to its diagonal),
        //            G with regularized gradient,
        //            E with regularized error.
        //
        mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            v += network.weights[i_] * network.weights[i_];
        }
        e = e + 0.5 * decay * v;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            g[i_] = g[i_] + decay * network.weights[i_];
        }
        for (k = 0; k <= wcount - 1; k++)
        {
            h[k, k] = h[k, k] + decay;
        }
        rep.nhess = rep.nhess + 1;
        lambda = 0.001;
        nu = 2;
        while (true)
        {
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if left part is not positive definite.
            //
            for (i = 0; i <= wcount - 1; i++)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    hmod[i, i_] = h[i, i_];
                }
                hmod[i, i] = hmod[i, i] + lambda;
            }
            spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
            rep.ncholesky = rep.ncholesky + 1;
            if (!spd)
            {
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }
            densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
            if (solverinfo < 0)
            {
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wdir[i_] = -1 * wdir[i_];
            }

            //
            // Lambda found.
            // 1. Save old w in WBase
            // 2. Test some stopping criterions
            // 3. If error(w+wdir)>error(w), restore w and increase lambda
            //
            // Saving w is required: G, H and E describe the point w, so a
            // rejected trial step has to be undone before the next, more
            // damped step is tried from the same point.
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = network.weights[i_] + wdir[i_];
            }
            xnorm2 = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                xnorm2 += network.weights[i_] * network.weights[i_];
            }
            stepnorm = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                stepnorm += wdir[i_] * wdir[i_];
            }
            stepnorm = Math.Sqrt(stepnorm);
            enew = mlpbase.mlperror(ref network, ref xy, npoints) + 0.5 * decay * xnorm2;
            if ((double)(stepnorm) < (double)(lmsteptol * (1 + Math.Sqrt(xnorm2))))
            {
                break;
            }
            if ((double)(enew) > (double)(e))
            {
                // Step rejected: go back to the point where G/H were computed.
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = wbase[i_];
                }
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }

            //
            // Optimize using inv(cholesky(H)) as preconditioner.
            // HMod holds the upper triangular Cholesky factor; it is
            // replaced in place by its inverse.
            //
            matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
            if (invinfo <= 0)
            {
                //
                // if matrix can't be inverted then exit with errors
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }

            // WBase = accepted point; the inner problem is solved in the
            // variable t, with weights = WBase + HMod*t and t starting at 0.
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for (i = 0; i <= wcount - 1; i++)
            {
                wt[i] = 0;
            }
            minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
            while (minlbfgs.minlbfgsiteration(ref state))
            {
                //
                // gradient: weights = WBase + HMod*t (upper triangle only),
                // then grad_t = HMod'*grad_w
                //
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = 0.0;
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        v += state.x[i_] * hmod[i, i_];
                    }
                    network.weights[i] = wbase[i] + v;
                }
                mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                for (i = 0; i <= wcount - 1; i++)
                {
                    state.g[i] = 0;
                }
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = g[i];
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v * hmod[i, i_];
                    }
                }

                //
                // weight decay
                // grad(x'*x) = A'*(x0+A*t)
                //
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                state.f = state.f + 0.5 * decay * v;
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = decay * network.weights[i];
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v * hmod[i, i_];
                    }
                }

                //
                // next iteration
                //
                rep.ngrad = rep.ngrad + 1;
            }
            minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

            //
            // Accept new position.
            // Calculate Hessian
            //
            for (i = 0; i <= wcount - 1; i++)
            {
                v = 0.0;
                for (i_ = i; i_ <= wcount - 1; i_++)
                {
                    v += wt[i_] * hmod[i, i_];
                }
                network.weights[i] = wbase[i] + v;
            }
            mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            e = e + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                g[i_] = g[i_] + decay * network.weights[i_];
            }
            for (k = 0; k <= wcount - 1; k++)
            {
                h[k, k] = h[k, k] + decay;
            }
            rep.nhess = rep.nhess + 1;

            //
            // Update lambda
            //
            lambda = lambda * lambdadown;
            nu = 2;
        }

        //
        // update WBest
        //
        v = 0.0;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            v += network.weights[i_] * network.weights[i_];
        }
        e = 0.5 * decay * v + mlpbase.mlperror(ref network, ref xy, npoints);
        if ((double)(e) < (double)(ebest))
        {
            ebest = e;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
        }
    }

    //
    // copy WBest to output
    //
    for (i_ = 0; i_ <= wcount - 1; i_++)
    {
        network.weights[i_] = wbest[i_];
    }
}
/*************************************************************************
 * Neural network training using early stopping (base algorithm - L-BFGS with
 * regularization).
 *
 * INPUT PARAMETERS:
 *     Network     -   neural network with initialized geometry
 *     TrnXY       -   training set
 *     TrnSize     -   training set size
 *     ValXY       -   validation set
 *     ValSize     -   validation set size
 *     Decay       -   weight decay constant, >=0.001 is recommended.
 *                     Negative values are rejected with Info=-1.
 *                     Decay term '0.5*Decay*||Weights||^2' is added to
 *                     error function.
 *                     If you don't know what Decay to choose, use 0.001.
 *     Restarts    -   number of restarts from random position, >0.
 *                     If you don't know what Restarts to choose, use 2.
 *
 * OUTPUT PARAMETERS:
 *     Network     -   trained neural network.
 *     Info        -   return code:
 *                     * -2, if there is a point with class number
 *                           outside of [0..NOut-1].
 *                     * -1, if wrong parameters specified
 *                           (TrnSize<=0, ValSize<=0, Restarts<1, Decay<0).
 *                     *  2, task has been solved, stopping criterion met -
 *                           sufficiently small step size. Not expected (we
 *                           use EARLY stopping) but possible and not an
 *                           error.
 *                     *  6, task has been solved, stopping criterion met -
 *                           increasing of validation set error.
 *     Rep         -   training report
 *
 * NOTE:
 *
 * Algorithm stops if validation set error increases for long enough or the
 * step size is small enough (there are tasks where validation set error may
 * decrease for eternity). In any case the solution returned corresponds to
 * the minimum of validation set error.
 *
 *   -- ALGLIB --
 *      Copyright 10.03.2009 by Bochkanov Sergey
 *************************************************************************/
public static void mlptraines(ref mlpbase.multilayerperceptron network, ref double[,] trnxy, int trnsize, ref double[,] valxy, int valsize, double decay, int restarts, ref int info, ref mlpreport rep)
{
    int i = 0;
    int pass = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double[] w = new double[0];
    double[] wbest = new double[0];
    double e = 0;
    double v = 0;
    double ebest = 0;
    double[] wfinal = new double[0];
    double efinal = 0;
    int itbest = 0;
    int itcnt = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double wstep = 0;
    int i_ = 0;

    wstep = 0.001;

    //
    // Test inputs, parse flags, read network geometry
    //
    if (trnsize <= 0 || valsize <= 0 || restarts < 1 || (double)(decay) < (double)(0))
    {
        info = -1;
        return;
    }
    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    if (mlpbase.mlpissoftmax(ref network))
    {
        // For softmax networks column NIn is read as the class number.
        for (i = 0; i <= trnsize - 1; i++)
        {
            if ((int)Math.Round(trnxy[i, nin]) < 0 || (int)Math.Round(trnxy[i, nin]) >= nout)
            {
                info = -2;
                return;
            }
        }
        for (i = 0; i <= valsize - 1; i++)
        {
            if ((int)Math.Round(valxy[i, nin]) < 0 || (int)Math.Round(valxy[i, nin]) >= nout)
            {
                info = -2;
                return;
            }
        }
    }
    info = 2;

    //
    // Prepare
    //
    mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
    w = new double[wcount];
    wbest = new double[wcount];
    wfinal = new double[wcount];
    efinal = AP.Math.MaxRealNumber;
    for (i = 0; i <= wcount - 1; i++)
    {
        wfinal[i] = 0;
    }

    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for (pass = 1; pass <= restarts; pass++)
    {
        //
        // Process
        //
        mlpbase.mlprandomize(ref network);
        ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            wbest[i_] = network.weights[i_];
        }
        itbest = 0;
        itcnt = 0;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            w[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, 0);
        minlbfgs.minlbfgssetxrep(ref state, true);
        while (minlbfgs.minlbfgsiteration(ref state))
        {
            //
            // Calculate gradient
            //
            // NOTE(review): this also runs on XUpdated reports, as in the
            // original code; confirm whether the optimizer needs F/G there.
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            state.f = state.f + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                state.g[i_] = state.g[i_] + decay * network.weights[i_];
            }
            rep.ngrad = rep.ngrad + 1;

            //
            // Validation set
            //
            if (state.xupdated)
            {
                //
                // Validate at the CURRENT iterate State.X. Neither W nor
                // InternalRep is refreshed before MinLBFGSResults() is
                // called after the loop, so they cannot be used here;
                // reports are counted in ItCnt instead.
                //
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = state.x[i_];
                }
                e = mlpbase.mlperror(ref network, ref valxy, valsize);
                if ((double)(e) < (double)(ebest))
                {
                    ebest = e;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                    itbest = itcnt;
                }
                // Stop when no improvement was seen during the last third
                // of the run (and at least 30 reports were processed).
                if (itcnt > 30 && (double)(itcnt) > (double)(1.5 * itbest))
                {
                    info = 6;
                    break;
                }
                itcnt = itcnt + 1;
            }
        }
        minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);

        //
        // Compare with final answer
        //
        if ((double)(ebest) < (double)(efinal))
        {
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wfinal[i_] = wbest[i_];
            }
            efinal = ebest;
        }
    }

    //
    // The best network
    //
    for (i_ = 0; i_ <= wcount - 1; i_++)
    {
        network.weights[i_] = wfinal[i_];
    }
}
/*************************************************************************
 * Trains a neural network with the L-BFGS optimizer and weight decay.
 *
 * The network is re-randomized Restarts times; each time L-BFGS minimizes
 * the natural error function plus the decay penalty, and the weights giving
 * the smallest regularized error over all restarts are kept. Memory use and
 * cost per step are linear in the number of weights, so the routine suits
 * problems of any dimensionality.
 *
 * INPUT PARAMETERS:
 *     Network     -   neural network with initialized geometry
 *     XY          -   training set
 *     NPoints     -   training set size
 *     Decay       -   weight decay constant, >=0.001 (smaller values are
 *                     replaced by MinDecay). The penalty added to the
 *                     error function is '0.5*Decay*||Weights||^2'.
 *                     If in doubt, use 0.001.
 *     Restarts    -   number of restarts from random position, >0.
 *                     If in doubt, use 2.
 *     WStep       -   stopping criterion: step size threshold passed to
 *                     the optimizer. Recommended value - 0.01. Zero means
 *                     "stop after MaxIts iterations".
 *     MaxIts      -   stopping criterion: iteration limit (NOT gradient
 *                     evaluations). Zero means "stop when step is small".
 *
 * OUTPUT PARAMETERS:
 *     Network     -   trained neural network.
 *     Info        -   return code:
 *                     * -8, if both WStep=0 and MaxIts=0
 *                     * -2, if there is a point with class number
 *                           outside of [0..NOut-1].
 *                     * -1, if wrong parameters specified
 *                           (NPoints<=0, Restarts<1, WStep<0, MaxIts<0).
 *                     *  2, if task has been solved.
 *     Rep         -   training report
 *
 *   -- ALGLIB --
 *      Copyright 09.12.2007 by Bochkanov Sergey
 *************************************************************************/
public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network, ref double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, ref int info, ref mlpreport rep)
{
    int inputCount = 0;
    int outputCount = 0;
    int weightCount = 0;
    minlbfgs.minlbfgsreport lbfgsReport = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate lbfgsState = new minlbfgs.minlbfgsstate();

    // No stopping criterion at all.
    if (maxits == 0 && wstep == 0)
    {
        info = -8;
        return;
    }

    // Plainly invalid arguments.
    if (npoints <= 0 || restarts < 1 || wstep < 0 || maxits < 0)
    {
        info = -1;
        return;
    }

    mlpbase.mlpproperties(ref network, ref inputCount, ref outputCount, ref weightCount);

    // Softmax networks: column NIn is read as the class number.
    if (mlpbase.mlpissoftmax(ref network))
    {
        for (int row = 0; row < npoints; row++)
        {
            int label = (int)Math.Round(xy[row, inputCount]);
            if (label < 0 || label >= outputCount)
            {
                info = -2;
                return;
            }
        }
    }

    decay = Math.Max(decay, mindecay);
    info = 2;

    // Workspace
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    double[] point = new double[weightCount];
    double[] bestWeights = new double[weightCount];
    double bestError = AP.Math.MaxRealNumber;

    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;

    for (int restart = 1; restart <= restarts; restart++)
    {
        // Random starting point for this restart.
        mlpbase.mlprandomize(ref network);
        for (int j = 0; j < weightCount; j++)
        {
            point[j] = network.weights[j];
        }

        minlbfgs.minlbfgscreate(weightCount, Math.Min(weightCount, 10), ref point, ref lbfgsState);
        minlbfgs.minlbfgssetcond(ref lbfgsState, 0.0, 0.0, wstep, maxits);

        while (minlbfgs.minlbfgsiteration(ref lbfgsState))
        {
            // Evaluate error and gradient at the point requested by L-BFGS.
            for (int j = 0; j < weightCount; j++)
            {
                network.weights[j] = lbfgsState.x[j];
            }
            mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref lbfgsState.f, ref lbfgsState.g);

            // Add the decay penalty and its gradient.
            double sumSquares = 0.0;
            for (int j = 0; j < weightCount; j++)
            {
                sumSquares += network.weights[j] * network.weights[j];
            }
            lbfgsState.f += 0.5 * decay * sumSquares;
            for (int j = 0; j < weightCount; j++)
            {
                lbfgsState.g[j] += decay * network.weights[j];
            }

            rep.ngrad += 1;
        }

        minlbfgs.minlbfgsresults(ref lbfgsState, ref point, ref lbfgsReport);
        for (int j = 0; j < weightCount; j++)
        {
            network.weights[j] = point[j];
        }

        // Regularized error of this restart; keep the best one seen so far.
        double penalty = 0.0;
        for (int j = 0; j < weightCount; j++)
        {
            penalty += network.weights[j] * network.weights[j];
        }
        double restartError = mlpbase.mlperrorn(ref network, ref xy, npoints) + 0.5 * decay * penalty;
        if (restartError < bestError)
        {
            for (int j = 0; j < weightCount; j++)
            {
                bestWeights[j] = network.weights[j];
            }
            bestError = restartError;
        }
    }

    // Hand the best weights back to the caller.
    for (int j = 0; j < weightCount; j++)
    {
        network.weights[j] = bestWeights[j];
    }
}
/*************************************************************************
Self-test of the L-BFGS optimizer (minlbfgs).

Runs six groups of checks:
  * reference problem      - 3-variable quadratic with minimum at (2,0,2)
  * non-convex problem     - x^2/(1+x^2) started from many points
  * linear equations       - least squares solution of random A*x=b
  * convergence properties - termination type reported for each stopping
                             criterion
  * crash test             - 10000 iterations on a simple task
  * other properties       - progress reports and maximum step length

INPUT PARAMETERS:
    Silent  -   when False, a per-group report is written to the console.

RESULT:
    True when every group passed, False otherwise.
*************************************************************************/
public static bool testminlbfgs(bool silent)
{
    bool waserrors;
    bool referror;
    bool nonconverror;
    bool eqerror;
    bool converror;
    bool crashtest;
    bool othererrors;
    int n;
    int m;
    int i;
    int j;
    int k;
    int maxits;
    double v;
    double c;
    double fprev;
    double xprev;
    double stpmax;
    double steplimit;
    double[] x;
    double[] xe;
    double[] b;
    double[] xlast;
    double[,] a;
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    minlbfgs.minlbfgsreport rep = new minlbfgs.minlbfgsreport();

    //
    // Reference problem
    //
    n = 3;
    m = 2;
    x = new double[n];
    x[0] = 100 * AP.Math.RandomReal() - 50;
    x[1] = 100 * AP.Math.RandomReal() - 50;
    x[2] = 100 * AP.Math.RandomReal() - 50;
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        state.f = AP.Math.Sqr(state.x[0] - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
        state.g[0] = 2 * (state.x[0] - 2) + 2 * (state.x[0] - state.x[2]);
        state.g[1] = 2 * state.x[1];
        state.g[2] = 2 * (state.x[2] - state.x[0]);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    referror = rep.terminationtype <= 0
        || Math.Abs(x[0] - 2) > 0.001
        || Math.Abs(x[1]) > 0.001
        || Math.Abs(x[2] - 2) > 0.001;

    //
    // nonconvex problems with hard relief: we start from point with very small
    // gradient, but we need ever smaller gradient in the next step due to
    // Wolfe conditions.
    //
    nonconverror = false;
    n = 1;
    m = 1;
    x = new double[n];
    v = -100;
    while (v < 0.1)
    {
        x[0] = v;
        minlbfgs.minlbfgscreate(n, m, ref x, ref state);
        minlbfgs.minlbfgssetcond(ref state, 1.0E-9, 0, 0, 0);
        while (minlbfgs.minlbfgsiteration(ref state))
        {
            state.f = AP.Math.Sqr(state.x[0]) / (1 + AP.Math.Sqr(state.x[0]));
            state.g[0] = (2 * state.x[0] * (1 + AP.Math.Sqr(state.x[0])) - AP.Math.Sqr(state.x[0]) * 2 * state.x[0]) / AP.Math.Sqr(1 + AP.Math.Sqr(state.x[0]));
        }
        minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
        nonconverror = nonconverror || rep.terminationtype <= 0 || Math.Abs(x[0]) > 0.001;
        v = v + 0.1;
    }

    //
    // Linear equations
    //
    eqerror = false;
    for (n = 1; n <= 10; n++)
    {
        //
        // Prepare task: random exact solution XE, random matrix A whose
        // diagonal is pushed away from zero, and B = A*XE.
        //
        a = new double[n, n];
        x = new double[n];
        xe = new double[n];
        b = new double[n];
        for (i = 0; i < n; i++)
        {
            xe[i] = 2 * AP.Math.RandomReal() - 1;
        }
        for (i = 0; i < n; i++)
        {
            for (j = 0; j < n; j++)
            {
                a[i, j] = 2 * AP.Math.RandomReal() - 1;
            }
            a[i, i] = a[i, i] + 3 * Math.Sign(a[i, i]);
        }
        for (i = 0; i < n; i++)
        {
            v = 0.0;
            for (k = 0; k < n; k++)
            {
                v += a[i, k] * xe[k];
            }
            b[i] = v;
        }

        //
        // Test different M
        //
        for (m = 1; m <= n; m++)
        {
            //
            // Solve task: minimize |A*x-b|^2 from a random starting point
            //
            for (i = 0; i < n; i++)
            {
                x[i] = 2 * AP.Math.RandomReal() - 1;
            }
            minlbfgs.minlbfgscreate(n, m, ref x, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 0);
            while (minlbfgs.minlbfgsiteration(ref state))
            {
                state.f = 0;
                for (i = 0; i < n; i++)
                {
                    state.g[i] = 0;
                }
                for (i = 0; i < n; i++)
                {
                    v = 0.0;
                    for (k = 0; k < n; k++)
                    {
                        v += a[i, k] * state.x[k];
                    }
                    state.f = state.f + AP.Math.Sqr(v - b[i]);
                    for (j = 0; j < n; j++)
                    {
                        state.g[j] = state.g[j] + 2 * (v - b[i]) * a[i, j];
                    }
                }
            }
            minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
            eqerror = eqerror || rep.terminationtype <= 0;
            for (i = 0; i < n; i++)
            {
                eqerror = eqerror || Math.Abs(x[i] - xe[i]) > 0.001;
            }
        }
    }

    //
    // Testing convergence properties: each run enables exactly one
    // argument of minlbfgssetcond and expects the matching termination type.
    //
    converror = false;
    n = 3;
    m = 2;
    x = new double[n];

    // only the 1st condition is set -> termination type 4 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0.001, 0, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 4;

    // only the 2nd condition is set -> termination type 1 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0.001, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 1;

    // only the 3rd condition is set -> termination type 2 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0.001, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 2;

    // only the iteration limit (10) is set -> type 5 after exactly 10 iterations
    for (i = 0; i <= 2; i++)
    {
        x[i] = 2 * AP.Math.RandomReal() - 1;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 10);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 5 || rep.iterationscount != 10;

    //
    // Crash test: too many iterations on a simple tasks
    // May fail when encounter zero step, underflow or something like that
    //
    crashtest = false;
    n = 3;
    m = 2;
    x = new double[n];
    maxits = 10000;
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, maxits);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        state.f = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
        state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
        state.g[1] = 2 * state.x[1];
        state.g[2] = 2 * (state.x[2] - state.x[0]);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    crashtest = crashtest || rep.terminationtype <= 0;

    //
    // Other properties:
    // 1. test reports (F should form monotone sequence)
    // 2. test maximum step
    //
    othererrors = false;
    n = 50;
    m = 2;
    x = new double[n];
    xlast = new double[n];
    for (i = 0; i < n; i++)
    {
        x[i] = 1;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 100);
    minlbfgs.minlbfgssetxrep(ref state, true);
    fprev = AP.Math.MaxRealNumber;
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        if (state.needfg)
        {
            //
            // F = sum(((1+i)*x[i])^2), hence dF/dx[i] = 2*(1+i)^2*x[i].
            // The gradient must carry the squared coefficient to stay
            // consistent with F.
            //
            state.f = 0;
            for (i = 0; i < n; i++)
            {
                c = 1 + i;
                state.f = state.f + AP.Math.Sqr(c * state.x[i]);
                state.g[i] = 2 * c * c * state.x[i];
            }
        }
        if (state.xupdated)
        {
            othererrors = othererrors || state.f > fprev;

            // FPrev still holds its initial sentinel only on the first
            // report, which must show the starting point unchanged.
            if ((double)(fprev) == (double)(AP.Math.MaxRealNumber))
            {
                for (i = 0; i < n; i++)
                {
                    othererrors = othererrors || (double)(state.x[i]) != (double)(x[i]);
                }
            }
            fprev = state.f;
            for (k = 0; k < n; k++)
            {
                xlast[k] = state.x[k];
            }
        }
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    for (i = 0; i < n; i++)
    {
        // the returned point must be exactly the last reported one
        othererrors = othererrors || (double)(x[i]) != (double)(xlast[i]);
    }

    n = 1;
    m = 1;
    x = new double[n];
    x[0] = 100;
    stpmax = 0.05 + 0.05 * AP.Math.RandomReal();
    steplimit = (1 + Math.Sqrt(AP.Math.MachineEpsilon)) * stpmax;
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 1.0E-9, 0, 0, 0);
    minlbfgs.minlbfgssetstpmax(ref state, stpmax);
    minlbfgs.minlbfgssetxrep(ref state, true);
    xprev = x[0];
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        if (state.needfg)
        {
            state.f = Math.Exp(state.x[0]) + Math.Exp(-state.x[0]);
            state.g[0] = Math.Exp(state.x[0]) - Math.Exp(-state.x[0]);

            // no trial point may be farther than StpMax from the last accepted one
            othererrors = othererrors || Math.Abs(state.x[0] - xprev) > steplimit;
        }
        if (state.xupdated)
        {
            othererrors = othererrors || Math.Abs(state.x[0] - xprev) > steplimit;
            xprev = state.x[0];
        }
    }

    //
    // end
    //
    waserrors = referror || nonconverror || eqerror || converror || crashtest || othererrors;
    if (!silent)
    {
        System.Console.Write("TESTING L-BFGS OPTIMIZATION");
        System.Console.WriteLine();
        System.Console.Write("REFERENCE PROBLEM: ");
        System.Console.WriteLine(referror ? "FAILED" : "OK");
        System.Console.Write("NON-CONVEX PROBLEM: ");
        System.Console.WriteLine(nonconverror ? "FAILED" : "OK");
        System.Console.Write("LINEAR EQUATIONS: ");
        System.Console.WriteLine(eqerror ? "FAILED" : "OK");
        System.Console.Write("CONVERGENCE PROPERTIES: ");
        System.Console.WriteLine(converror ? "FAILED" : "OK");
        System.Console.Write("CRASH TEST: ");
        System.Console.WriteLine(crashtest ? "FAILED" : "OK");
        System.Console.Write("OTHER PROPERTIES: ");
        System.Console.WriteLine(othererrors ? "FAILED" : "OK");
        System.Console.WriteLine(waserrors ? "TEST FAILED" : "TEST PASSED");
        System.Console.WriteLine();
        System.Console.WriteLine();
    }
    return !waserrors;
}
/*************************************************************************
Self-test of the L-BFGS optimizer (minlbfgs).

Runs six groups of checks:
  * reference problem      - 3-variable quadratic with minimum at (2,0,2)
  * non-convex problem     - x^2/(1+x^2) started from many points
  * linear equations       - least squares solution of random A*x=b
  * convergence properties - termination type reported for each stopping
                             criterion
  * crash test             - 10000 iterations on a simple task
  * other properties       - progress reports and maximum step length

INPUT PARAMETERS:
    Silent  -   when False, a per-group report is written to the console.

RESULT:
    True when every group passed, False otherwise.
*************************************************************************/
public static bool testminlbfgs(bool silent)
{
    bool waserrors;
    bool referror;
    bool nonconverror;
    bool eqerror;
    bool converror;
    bool crashtest;
    bool othererrors;
    int n;
    int m;
    int i;
    int j;
    int k;
    int maxits;
    double v;
    double c;
    double fprev;
    double xprev;
    double stpmax;
    double steplimit;
    double[] x;
    double[] xe;
    double[] b;
    double[] xlast;
    double[,] a;
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    minlbfgs.minlbfgsreport rep = new minlbfgs.minlbfgsreport();

    //
    // Reference problem
    //
    n = 3;
    m = 2;
    x = new double[n];
    x[0] = 100 * AP.Math.RandomReal() - 50;
    x[1] = 100 * AP.Math.RandomReal() - 50;
    x[2] = 100 * AP.Math.RandomReal() - 50;
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        state.f = AP.Math.Sqr(state.x[0] - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
        state.g[0] = 2 * (state.x[0] - 2) + 2 * (state.x[0] - state.x[2]);
        state.g[1] = 2 * state.x[1];
        state.g[2] = 2 * (state.x[2] - state.x[0]);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    referror = rep.terminationtype <= 0
        || Math.Abs(x[0] - 2) > 0.001
        || Math.Abs(x[1]) > 0.001
        || Math.Abs(x[2] - 2) > 0.001;

    //
    // nonconvex problems with hard relief: we start from point with very small
    // gradient, but we need ever smaller gradient in the next step due to
    // Wolfe conditions.
    //
    nonconverror = false;
    n = 1;
    m = 1;
    x = new double[n];
    v = -100;
    while (v < 0.1)
    {
        x[0] = v;
        minlbfgs.minlbfgscreate(n, m, ref x, ref state);
        minlbfgs.minlbfgssetcond(ref state, 1.0E-9, 0, 0, 0);
        while (minlbfgs.minlbfgsiteration(ref state))
        {
            state.f = AP.Math.Sqr(state.x[0]) / (1 + AP.Math.Sqr(state.x[0]));
            state.g[0] = (2 * state.x[0] * (1 + AP.Math.Sqr(state.x[0])) - AP.Math.Sqr(state.x[0]) * 2 * state.x[0]) / AP.Math.Sqr(1 + AP.Math.Sqr(state.x[0]));
        }
        minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
        nonconverror = nonconverror || rep.terminationtype <= 0 || Math.Abs(x[0]) > 0.001;
        v = v + 0.1;
    }

    //
    // Linear equations
    //
    eqerror = false;
    for (n = 1; n <= 10; n++)
    {
        //
        // Prepare task: random exact solution XE, random matrix A whose
        // diagonal is pushed away from zero, and B = A*XE.
        //
        a = new double[n, n];
        x = new double[n];
        xe = new double[n];
        b = new double[n];
        for (i = 0; i < n; i++)
        {
            xe[i] = 2 * AP.Math.RandomReal() - 1;
        }
        for (i = 0; i < n; i++)
        {
            for (j = 0; j < n; j++)
            {
                a[i, j] = 2 * AP.Math.RandomReal() - 1;
            }
            a[i, i] = a[i, i] + 3 * Math.Sign(a[i, i]);
        }
        for (i = 0; i < n; i++)
        {
            v = 0.0;
            for (k = 0; k < n; k++)
            {
                v += a[i, k] * xe[k];
            }
            b[i] = v;
        }

        //
        // Test different M
        //
        for (m = 1; m <= n; m++)
        {
            //
            // Solve task: minimize |A*x-b|^2 from a random starting point
            //
            for (i = 0; i < n; i++)
            {
                x[i] = 2 * AP.Math.RandomReal() - 1;
            }
            minlbfgs.minlbfgscreate(n, m, ref x, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 0);
            while (minlbfgs.minlbfgsiteration(ref state))
            {
                state.f = 0;
                for (i = 0; i < n; i++)
                {
                    state.g[i] = 0;
                }
                for (i = 0; i < n; i++)
                {
                    v = 0.0;
                    for (k = 0; k < n; k++)
                    {
                        v += a[i, k] * state.x[k];
                    }
                    state.f = state.f + AP.Math.Sqr(v - b[i]);
                    for (j = 0; j < n; j++)
                    {
                        state.g[j] = state.g[j] + 2 * (v - b[i]) * a[i, j];
                    }
                }
            }
            minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
            eqerror = eqerror || rep.terminationtype <= 0;
            for (i = 0; i < n; i++)
            {
                eqerror = eqerror || Math.Abs(x[i] - xe[i]) > 0.001;
            }
        }
    }

    //
    // Testing convergence properties: each run enables exactly one
    // argument of minlbfgssetcond and expects the matching termination type.
    //
    converror = false;
    n = 3;
    m = 2;
    x = new double[n];

    // only the 1st condition is set -> termination type 4 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0.001, 0, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 4;

    // only the 2nd condition is set -> termination type 1 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0.001, 0, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 1;

    // only the 3rd condition is set -> termination type 2 expected
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0.001, 0);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 2;

    // only the iteration limit (10) is set -> type 5 after exactly 10 iterations
    for (i = 0; i <= 2; i++)
    {
        x[i] = 2 * AP.Math.RandomReal() - 1;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 10);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        testfunc3(ref state);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    converror = converror || rep.terminationtype != 5 || rep.iterationscount != 10;

    //
    // Crash test: too many iterations on a simple tasks
    // May fail when encounter zero step, underflow or something like that
    //
    crashtest = false;
    n = 3;
    m = 2;
    x = new double[n];
    maxits = 10000;
    for (i = 0; i <= 2; i++)
    {
        x[i] = 6 * AP.Math.RandomReal() - 3;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, maxits);
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        state.f = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
        state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
        state.g[1] = 2 * state.x[1];
        state.g[2] = 2 * (state.x[2] - state.x[0]);
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    crashtest = crashtest || rep.terminationtype <= 0;

    //
    // Other properties:
    // 1. test reports (F should form monotone sequence)
    // 2. test maximum step
    //
    othererrors = false;
    n = 50;
    m = 2;
    x = new double[n];
    xlast = new double[n];
    for (i = 0; i < n; i++)
    {
        x[i] = 1;
    }
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 100);
    minlbfgs.minlbfgssetxrep(ref state, true);
    fprev = AP.Math.MaxRealNumber;
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        if (state.needfg)
        {
            //
            // F = sum(((1+i)*x[i])^2), hence dF/dx[i] = 2*(1+i)^2*x[i].
            // The gradient must carry the squared coefficient to stay
            // consistent with F.
            //
            state.f = 0;
            for (i = 0; i < n; i++)
            {
                c = 1 + i;
                state.f = state.f + AP.Math.Sqr(c * state.x[i]);
                state.g[i] = 2 * c * c * state.x[i];
            }
        }
        if (state.xupdated)
        {
            othererrors = othererrors || state.f > fprev;

            // FPrev still holds its initial sentinel only on the first
            // report, which must show the starting point unchanged.
            if ((double)(fprev) == (double)(AP.Math.MaxRealNumber))
            {
                for (i = 0; i < n; i++)
                {
                    othererrors = othererrors || (double)(state.x[i]) != (double)(x[i]);
                }
            }
            fprev = state.f;
            for (k = 0; k < n; k++)
            {
                xlast[k] = state.x[k];
            }
        }
    }
    minlbfgs.minlbfgsresults(ref state, ref x, ref rep);
    for (i = 0; i < n; i++)
    {
        // the returned point must be exactly the last reported one
        othererrors = othererrors || (double)(x[i]) != (double)(xlast[i]);
    }

    n = 1;
    m = 1;
    x = new double[n];
    x[0] = 100;
    stpmax = 0.05 + 0.05 * AP.Math.RandomReal();
    steplimit = (1 + Math.Sqrt(AP.Math.MachineEpsilon)) * stpmax;
    minlbfgs.minlbfgscreate(n, m, ref x, ref state);
    minlbfgs.minlbfgssetcond(ref state, 1.0E-9, 0, 0, 0);
    minlbfgs.minlbfgssetstpmax(ref state, stpmax);
    minlbfgs.minlbfgssetxrep(ref state, true);
    xprev = x[0];
    while (minlbfgs.minlbfgsiteration(ref state))
    {
        if (state.needfg)
        {
            state.f = Math.Exp(state.x[0]) + Math.Exp(-state.x[0]);
            state.g[0] = Math.Exp(state.x[0]) - Math.Exp(-state.x[0]);

            // no trial point may be farther than StpMax from the last accepted one
            othererrors = othererrors || Math.Abs(state.x[0] - xprev) > steplimit;
        }
        if (state.xupdated)
        {
            othererrors = othererrors || Math.Abs(state.x[0] - xprev) > steplimit;
            xprev = state.x[0];
        }
    }

    //
    // end
    //
    waserrors = referror || nonconverror || eqerror || converror || crashtest || othererrors;
    if (!silent)
    {
        System.Console.Write("TESTING L-BFGS OPTIMIZATION");
        System.Console.WriteLine();
        System.Console.Write("REFERENCE PROBLEM: ");
        System.Console.WriteLine(referror ? "FAILED" : "OK");
        System.Console.Write("NON-CONVEX PROBLEM: ");
        System.Console.WriteLine(nonconverror ? "FAILED" : "OK");
        System.Console.Write("LINEAR EQUATIONS: ");
        System.Console.WriteLine(eqerror ? "FAILED" : "OK");
        System.Console.Write("CONVERGENCE PROPERTIES: ");
        System.Console.WriteLine(converror ? "FAILED" : "OK");
        System.Console.Write("CRASH TEST: ");
        System.Console.WriteLine(crashtest ? "FAILED" : "OK");
        System.Console.Write("OTHER PROPERTIES: ");
        System.Console.WriteLine(othererrors ? "FAILED" : "OK");
        System.Console.WriteLine(waserrors ? "TEST FAILED" : "TEST PASSED");
        System.Console.WriteLine();
        System.Console.WriteLine();
    }
    return !waserrors;
}