/*************************************************************************
Internal cross-validation subroutine
*************************************************************************/
private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    int foldscount,
    bool lmalgorithm,
    double wstep,
    int maxits,
    ref int info,
    ref mlpreport rep,
    ref mlpcvreport cvrep)
{
    int i = 0;
    int fold = 0;
    int j = 0;
    int k = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int rowlen = 0;
    int wcount = 0;
    int nclasses = 0;
    int tssize = 0;
    int cvssize = 0;
    double[,] cvset = new double[0,0];
    double[,] testset = new double[0,0];
    int[] folds = new int[0];
    int relcnt = 0;
    mlpreport internalrep = new mlpreport();
    double[] x = new double[0];
    double[] y = new double[0];
    int i_ = 0;

    //
    // Read network geometry, test parameters
    //
    mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
    if( mlpbase.mlpissoftmax(ref n) )
    {
        nclasses = nout;
        rowlen = nin+1;
    }
    else
    {
        nclasses = -nout;
        rowlen = nin+nout;
    }
    if( npoints<=0 | foldscount<2 | foldscount>npoints )
    {
        info = -1;
        return;
    }
    mlpbase.mlpcopy(ref n, ref network);

    //
    // K-fold cross-validation.
    // First, estimate generalization error
    //
    testset = new double[npoints-1+1, rowlen-1+1];
    cvset = new double[npoints-1+1, rowlen-1+1];
    x = new double[nin-1+1];
    y = new double[nout-1+1];
    mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
    cvrep.relclserror = 0;
    cvrep.avgce = 0;
    cvrep.rmserror = 0;
    cvrep.avgerror = 0;
    cvrep.avgrelerror = 0;
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    relcnt = 0;
    for(fold=0; fold<=foldscount-1; fold++)
    {
        //
        // Separate set
        //
        tssize = 0;
        cvssize = 0;
        for(i=0; i<=npoints-1; i++)
        {
            if( folds[i]==fold )
            {
                for(i_=0; i_<=rowlen-1; i_++)
                {
                    testset[tssize,i_] = xy[i,i_];
                }
                tssize = tssize+1;
            }
            else
            {
                for(i_=0; i_<=rowlen-1; i_++)
                {
                    cvset[cvssize,i_] = xy[i,i_];
                }
                cvssize = cvssize+1;
            }
        }

        //
        // Train on CV training set
        //
        if( lmalgorithm )
        {
            mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
        }
        else
        {
            mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
        }
        if( info<0 )
        {
            cvrep.relclserror = 0;
            cvrep.avgce = 0;
            cvrep.rmserror = 0;
            cvrep.avgerror = 0;
            cvrep.avgrelerror = 0;
            return;
        }
        rep.ngrad = rep.ngrad+internalrep.ngrad;
        rep.nhess = rep.nhess+internalrep.nhess;
        rep.ncholesky = rep.ncholesky+internalrep.ncholesky;

        //
        // Estimate error using CV test set
        //
        if( mlpbase.mlpissoftmax(ref network) )
        {
            //
            // classification-only code
            //
            cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
            cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
        }
        for(i=0; i<=tssize-1; i++)
        {
            for(i_=0; i_<=nin-1; i_++)
            {
                x[i_] = testset[i,i_];
            }
            mlpbase.mlpprocess(ref network, ref x, ref y);
            if( mlpbase.mlpissoftmax(ref network) )
            {
                //
                // Classification-specific code
                //
                k = (int)Math.Round(testset[i,nin]);
                for(j=0; j<=nout-1; j++)
                {
                    if( j==k )
                    {
                        cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
                        cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
                        cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
                        relcnt = relcnt+1;
                    }
                    else
                    {
                        cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
                        cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
                    }
                }
            }
            else
            {
                //
                // Regression-specific code
                //
                for(j=0; j<=nout-1; j++)
                {
                    cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
                    cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
                    if( (double)(testset[i,nin+j])!=(double)(0) )
                    {
                        cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
                        relcnt = relcnt+1;
                    }
                }
            }
        }
    }
    if( mlpbase.mlpissoftmax(ref network) )
    {
        cvrep.relclserror = cvrep.relclserror/npoints;
        cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
    }
    cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
    cvrep.avgerror = cvrep.avgerror/(npoints*nout);
    cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
    info = 1;
}
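/*************************************************************************
Illustrative note (not part of the original ALGLIB source). MLPKFoldSplit
assigns each point a fold label Folds[I] in [0..FoldsCount-1]; on pass
FOLD the points with Folds[I]==FOLD form the test set and the rest form
the training set, so every point is tested exactly once and the error
sums above can safely be normalized by NPoints. A minimal stand-in for
such a split (the real MLPKFoldSplit randomizes the assignment):

    int[] folds = new int[npoints];
    for(int i=0; i<npoints; i++)
    {
        folds[i] = i%foldscount;    // round-robin fold assignment
    }
*************************************************************************/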
/*************************************************************************
Creates ensemble from network. Only network geometry is copied.

  -- ALGLIB --
     Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpecreatefromnetwork(ref mlpbase.multilayerperceptron network,
    int ensemblesize,
    ref mlpensemble ensemble)
{
    int i = 0;
    int ccount = 0;
    int i_ = 0;
    int i1_ = 0;

    System.Diagnostics.Debug.Assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!");

    //
    // network properties
    //
    mlpbase.mlpproperties(ref network, ref ensemble.nin, ref ensemble.nout, ref ensemble.wcount);
    if( mlpbase.mlpissoftmax(ref network) )
    {
        ccount = ensemble.nin;
    }
    else
    {
        ccount = ensemble.nin+ensemble.nout;
    }
    ensemble.postprocessing = false;
    ensemble.issoftmax = mlpbase.mlpissoftmax(ref network);
    ensemble.ensemblesize = ensemblesize;

    //
    // structure information
    //
    ensemble.structinfo = new int[network.structinfo[0]-1+1];
    for(i=0; i<=network.structinfo[0]-1; i++)
    {
        ensemble.structinfo[i] = network.structinfo[i];
    }

    //
    // weights, means, sigmas
    //
    ensemble.weights = new double[ensemblesize*ensemble.wcount-1+1];
    ensemble.columnmeans = new double[ensemblesize*ccount-1+1];
    ensemble.columnsigmas = new double[ensemblesize*ccount-1+1];
    for(i=0; i<=ensemblesize*ensemble.wcount-1; i++)
    {
        ensemble.weights[i] = AP.Math.RandomReal()-0.5;
    }
    for(i=0; i<=ensemblesize-1; i++)
    {
        i1_ = (0) - (i*ccount);
        for(i_=i*ccount; i_<=(i+1)*ccount-1; i_++)
        {
            ensemble.columnmeans[i_] = network.columnmeans[i_+i1_];
        }
        i1_ = (0) - (i*ccount);
        for(i_=i*ccount; i_<=(i+1)*ccount-1; i_++)
        {
            ensemble.columnsigmas[i_] = network.columnsigmas[i_+i1_];
        }
    }

    //
    // serialized part
    //
    mlpbase.mlpserialize(ref network, ref ensemble.serializedmlp, ref ensemble.serializedlen);

    //
    // temporaries, internal buffers
    //
    ensemble.tmpweights = new double[ensemble.wcount-1+1];
    ensemble.tmpmeans = new double[ccount-1+1];
    ensemble.tmpsigmas = new double[ccount-1+1];
    ensemble.neurons = new double[ensemble.structinfo[mlpntotaloffset]-1+1];
    ensemble.dfdnet = new double[ensemble.structinfo[mlpntotaloffset]-1+1];
    ensemble.y = new double[ensemble.nout-1+1];
}
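/*************************************************************************
Usage sketch (illustrative, not part of the original source): building a
10-member ensemble that shares the geometry of a freshly created network.
MLPCreate1 is the standard ALGLIB one-hidden-layer constructor; sizes
here are arbitrary examples.

    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpensemble ens = new mlpensemble();
    mlpbase.mlpcreate1(2, 10, 1, ref net);       // 2 inputs, 10 hidden, 1 output
    mlpecreatefromnetwork(ref net, 10, ref ens); // geometry copied, weights randomized
*************************************************************************/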
/*************************************************************************
Cross-validation estimate of generalization error.

Base algorithm - L-BFGS.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry. Network is
                    not changed during cross-validation - it is used only
                    as a representative of its architecture.
    XY          -   training set.
    NPoints     -   training set size
    Decay       -   weight decay, same as in MLPTrainLBFGS
    Restarts    -   number of restarts, >0.
                    Restarts are counted for each partition separately, so
                    the total number of restarts will be
                    Restarts*FoldsCount.
    WStep       -   stopping criterion, same as in MLPTrainLBFGS
    MaxIts      -   stopping criterion, same as in MLPTrainLBFGS
    FoldsCount  -   number of folds in k-fold cross-validation,
                    2<=FoldsCount<=NPoints.
                    Recommended value: 10.

OUTPUT PARAMETERS:
    Info        -   return code, same as in MLPTrainLBFGS
    Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
    CVRep       -   generalization error estimates

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
public static void mlpkfoldcvlbfgs(ref mlpbase.multilayerperceptron network,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    double wstep,
    int maxits,
    int foldscount,
    ref int info,
    ref mlpreport rep,
    ref mlpcvreport cvrep)
{
    mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, ref rep, ref cvrep);
}
/*************************************************************************
Cross-validation estimate of generalization error.

Base algorithm - Levenberg-Marquardt.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry. Network is
                    not changed during cross-validation - it is used only
                    as a representative of its architecture.
    XY          -   training set.
    NPoints     -   training set size
    Decay       -   weight decay, same as in MLPTrainLM
    Restarts    -   number of restarts, >0.
                    Restarts are counted for each partition separately, so
                    the total number of restarts will be
                    Restarts*FoldsCount.
    FoldsCount  -   number of folds in k-fold cross-validation,
                    2<=FoldsCount<=NPoints.
                    Recommended value: 10.

OUTPUT PARAMETERS:
    Info        -   return code, same as in MLPTrainLM
    Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS
    CVRep       -   generalization error estimates

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
public static void mlpkfoldcvlm(ref mlpbase.multilayerperceptron network,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    int foldscount,
    ref int info,
    ref mlpreport rep,
    ref mlpcvreport cvrep)
{
    mlpkfoldcvgeneral(ref network, ref xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, ref rep, ref cvrep);
}
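/*************************************************************************
Usage sketch (illustrative, not part of the original source): 10-fold
cross-validation of a regression network. XY is assumed to be an
NPoints x (NIn+NOut) array; all sizes here are made up for the example.

    int info = 0;
    mlpreport rep = new mlpreport();
    mlpcvreport cvrep = new mlpcvreport();
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(3, 8, 1, ref net);      // 3 inputs, 8 hidden, 1 output
    double[,] xy = new double[100, 4];         // fill with your samples
    mlpkfoldcvlm(ref net, ref xy, 100, 0.001, 2, 10, ref info, ref rep, ref cvrep);
    // or, with the L-BFGS-based variant and its extra stopping criteria:
    // mlpkfoldcvlbfgs(ref net, ref xy, 100, 0.001, 2, 0.01, 0, 10, ref info, ref rep, ref cvrep);
    // info>0 on success; cvrep.rmserror etc. hold the CV error estimates
*************************************************************************/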
/*************************************************************************
Neural network training using modified Levenberg-Marquardt with exact
Hessian calculation and regularization. Subroutine trains neural network
with restarts from random positions. Algorithm is well suited for small
and medium scale problems (hundreds of weights).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function. If you don't know what Decay to choose, use
                    0.001.
    Restarts    -   number of restarts from random position, >0. If you
                    don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -9, if internal matrix inverse subroutine failed
                    * -2, if there is a point with class number outside of
                          [0..NOut-1].
                    * -1, if wrong parameters specified (NPoints<0,
                          Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    ref int info,
    ref mlpreport rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double lmftol = 0;
    double lmsteptol = 0;
    int i = 0;
    int k = 0;
    double v = 0;
    double e = 0;
    double enew = 0;
    double xnorm2 = 0;
    double stepnorm = 0;
    double[] g = new double[0];
    double[] d = new double[0];
    double[,] h = new double[0,0];
    double[,] hmod = new double[0,0];
    double[,] z = new double[0,0];
    bool spd = new bool();
    double nu = 0;
    double lambda = 0;
    double lambdaup = 0;
    double lambdadown = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double[] x = new double[0];
    double[] y = new double[0];
    double[] wbase = new double[0];
    double[] wdir = new double[0];
    double[] wt = new double[0];
    double[] wx = new double[0];
    int pass = 0;
    double[] wbest = new double[0];
    double ebest = 0;
    int invinfo = 0;
    matinv.matinvreport invrep = new matinv.matinvreport();
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;

    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    lambdaup = 10;
    lambdadown = 0.3;
    lmftol = 0.001;
    lmsteptol = 0.001;

    //
    // Test inputs
    //
    if( npoints<=0 | restarts<1 )
    {
        info = -1;
        return;
    }
    if( mlpbase.mlpissoftmax(ref network) )
    {
        for(i=0; i<=npoints-1; i++)
        {
            if( (int)Math.Round(xy[i,nin])<0 | (int)Math.Round(xy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    g = new double[wcount-1+1];
    h = new double[wcount-1+1, wcount-1+1];
    hmod = new double[wcount-1+1, wcount-1+1];
    wbase = new double[wcount-1+1];
    wdir = new double[wcount-1+1];
    wbest = new double[wcount-1+1];
    wt = new double[wcount-1+1];
    wx = new double[wcount-1+1];
    ebest = AP.Math.MaxRealNumber;

    //
    // Multiple passes
    //
    for(pass=1; pass<=restarts; pass++)
    {
        //
        // Initialize weights
        //
        mlpbase.mlprandomize(ref network);

        //
        // First stage of the hybrid algorithm: LBFGS
        //
        for(i_=0; i_<=wcount-1; i_++)
        {
            wbase[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
        while( minlbfgs.minlbfgsiteration(ref state) )
        {
            //
            // gradient
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

            //
            // weight decay
            //
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            state.f = state.f+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                state.g[i_] = state.g[i_] + decay*network.weights[i_];
            }

            //
            // next iteration
            //
            rep.ngrad = rep.ngrad+1;
        }
        minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
        for(i_=0; i_<=wcount-1; i_++)
        {
            network.weights[i_] = wbase[i_];
        }

        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with the exact (regularized) Hessian,
        // G with gradient,
        // E with regularized error.
        //
        mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1; i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;
        lambda = 0.001;
        nu = 2;
        while( true )
        {
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if left part is not positive definite.
            //
            for(i=0; i<=wcount-1; i++)
            {
                for(i_=0; i_<=wcount-1; i_++)
                {
                    hmod[i,i_] = h[i,i_];
                }
                hmod[i,i] = hmod[i,i]+lambda;
            }
            spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
            rep.ncholesky = rep.ncholesky+1;
            if( !spd )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
            if( solverinfo<0 )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            for(i_=0; i_<=wcount-1; i_++)
            {
                wdir[i_] = -1*wdir[i_];
            }

            //
            // Lambda found.
            // 1. Save old w in WBase
            // 2. Test some stopping criteria
            // 3. If error(w+wdir)>error(w), increase lambda
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = network.weights[i_] + wdir[i_];
            }
            xnorm2 = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                xnorm2 += network.weights[i_]*network.weights[i_];
            }
            stepnorm = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                stepnorm += wdir[i_]*wdir[i_];
            }
            stepnorm = Math.Sqrt(stepnorm);
            enew = mlpbase.mlperror(ref network, ref xy, npoints)+0.5*decay*xnorm2;
            if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
            {
                break;
            }
            if( (double)(enew)>(double)(e) )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }

            //
            // Optimize using inv(cholesky(H)) as preconditioner
            //
            matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
            if( invinfo<=0 )
            {
                //
                // if matrix can't be inverted then exit with errors
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for(i=0; i<=wcount-1; i++)
            {
                wt[i] = 0;
            }
            minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
            while( minlbfgs.minlbfgsiteration(ref state) )
            {
                //
                // gradient
                //
                for(i=0; i<=wcount-1; i++)
                {
                    v = 0.0;
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        v += state.x[i_]*hmod[i,i_];
                    }
                    network.weights[i] = wbase[i]+v;
                }
                mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                for(i=0; i<=wcount-1; i++)
                {
                    state.g[i] = 0;
                }
                for(i=0; i<=wcount-1; i++)
                {
                    v = g[i];
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v*hmod[i,i_];
                    }
                }

                //
                // weight decay
                // grad(x'*x) = A'*(x0+A*t)
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1; i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                state.f = state.f+0.5*decay*v;
                for(i=0; i<=wcount-1; i++)
                {
                    v = decay*network.weights[i];
                    for(i_=i; i_<=wcount-1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v*hmod[i,i_];
                    }
                }

                //
                // next iteration
                //
                rep.ngrad = rep.ngrad+1;
            }
            minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

            //
            // Accept new position.
            // Calculate Hessian
            //
            for(i=0; i<=wcount-1; i++)
            {
                v = 0.0;
                for(i_=i; i_<=wcount-1; i_++)
                {
                    v += wt[i_]*hmod[i,i_];
                }
                network.weights[i] = wbase[i]+v;
            }
            mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            for(k=0; k<=wcount-1; k++)
            {
                h[k,k] = h[k,k]+decay;
            }
            rep.nhess = rep.nhess+1;

            //
            // Update lambda
            //
            lambda = lambda*lambdadown;
            nu = 2;
        }

        //
        // update WBest
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = 0.5*decay*v+mlpbase.mlperror(ref network, ref xy, npoints);
        if( (double)(e)<(double)(ebest) )
        {
            ebest = e;
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
        }
    }

    //
    // copy WBest to output
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        network.weights[i_] = wbest[i_];
    }
}
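/*************************************************************************
Usage sketch (illustrative, not part of the original source): training a
small classifier with the LM algorithm. For a softmax network the last
column of XY holds the class index in [0..NOut-1]; sizes are arbitrary.

    int info = 0;
    mlpreport rep = new mlpreport();
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreatec1(4, 6, 3, ref net);   // softmax: 4 inputs, 6 hidden, 3 classes
    double[,] xy = new double[50, 5];        // 4 features + class label per row
    mlptrainlm(ref net, ref xy, 50, 0.001, 2, ref info, ref rep);
    // info==2 on success; rep.nhess/rep.ncholesky count LM linear algebra work
*************************************************************************/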
/*************************************************************************
Neural network training using early stopping (base algorithm - L-BFGS
with regularization).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    TrnXY       -   training set
    TrnSize     -   training set size
    ValXY       -   validation set
    ValSize     -   validation set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function. If you don't know what Decay to choose, use
                    0.001.
    Restarts    -   number of restarts from random position, >0. If you
                    don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -2, if there is a point with class number outside of
                          [0..NOut-1].
                    * -1, if wrong parameters specified (NPoints<0,
                          Restarts<1, ...).
                    *  2, task has been solved, stopping criterion met -
                          sufficiently small step size. Not expected (we
                          use EARLY stopping) but possible and not an
                          error.
                    *  6, task has been solved, stopping criterion met -
                          increase of validation set error.
    Rep         -   training report

NOTE:

The algorithm stops if the validation set error increases for long enough
or if the step size becomes small enough (there are tasks where the
validation set error may decrease for eternity). In any case, the
solution returned corresponds to the minimum of the validation set error.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlptraines(ref mlpbase.multilayerperceptron network,
    ref double[,] trnxy,
    int trnsize,
    ref double[,] valxy,
    int valsize,
    double decay,
    int restarts,
    ref int info,
    ref mlpreport rep)
{
    int i = 0;
    int pass = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double[] w = new double[0];
    double[] wbest = new double[0];
    double e = 0;
    double v = 0;
    double ebest = 0;
    double[] wfinal = new double[0];
    double efinal = 0;
    int itbest = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double wstep = 0;
    int i_ = 0;

    wstep = 0.001;

    //
    // Test inputs, parse flags, read network geometry
    //
    if( trnsize<=0 | valsize<=0 | restarts<1 | (double)(decay)<(double)(0) )
    {
        info = -1;
        return;
    }
    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    if( mlpbase.mlpissoftmax(ref network) )
    {
        for(i=0; i<=trnsize-1; i++)
        {
            if( (int)Math.Round(trnxy[i,nin])<0 | (int)Math.Round(trnxy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
        for(i=0; i<=valsize-1; i++)
        {
            if( (int)Math.Round(valxy[i,nin])<0 | (int)Math.Round(valxy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    info = 2;

    //
    // Prepare
    //
    mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
    w = new double[wcount-1+1];
    wbest = new double[wcount-1+1];
    wfinal = new double[wcount-1+1];
    efinal = AP.Math.MaxRealNumber;
    for(i=0; i<=wcount-1; i++)
    {
        wfinal[i] = 0;
    }

    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for(pass=1; pass<=restarts; pass++)
    {
        //
        // Process
        //
        mlpbase.mlprandomize(ref network);
        ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
        for(i_=0; i_<=wcount-1; i_++)
        {
            wbest[i_] = network.weights[i_];
        }
        itbest = 0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            w[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, 0);
        minlbfgs.minlbfgssetxrep(ref state, true);
        while( minlbfgs.minlbfgsiteration(ref state) )
        {
            //
            // Calculate gradient
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            state.f = state.f+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                state.g[i_] = state.g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;

            //
            // Validation set
            //
            if( state.xupdated )
            {
                for(i_=0; i_<=wcount-1; i_++)
                {
                    network.weights[i_] = w[i_];
                }
                e = mlpbase.mlperror(ref network, ref valxy, valsize);
                if( (double)(e)<(double)(ebest) )
                {
                    ebest = e;
                    for(i_=0; i_<=wcount-1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                    itbest = internalrep.iterationscount;
                }
                if( internalrep.iterationscount>30 & (double)(internalrep.iterationscount)>(double)(1.5*itbest) )
                {
                    info = 6;
                    break;
                }
            }
        }
        minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);

        //
        // Compare with final answer
        //
        if( (double)(ebest)<(double)(efinal) )
        {
            for(i_=0; i_<=wcount-1; i_++)
            {
                wfinal[i_] = wbest[i_];
            }
            efinal = ebest;
        }
    }

    //
    // The best network
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        network.weights[i_] = wfinal[i_];
    }
}
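/*************************************************************************
Usage sketch (illustrative, not part of the original source): early
stopping with a held-out validation set. The 70/30 split and all sizes
are arbitrary choices for the example.

    int info = 0;
    mlpreport rep = new mlpreport();
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(3, 8, 1, ref net);
    double[,] trnxy = new double[70, 4];     // 70 training samples
    double[,] valxy = new double[30, 4];     // 30 validation samples
    mlptraines(ref net, ref trnxy, 70, ref valxy, 30, 0.001, 2, ref info, ref rep);
    // info==6: stopped on rising validation error; info==2: step became small
*************************************************************************/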
/*************************************************************************
Neural network training using L-BFGS algorithm with regularization.
Subroutine trains neural network with restarts from random positions.
Algorithm is well suited for problems of any dimensionality (memory
requirements and step complexity are linear in the number of weights).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function. If you don't know what Decay to choose, use
                    0.001.
    Restarts    -   number of restarts from random position, >0. If you
                    don't know what Restarts to choose, use 2.
    WStep       -   stopping criterion. Algorithm stops if step size is
                    less than WStep. Recommended value - 0.01. Zero step
                    size means stopping after MaxIts iterations.
    MaxIts      -   stopping criterion. Algorithm stops after MaxIts
                    iterations (NOT gradient calculations). Zero MaxIts
                    means stopping when step is sufficiently small.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number outside of
                          [0..NOut-1].
                    * -1, if wrong parameters specified (NPoints<0,
                          Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    double wstep,
    int maxits,
    ref int info,
    ref mlpreport rep)
{
    int i = 0;
    int pass = 0;
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double[] w = new double[0];
    double[] wbest = new double[0];
    double e = 0;
    double v = 0;
    double ebest = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    int i_ = 0;

    //
    // Test inputs, parse flags, read network geometry
    //
    if( (double)(wstep)==(double)(0) & maxits==0 )
    {
        info = -8;
        return;
    }
    if( npoints<=0 | restarts<1 | (double)(wstep)<(double)(0) | maxits<0 )
    {
        info = -1;
        return;
    }
    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    if( mlpbase.mlpissoftmax(ref network) )
    {
        for(i=0; i<=npoints-1; i++)
        {
            if( (int)Math.Round(xy[i,nin])<0 | (int)Math.Round(xy[i,nin])>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Prepare
    //
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    w = new double[wcount-1+1];
    wbest = new double[wcount-1+1];
    ebest = AP.Math.MaxRealNumber;

    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for(pass=1; pass<=restarts; pass++)
    {
        //
        // Process
        //
        mlpbase.mlprandomize(ref network);
        for(i_=0; i_<=wcount-1; i_++)
        {
            w[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), ref w, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0.0, 0.0, wstep, maxits);
        while( minlbfgs.minlbfgsiteration(ref state) )
        {
            for(i_=0; i_<=wcount-1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            state.f = state.f+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                state.g[i_] = state.g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
        }
        minlbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
        for(i_=0; i_<=wcount-1; i_++)
        {
            network.weights[i_] = w[i_];
        }

        //
        // Compare with best
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = mlpbase.mlperrorn(ref network, ref xy, npoints)+0.5*decay*v;
        if( (double)(e)<(double)(ebest) )
        {
            for(i_=0; i_<=wcount-1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
            ebest = e;
        }
    }

    //
    // The best network
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        network.weights[i_] = wbest[i_];
    }
}
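/*************************************************************************
Usage sketch (illustrative, not part of the original source): plain
L-BFGS training with the documented defaults (Decay=0.001, Restarts=2,
WStep=0.01, MaxIts=0, i.e. run until the step is sufficiently small).

    int info = 0;
    mlpreport rep = new mlpreport();
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(3, 8, 1, ref net);
    double[,] xy = new double[100, 4];       // 3 inputs + 1 target per row
    mlptrainlbfgs(ref net, ref xy, 100, 0.001, 2, 0.01, 0, ref info, ref rep);
    // info==2 on success; rep.ngrad counts gradient evaluations
*************************************************************************/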
/*************************************************************************
Unsets network (initializes it to the smallest network possible)
*************************************************************************/
private static void unsetnetwork(ref mlpbase.multilayerperceptron network)
{
    mlpbase.mlpcreate0(1, 1, ref network);
}
/*************************************************************************
Network creation
*************************************************************************/
private static void createnetwork(ref mlpbase.multilayerperceptron network,
    int nkind,
    double a1,
    double a2,
    int nin,
    int nhid1,
    int nhid2,
    int nout)
{
    System.Diagnostics.Debug.Assert(nin>0 & nhid1>=0 & nhid2>=0 & nout>0, "CreateNetwork error");
    System.Diagnostics.Debug.Assert(nhid1!=0 | nhid2==0, "CreateNetwork error");
    System.Diagnostics.Debug.Assert(nkind!=1 | nout>=2, "CreateNetwork error");

    //
    // NKind selects the constructor family:
    // 0 - MLPCreateN, 1 - MLPCreateCN (classifier),
    // 2 - MLPCreateBN, 3 - MLPCreateRN
    //
    if( nhid1==0 )
    {
        //
        // No hidden layers
        //
        if( nkind==0 )
        {
            mlpbase.mlpcreate0(nin, nout, ref network);
        }
        else if( nkind==1 )
        {
            mlpbase.mlpcreatec0(nin, nout, ref network);
        }
        else if( nkind==2 )
        {
            mlpbase.mlpcreateb0(nin, nout, a1, a2, ref network);
        }
        else if( nkind==3 )
        {
            mlpbase.mlpcreater0(nin, nout, a1, a2, ref network);
        }
        return;
    }
    if( nhid2==0 )
    {
        //
        // One hidden layer
        //
        if( nkind==0 )
        {
            mlpbase.mlpcreate1(nin, nhid1, nout, ref network);
        }
        else if( nkind==1 )
        {
            mlpbase.mlpcreatec1(nin, nhid1, nout, ref network);
        }
        else if( nkind==2 )
        {
            mlpbase.mlpcreateb1(nin, nhid1, nout, a1, a2, ref network);
        }
        else if( nkind==3 )
        {
            mlpbase.mlpcreater1(nin, nhid1, nout, a1, a2, ref network);
        }
        return;
    }

    //
    // Two hidden layers
    //
    if( nkind==0 )
    {
        mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, ref network);
    }
    else if( nkind==1 )
    {
        mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, ref network);
    }
    else if( nkind==2 )
    {
        mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, a1, a2, ref network);
    }
    else if( nkind==3 )
    {
        mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a1, a2, ref network);
    }
}