/*************************************************************************
This function trains the neural network passed to it, using the current
dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
and the current training settings. Training from NRestarts random starting
positions is performed, and the best network is chosen.

Training is performed using the current training algorithm.

INPUT PARAMETERS:
    S           -   trainer object;
    Network     -   neural network. It must have the same number of inputs
                    and outputs/classes as was specified during creation of
                    the trainer object. The user may inspect the weights in
                    Network while training is in progress;
    TNetwork    -   the network actually being trained. It must have the
                    same architecture as Network (copy Network or create a
                    new network with identical architecture);
    State       -   already created LBFGS optimizer;
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that the specified number of random
                      restarts is performed, and the best network is chosen
                      after training
                    * NRestarts=0 means that the current state of the
                      network is used for training;
    TrnSubset   -   subset of the training set (it stores row numbers),
                    used as the training set;
    TrnSubsetSize-  size of the subset (TrnSubsetSize<0 means that the full
                    dataset is used); when TrnSubsetSize=0, the network is
                    filled by zero values and the ValSubset parameter is
                    IGNORED;
    ValSubset   -   subset of the training set (it stores row numbers),
                    used as the validation set;
    ValSubsetSize-  size of the subset (ValSubsetSize<0 means that the full
                    dataset is used); ValSubsetSize<>0 means that the early
                    stopping training algorithm is used;
    BufWBest    -   buffer for storing interim results, BufWBest[0:WCount-1];
                    must be allocated by the caller;
    BufWFinal   -   buffer for storing interim results, BufWFinal[0:WCount-1];
                    must be allocated by the caller.

OUTPUT PARAMETERS:
    Network     -   trained network;
    Rep         -   training report.

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      the network is filled by zero values. Same behavior for the functions
      MLPStartTraining and MLPContinueTraining.

NOTE: this method uses the sum-of-squares error function for training.
-- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static void mlptrainnetworkx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, int nrestarts, int[] trnsubset, int trnsubsetsize, int[] valsubset, int valsubsetsize, double[] bufwbest, double[] bufwfinal, mlpreport rep) { mlpbase.modelerrors modrep = new mlpbase.modelerrors(); double eval = 0; double v = 0; double ebestcur = 0; double efinal = 0; int ngradbatch = 0; int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int itbest = 0; int itcnt = 0; int ntype = 0; int ttype = 0; bool rndstart = new bool(); int pass = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the resulting network is not similar to network type in trainer object"); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPTrainNetworkX: internal error - number of weights the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(nrestarts>=0, "MLPTrainNetworkX: internal error - NRestarts<0."); alglib.ap.assert(alglib.ap.len(trnsubset)>=trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)"); for(i=0; i<=trnsubsetsize-1; i++) { alglib.ap.assert(trnsubset[i]>=0 && trnsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)"); } alglib.ap.assert(alglib.ap.len(valsubset)>=valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)"); for(i=0; i<=valsubsetsize-1; i++) { alglib.ap.assert(valsubset[i]>=0 && valsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or ValSubset[I]>S.NPoints-1)"); } // // Initialize parameter Rep // rep.relclserror = 0; rep.avgce = 0; rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && trnsubsetsize!=0 ) { // // Prepare // efinal = math.maxrealnumber; if( nrestarts!=0 ) { rndstart = true; } else { rndstart = 
false; nrestarts = 1; } ngradbatch = 0; eval = 0; ebestcur = 0; for(pass=1; pass<=nrestarts; pass++) { mlpstarttrainingx(s, network, tnetwork, state, rndstart, trnsubset, trnsubsetsize); itbest = 0; itcnt = 0; if( s.datatype==0 ) { ebestcur = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { ebestcur = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } for(i_=0; i_<=wcount-1;i_++) { bufwbest[i_] = network.weights[i_]; } while( mlpcontinuetrainingx(s, network, tnetwork, state, trnsubset, trnsubsetsize, ref ngradbatch) ) { if( s.datatype==0 ) { eval = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { eval = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } if( (double)(eval)<=(double)(ebestcur) ) { for(i_=0; i_<=wcount-1;i_++) { bufwbest[i_] = network.weights[i_]; } ebestcur = eval; itbest = itcnt; } if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) ) { break; } itcnt = itcnt+1; } for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = bufwbest[i_]; } // // Compare with final(the best) answer. // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += bufwbest[i_]*bufwbest[i_]; } if( s.datatype==0 ) { ebestcur = mlpbase.mlperrorsubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize)+0.5*s.decay*v; } if( s.datatype==1 ) { ebestcur = mlpbase.mlperrorsparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize)+0.5*s.decay*v; } if( (double)(ebestcur)<(double)(efinal) ) { for(i_=0; i_<=wcount-1;i_++) { bufwfinal[i_] = bufwbest[i_]; } efinal = ebestcur; } } // // Final network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = bufwfinal[i_]; } rep.ngrad = ngradbatch; } else { for(i=0; i<=wcount-1; i++) { network.weights[i] = 0; } } // // Calculate errors. // if( s.datatype==0 ) { mlpbase.mlpallerrorssubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep); } if( s.datatype==1 ) { mlpbase.mlpallerrorssparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep); } rep.relclserror = modrep.relclserror; rep.avgce = modrep.avgce; rep.rmserror = modrep.rmserror; rep.avgerror = modrep.avgerror; rep.avgrelerror = modrep.avgrelerror; }
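/*************************************************************************
The early-stopping rule used by MLPTrainNetworkX above is easier to see in
isolation. The following sketch (C# fragment; ValidationError() and
TrainingStep() are hypothetical stand-ins for the MLPErrorSubset/
MLPErrorSparseSubset and MLPContinueTrainingX calls made by the real code)
keeps the best weights seen so far and stops once at least 30 iterations
have passed AND the iteration count exceeds 1.5 times the iteration at
which the best validation error was observed - exactly the condition
"itcnt>30 && itcnt>1.5*itbest" checked in the loop above.

    int itbest = 0;
    int itcnt = 0;
    double ebest = ValidationError(network);              // hypothetical helper
    double[] wbest = (double[])network.weights.Clone();
    while( TrainingStep(network) )                        // hypothetical helper
    {
        double e = ValidationError(network);
        if( e<=ebest )
        {
            // new best point on the validation set: remember it
            for(int i=0; i<wbest.Length; i++)
                wbest[i] = network.weights[i];
            ebest = e;
            itbest = itcnt;
        }
        if( itcnt>30 && itcnt>1.5*itbest )
            break;                                        // too long without improvement
        itcnt = itcnt+1;
    }
    for(int i=0; i<wbest.Length; i++)
        network.weights[i] = wbest[i];                    // roll back to the best point
*************************************************************************/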
/*************************************************************************
Cross-validation estimate of generalization error.

Base algorithm - L-BFGS.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry. The network
                    is not changed during cross-validation - it is used
                    only as a representative of its architecture.
    XY          -   training set.
    NPoints     -   training set size.
    Decay       -   weight decay, same as in MLPTrainLBFGS.
    Restarts    -   number of restarts, >0. Restarts are counted for each
                    partition separately, so the total number of restarts
                    will be Restarts*FoldsCount.
    WStep       -   stopping criterion, same as in MLPTrainLBFGS.
    MaxIts      -   stopping criterion, same as in MLPTrainLBFGS.
    FoldsCount  -   number of folds in k-fold cross-validation,
                    2<=FoldsCount<=NPoints. Recommended value: 10.

OUTPUT PARAMETERS:
    Info        -   return code, same as in MLPTrainLBFGS.
    Rep         -   report, same as in MLPTrainLM/MLPTrainLBFGS.
    CVRep       -   generalization error estimates.

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
public static void mlpkfoldcvlbfgs(mlpbase.multilayerperceptron network,
    double[,] xy,
    int npoints,
    double decay,
    int restarts,
    double wstep,
    int maxits,
    int foldscount,
    ref int info,
    mlpreport rep,
    mlpcvreport cvrep)
{
    info = 0;
    mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, false, wstep, maxits, ref info, rep, cvrep);
}
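/*************************************************************************
A minimal usage sketch for MLPKFoldCVLBFGS (C# fragment; assumes it is
called from code that can see this unit's mlpreport/mlpcvreport types and
the mlpbase unit; the dataset below is a synthetic placeholder):

    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpreport rep = new mlpreport();
    mlpcvreport cvrep = new mlpcvreport();
    int info = 0;

    mlpbase.mlpcreate1(1, 5, 1, net);                     // 1 input, 5 hidden neurons, 1 output
    double[,] xy = new double[100, 2];                    // regression rows: [input, target]
    for(int i=0; i<100; i++)
    {
        xy[i,0] = i/100.0;
        xy[i,1] = Math.Sin(xy[i,0]);
    }

    // Decay=0.001, Restarts=2, WStep=0.01, MaxIts=0, FoldsCount=10
    mlpkfoldcvlbfgs(net, xy, 100, 0.001, 2, 0.01, 0, 10, ref info, rep, cvrep);

    // Info>0 indicates success; CVRep.RMSError etc. hold cross-validated
    // error estimates
*************************************************************************/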
/************************************************************************* Internal cross-validation subroutine *************************************************************************/ private static void mlpkfoldcvgeneral(mlpbase.multilayerperceptron n, double[,] xy, int npoints, double decay, int restarts, int foldscount, bool lmalgorithm, double wstep, int maxits, ref int info, mlpreport rep, mlpcvreport cvrep) { int i = 0; int fold = 0; int j = 0; int k = 0; mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron(); int nin = 0; int nout = 0; int rowlen = 0; int wcount = 0; int nclasses = 0; int tssize = 0; int cvssize = 0; double[,] cvset = new double[0,0]; double[,] testset = new double[0,0]; int[] folds = new int[0]; int relcnt = 0; mlpreport internalrep = new mlpreport(); double[] x = new double[0]; double[] y = new double[0]; int i_ = 0; info = 0; // // Read network geometry, test parameters // mlpbase.mlpproperties(n, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(n) ) { nclasses = nout; rowlen = nin+1; } else { nclasses = -nout; rowlen = nin+nout; } if( (npoints<=0 || foldscount<2) || foldscount>npoints ) { info = -1; return; } mlpbase.mlpcopy(n, network); // // K-fold out cross-validation. // First, estimate generalization error // testset = new double[npoints-1+1, rowlen-1+1]; cvset = new double[npoints-1+1, rowlen-1+1]; x = new double[nin-1+1]; y = new double[nout-1+1]; mlpkfoldsplit(xy, npoints, nclasses, foldscount, false, ref folds); cvrep.relclserror = 0; cvrep.avgce = 0; cvrep.rmserror = 0; cvrep.avgerror = 0; cvrep.avgrelerror = 0; rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; relcnt = 0; for(fold=0; fold<=foldscount-1; fold++) { // // Separate set // tssize = 0; cvssize = 0; for(i=0; i<=npoints-1; i++) { if( folds[i]==fold ) { for(i_=0; i_<=rowlen-1;i_++) { testset[tssize,i_] = xy[i,i_]; } tssize = tssize+1; } else { for(i_=0; i_<=rowlen-1;i_++) { cvset[cvssize,i_] = xy[i,i_]; } cvssize = cvssize+1; } } // // Train on CV training set // if( lmalgorithm ) { mlptrainlm(network, cvset, cvssize, decay, restarts, ref info, internalrep); } else { mlptrainlbfgs(network, cvset, cvssize, decay, restarts, wstep, maxits, ref info, internalrep); } if( info<0 ) { cvrep.relclserror = 0; cvrep.avgce = 0; cvrep.rmserror = 0; cvrep.avgerror = 0; cvrep.avgrelerror = 0; return; } rep.ngrad = rep.ngrad+internalrep.ngrad; rep.nhess = rep.nhess+internalrep.nhess; rep.ncholesky = rep.ncholesky+internalrep.ncholesky; // // Estimate error using CV test set // if( mlpbase.mlpissoftmax(network) ) { // // classification-only code // cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(network, testset, tssize); cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(network, testset, tssize); } for(i=0; i<=tssize-1; i++) { for(i_=0; i_<=nin-1;i_++) { x[i_] = testset[i,i_]; } mlpbase.mlpprocess(network, x, ref y); if( mlpbase.mlpissoftmax(network) ) { // // Classification-specific code // k = (int)Math.Round(testset[i,nin]); for(j=0; j<=nout-1; j++) { if( j==k ) { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]-1); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1); cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1); relcnt = relcnt+1; } else { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]); } } } else { // // Regression-specific code // for(j=0; j<=nout-1; j++) { cvrep.rmserror = cvrep.rmserror+math.sqr(y[j]-testset[i,nin+j]); cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]); if( 
(double)(testset[i,nin+j])!=(double)(0) ) { cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]); relcnt = relcnt+1; } } } } } if( mlpbase.mlpissoftmax(network) ) { cvrep.relclserror = cvrep.relclserror/npoints; cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints); } cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout)); cvrep.avgerror = cvrep.avgerror/(npoints*nout); cvrep.avgrelerror = cvrep.avgrelerror/relcnt; info = 1; }
public multilayerperceptron(mlpbase.multilayerperceptron obj) { _innerobj = obj; }
/************************************************************************* Neural network training using L-BFGS algorithm with regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for problems of any dimensionality (memory requirements and step complexity are linear by weights number). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. WStep - stopping criterion. Algorithm stops if step size is less than WStep. Recommended value - 0.01. Zero step size means stopping after MaxIts iterations. MaxIts - stopping criterion. Algorithm stops after MaxIts iterations (NOT gradient calculations). Zero MaxIts means stopping when step is sufficiently small. OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -8, if both WStep=0 and MaxIts=0 * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlbfgs(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, ref int info, mlpreport rep) { int i = 0; int pass = 0; int nin = 0; int nout = 0; int wcount = 0; double[] w = new double[0]; double[] wbest = new double[0]; double e = 0; double v = 0; double ebest = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); int i_ = 0; info = 0; // // Test inputs, parse flags, read network geometry // if( (double)(wstep)==(double)(0) && maxits==0 ) { info = -8; return; } if( ((npoints<=0 || restarts<1) || (double)(wstep)<(double)(0)) || maxits<0 ) { info = -1; return; } mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout ) { info = -2; return; } } } decay = Math.Max(decay, mindecay); info = 2; // // Prepare // mlpbase.mlpinitpreprocessor(network, xy, npoints); w = new double[wcount-1+1]; wbest = new double[wcount-1+1]; ebest = math.maxrealnumber; // // Multiple starts // rep.ncholesky = 0; rep.nhess = 0; rep.ngrad = 0; for(pass=1; pass<=restarts; pass++) { // // Process // mlpbase.mlprandomize(network); for(i_=0; i_<=wcount-1;i_++) { w[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), w, state); minlbfgs.minlbfgssetcond(state, 0.0, 0.0, wstep, maxits); while( minlbfgs.minlbfgsiteration(state) ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradnbatch(network, xy, npoints, ref state.f, ref state.g); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*network.weights[i_]; } rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref w, internalrep); for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = w[i_]; } // // Compare with best // v = 0.0; for(i_=0; 
i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = mlpbase.mlperrorn(network, xy, npoints)+0.5*decay*v; if( (double)(e)<(double)(ebest) ) { for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } ebest = e; } } // // The best network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbest[i_]; } }
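/*************************************************************************
A minimal usage sketch for MLPTrainLBFGS (C# fragment; the dataset below is
a placeholder - fill it with real training rows [x0, x1, target]):

    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpreport rep = new mlpreport();
    int info = 0;

    mlpbase.mlpcreate1(2, 10, 1, net);                    // 2 inputs, 10 hidden neurons, 1 output
    double[,] xy = new double[50, 3];                     // 50 rows of [x0, x1, target]
    // ... fill xy with training data ...

    // Decay=0.001 (recommended default), Restarts=2, WStep=0.01, MaxIts=0
    mlptrainlbfgs(net, xy, 50, 0.001, 2, 0.01, 0, ref info, rep);
    if( info==2 )
    {
        // task solved; Rep.NGrad holds the number of gradient evaluations
    }
*************************************************************************/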
/************************************************************************* This function initializes temporaries needed for training session. *************************************************************************/ private static void initmlpetrnsessions(mlpbase.multilayerperceptron individualnetwork, mlptrainer trainer, alglib.smp.shared_pool sessions) { mlpetrnsession t = new mlpetrnsession(); if( !alglib.smp.ae_shared_pool_is_initialized(sessions) ) { initmlpetrnsession(individualnetwork, trainer, t); alglib.smp.ae_shared_pool_set_seed(sessions, t); } }
/************************************************************************* Network creation This function creates network with desired structure. Network is created using one of the three methods: a) straighforward creation using MLPCreate???() b) MLPCreate???() for proxy object, which is copied with PassThroughSerializer() c) MLPCreate???() for proxy object, which is copied with MLPCopy() One of these methods is chosen with probability 1/3. *************************************************************************/ private static void createnetwork(mlpbase.multilayerperceptron network, int nkind, double a1, double a2, int nin, int nhid1, int nhid2, int nout) { int mkind = 0; mlpbase.multilayerperceptron tmp = new mlpbase.multilayerperceptron(); ap.assert(((nin>0 & nhid1>=0) & nhid2>=0) & nout>0, "CreateNetwork error"); ap.assert(nhid1!=0 | nhid2==0, "CreateNetwork error"); ap.assert(nkind!=1 | nout>=2, "CreateNetwork error"); mkind = math.randominteger(3); if( nhid1==0 ) { // // No hidden layers // if( nkind==0 ) { if( mkind==0 ) { mlpbase.mlpcreate0(nin, nout, network); } if( mkind==1 ) { mlpbase.mlpcreate0(nin, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreate0(nin, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==1 ) { if( mkind==0 ) { mlpbase.mlpcreatec0(nin, nout, network); } if( mkind==1 ) { mlpbase.mlpcreatec0(nin, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreatec0(nin, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==2 ) { if( mkind==0 ) { mlpbase.mlpcreateb0(nin, nout, a1, a2, network); } if( mkind==1 ) { mlpbase.mlpcreateb0(nin, nout, a1, a2, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreateb0(nin, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==3 ) { if( mkind==0 ) { mlpbase.mlpcreater0(nin, nout, a1, a2, network); } if( mkind==1 
) { mlpbase.mlpcreater0(nin, nout, a1, a2, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreater0(nin, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } } } } mlpbase.mlprandomizefull(network); return; } if( nhid2==0 ) { // // One hidden layer // if( nkind==0 ) { if( mkind==0 ) { mlpbase.mlpcreate1(nin, nhid1, nout, network); } if( mkind==1 ) { mlpbase.mlpcreate1(nin, nhid1, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreate1(nin, nhid1, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==1 ) { if( mkind==0 ) { mlpbase.mlpcreatec1(nin, nhid1, nout, network); } if( mkind==1 ) { mlpbase.mlpcreatec1(nin, nhid1, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreatec1(nin, nhid1, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==2 ) { if( mkind==0 ) { mlpbase.mlpcreateb1(nin, nhid1, nout, a1, a2, network); } if( mkind==1 ) { mlpbase.mlpcreateb1(nin, nhid1, nout, a1, a2, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreateb1(nin, nhid1, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==3 ) { if( mkind==0 ) { mlpbase.mlpcreater1(nin, nhid1, nout, a1, a2, network); } if( mkind==1 ) { mlpbase.mlpcreater1(nin, nhid1, nout, a1, a2, tmp); { // // This code passes data structure through serializers // 
(serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreater1(nin, nhid1, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } } } } mlpbase.mlprandomizefull(network); return; } // // Two hidden layers // if( nkind==0 ) { if( mkind==0 ) { mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, network); } if( mkind==1 ) { mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==1 ) { if( mkind==0 ) { mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, network); } if( mkind==1 ) { mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==2 ) { if( mkind==0 ) { mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, a1, a2, network); } if( mkind==1 ) { mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, a1, a2, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } else { if( nkind==3 ) { if( mkind==0 ) { mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a1, a2, network); } if( mkind==1 ) { mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a1, a2, tmp); { // // This code passes data structure through serializers // (serializes it to string and loads back) // 
serializer _local_serializer; string _local_str; _local_serializer = new serializer(); _local_serializer.alloc_start(); mlpbase.mlpalloc(_local_serializer, tmp); _local_serializer.sstart_str(); mlpbase.mlpserialize(_local_serializer, tmp); _local_serializer.stop(); _local_str = _local_serializer.get_string(); _local_serializer = new serializer(); _local_serializer.ustart_str(_local_str); mlpbase.mlpunserialize(_local_serializer, network); _local_serializer.stop(); } } if( mkind==2 ) { mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a1, a2, tmp); mlpbase.mlpcopy(tmp, network); } } } } } mlpbase.mlprandomizefull(network); }
/*************************************************************************
This function estimates generalization error using cross-validation on the
current dataset with current training settings.

FOR USERS OF COMMERCIAL EDITION:

  ! Commercial version of ALGLIB includes two important improvements of
  ! this function:
  ! * multicore support (C++ and C# computational cores)
  ! * SSE support (C++ computational core)
  !
  ! Second improvement gives constant speedup (2-3X). First improvement
  ! gives close-to-linear speedup on multicore systems. Following
  ! operations can be executed in parallel:
  ! * FoldsCount cross-validation rounds (always)
  ! * NRestarts training sessions performed within each of the
  !   cross-validation rounds (if NRestarts>1)
  ! * gradient calculation over large dataset (if dataset is large enough)
  !
  ! In order to use multicore features you have to:
  ! * use commercial version of ALGLIB
  ! * call this function with "smp_" prefix, which indicates that
  !   multicore code will be used (for multicore support)
  !
  ! In order to use SSE features you have to:
  ! * use commercial version of ALGLIB on Intel processors
  ! * use C++ computational core
  !
  ! This note is given for users of commercial edition; if you use GPL
  ! edition, you still will be able to call smp-version of this function,
  ! but all computations will be done serially.
  !
  ! We recommend you to carefully read ALGLIB Reference Manual, section
  ! called 'SMP support', before using parallel version of this function.

INPUT PARAMETERS:
    S           -   trainer object.
    Network     -   neural network. It must have the same number of inputs
                    and outputs/classes as was specified during creation of
                    the trainer object. The network is not changed during
                    cross-validation and is not trained - it is used only
                    as a representative of its architecture. I.e., we
                    estimate generalization properties of the ARCHITECTURE,
                    not of some specific network.
    NRestarts   -   number of restarts, >=0:
                    * NRestarts>0 means that for each cross-validation
                      round the specified number of random restarts is
                      performed, with the best network being chosen after
                      training.
                    * NRestarts=0 is same as NRestarts=1.
    FoldsCount  -   number of folds in k-fold cross-validation:
                    * 2<=FoldsCount<=size of dataset
                    * recommended value: 10
                    * values larger than the dataset size will be silently
                      truncated down to the dataset size.

OUTPUT PARAMETERS:
    Rep         -   structure which contains cross-validation estimates:
                    * Rep.RelCLSError - fraction of misclassified cases
                    * Rep.AvgCE       - average cross-entropy
                    * Rep.RMSError    - root-mean-square error
                    * Rep.AvgError    - average error
                    * Rep.AvgRelError - average relative error

NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
      or a subset with only one point was given, zeros are returned as
      estimates.

NOTE: this method performs FoldsCount cross-validation rounds, each one
      with NRestarts random starts. Thus, FoldsCount*NRestarts networks are
      trained in total.

NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.

NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
      contain errors in prediction of posterior probabilities.
-- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcv(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, int foldscount, mlpreport rep) { alglib.smp.shared_pool pooldatacv = new alglib.smp.shared_pool(); mlpparallelizationcv datacv = new mlpparallelizationcv(); mlpparallelizationcv sdatacv = null; double[,] cvy = new double[0,0]; int[] folds = new int[0]; double[] buf = new double[0]; double[] dy = new double[0]; int nin = 0; int nout = 0; int wcount = 0; int rowsize = 0; int ntype = 0; int ttype = 0; int i = 0; int j = 0; int k = 0; hqrnd.hqrndstate rs = new hqrnd.hqrndstate(); int i_ = 0; int i1_ = 0; if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPKFoldCV: type of input network is not similar to network type in trainer object"); alglib.ap.assert(s.npoints>=0, "MLPKFoldCV: possible trainer S is not initialized(S.NPoints<0)"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPKFoldCV: number of inputs in trainer is not equal to number of inputs in network"); alglib.ap.assert(s.nout==nout, "MLPKFoldCV: number of outputs in trainer is not equal to number of outputs in network"); alglib.ap.assert(nrestarts>=0, "MLPKFoldCV: NRestarts<0"); alglib.ap.assert(foldscount>=2, "MLPKFoldCV: FoldsCount<2"); if( foldscount>s.npoints ) { foldscount = s.npoints; } rep.relclserror = 0; rep.avgce = 0; rep.rmserror = 0; rep.avgerror = 0; rep.avgrelerror = 0; hqrnd.hqrndrandomize(rs); rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; if( s.npoints==0 || s.npoints==1 ) { return; } // // Read network geometry, test parameters // if( s.rcpar ) { rowsize = nin+nout; dy = new double[nout]; bdss.dserrallocate(-nout, ref buf); } else { rowsize = nin+1; dy = new double[1]; bdss.dserrallocate(nout, ref buf); } // // Folds // folds = new int[s.npoints]; for(i=0; i<=s.npoints-1; i++) { folds[i] = i*foldscount/s.npoints; } for(i=0; i<=s.npoints-2; i++) { j = i+hqrnd.hqrnduniformi(rs, s.npoints-i); if( j!=i ) { k = folds[i]; folds[i] = folds[j]; folds[j] = k; } } cvy = new double[s.npoints, nout]; // // Initialize SEED-value for shared pool // datacv.ngrad = 0; mlpbase.mlpcopy(network, datacv.network); datacv.subset = new int[s.npoints]; datacv.xyrow = new double[rowsize]; datacv.y = new double[nout]; // // Create shared pool // alglib.smp.ae_shared_pool_set_seed(pooldatacv, datacv); // // Parallelization // mthreadcv(s, rowsize, nrestarts, folds, 0, foldscount, cvy, pooldatacv); // // Calculate value for NGrad // alglib.smp.ae_shared_pool_first_recycled(pooldatacv, ref sdatacv); while( sdatacv!=null ) { rep.ngrad = rep.ngrad+sdatacv.ngrad; alglib.smp.ae_shared_pool_next_recycled(pooldatacv, ref sdatacv); } // // Connect of results and calculate cross-validation error // for(i=0; i<=s.npoints-1; i++) { if( s.datatype==0 ) { for(i_=0; i_<=rowsize-1;i_++) { datacv.xyrow[i_] = s.densexy[i,i_]; } } if( s.datatype==1 ) { sparse.sparsegetrow(s.sparsexy, i, ref datacv.xyrow); } for(i_=0; i_<=nout-1;i_++) { datacv.y[i_] = cvy[i,i_]; } if( s.rcpar ) { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { dy[i_] = datacv.xyrow[i_+i1_]; } } else { dy[0] = datacv.xyrow[nin]; } bdss.dserraccumulate(ref buf, datacv.y, dy); } bdss.dserrfinish(ref buf); rep.relclserror = buf[0]; rep.avgce = buf[1]; rep.rmserror = buf[2]; rep.avgerror = buf[3]; rep.avgrelerror = buf[4]; }
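/*************************************************************************
The fold assignment used by MLPKFoldCV above is easy to reproduce on its
own: every point first receives fold index I*FoldsCount/NPoints, and the
assignments are then shuffled with a Fisher-Yates pass, so each fold ends
up with either floor(NPoints/FoldsCount) or ceil(NPoints/FoldsCount)
points. A standalone sketch (C# fragment using System.Random in place of
the library's HQRND generator):

    int npoints = 103;
    int foldscount = 10;
    int[] folds = new int[npoints];
    Random rng = new Random();

    for(int i=0; i<npoints; i++)
        folds[i] = i*foldscount/npoints;                  // deterministic, nearly equal-sized folds
    for(int i=0; i<npoints-1; i++)
    {
        int j = i+rng.Next(npoints-i);                    // shuffle fold labels
        int k = folds[i];
        folds[i] = folds[j];
        folds[j] = k;
    }

    // folds[i]==f now means that point i is held out during round f
*************************************************************************/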
/************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_mlpkfoldcv(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, int foldscount, mlpreport rep) { mlpkfoldcv(s,network,nrestarts,foldscount,rep); }
public modelerrors(mlpbase.modelerrors obj) { _innerobj = obj; }
/************************************************************************* Calculation of all types of errors -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpeallerrorsx(mlpensemble ensemble, double[,] densexy, sparse.sparsematrix sparsexy, int datasetsize, int datasettype, int[] idx, int subset0, int subset1, int subsettype, alglib.smp.shared_pool buf, mlpbase.modelerrors rep) { int i = 0; int j = 0; int nin = 0; int nout = 0; bool iscls = new bool(); int srcidx = 0; hpccores.mlpbuffers pbuf = null; mlpbase.modelerrors rep0 = new mlpbase.modelerrors(); mlpbase.modelerrors rep1 = new mlpbase.modelerrors(); int i_ = 0; int i1_ = 0; // // Get network information // nin = mlpbase.mlpgetinputscount(ensemble.network); nout = mlpbase.mlpgetoutputscount(ensemble.network); iscls = mlpbase.mlpissoftmax(ensemble.network); // // Retrieve buffer, prepare, process data, recycle buffer // alglib.smp.ae_shared_pool_retrieve(buf, ref pbuf); if( iscls ) { bdss.dserrallocate(nout, ref pbuf.tmp0); } else { bdss.dserrallocate(-nout, ref pbuf.tmp0); } apserv.rvectorsetlengthatleast(ref pbuf.x, nin); apserv.rvectorsetlengthatleast(ref pbuf.y, nout); apserv.rvectorsetlengthatleast(ref pbuf.desiredy, nout); for(i=subset0; i<=subset1-1; i++) { srcidx = -1; if( subsettype==0 ) { srcidx = i; } if( subsettype==1 ) { srcidx = idx[i]; } alglib.ap.assert(srcidx>=0, "MLPEAllErrorsX: internal error"); if( datasettype==0 ) { for(i_=0; i_<=nin-1;i_++) { pbuf.x[i_] = densexy[srcidx,i_]; } } if( datasettype==1 ) { sparse.sparsegetrow(sparsexy, srcidx, ref pbuf.x); } mlpeprocess(ensemble, pbuf.x, ref pbuf.y); if( mlpbase.mlpissoftmax(ensemble.network) ) { if( datasettype==0 ) { pbuf.desiredy[0] = densexy[srcidx,nin]; } if( datasettype==1 ) { pbuf.desiredy[0] = sparse.sparseget(sparsexy, srcidx, nin); } } else { if( datasettype==0 ) { i1_ = (nin) - (0); for(i_=0; i_<=nout-1;i_++) { pbuf.desiredy[i_] = densexy[srcidx,i_+i1_]; } } if( datasettype==1 ) { for(j=0; j<=nout-1; j++) { pbuf.desiredy[j] = sparse.sparseget(sparsexy, srcidx, nin+j); } } } bdss.dserraccumulate(ref pbuf.tmp0, pbuf.y, pbuf.desiredy); } bdss.dserrfinish(ref pbuf.tmp0); rep.relclserror = pbuf.tmp0[0]; rep.avgce = pbuf.tmp0[1]/Math.Log(2); rep.rmserror = pbuf.tmp0[2]; rep.avgerror = pbuf.tmp0[3]; rep.avgrelerror = pbuf.tmp0[4]; alglib.smp.ae_shared_pool_recycle(buf, ref pbuf); }
/************************************************************************* Network creation *************************************************************************/ private static void createnetwork(mlpbase.multilayerperceptron network, int nkind, double a1, double a2, int nin, int nhid1, int nhid2, int nout) { ap.assert(((nin>0 & nhid1>=0) & nhid2>=0) & nout>0, "CreateNetwork error"); ap.assert(nhid1!=0 | nhid2==0, "CreateNetwork error"); ap.assert(nkind!=1 | nout>=2, "CreateNetwork error"); if( nhid1==0 ) { // // No hidden layers // if( nkind==0 ) { mlpbase.mlpcreate0(nin, nout, network); } else { if( nkind==1 ) { mlpbase.mlpcreatec0(nin, nout, network); } else { if( nkind==2 ) { mlpbase.mlpcreateb0(nin, nout, a1, a2, network); } else { if( nkind==3 ) { mlpbase.mlpcreater0(nin, nout, a1, a2, network); } } } } return; } if( nhid2==0 ) { // // One hidden layer // if( nkind==0 ) { mlpbase.mlpcreate1(nin, nhid1, nout, network); } else { if( nkind==1 ) { mlpbase.mlpcreatec1(nin, nhid1, nout, network); } else { if( nkind==2 ) { mlpbase.mlpcreateb1(nin, nhid1, nout, a1, a2, network); } else { if( nkind==3 ) { mlpbase.mlpcreater1(nin, nhid1, nout, a1, a2, network); } } } } return; } // // Two hidden layers // if( nkind==0 ) { mlpbase.mlpcreate2(nin, nhid1, nhid2, nout, network); } else { if( nkind==1 ) { mlpbase.mlpcreatec2(nin, nhid1, nhid2, nout, network); } else { if( nkind==2 ) { mlpbase.mlpcreateb2(nin, nhid1, nhid2, nout, a1, a2, network); } else { if( nkind==3 ) { mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a1, a2, network); } } } } }
/*************************************************************************
This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with an MLPStartTrainingX call,
and then the user subsequently calls MLPContinueTrainingX to perform one
more iteration of the training.

This function performs one more iteration of the training and returns
either True (training continues) or False (training stopped). In case True
was returned, the Network weights are updated according to the current
state of the optimization progress. In case False was returned, no
additional updates are performed (the previous update of the network
weights moved us to the final point, and no additional update is needed).

EXAMPLE:
    >
    > [initialize network and trainer object]
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     [visualize training progress]
    >

INPUT PARAMETERS:
    S           -   trainer object;
    Network     -   neural network which receives A COPY of the actual
                    network which is trained by the algorithm. After each
                    training round the state of the network being trained
                    is copied to this variable. It must have the same
                    number of inputs and outputs/classes as was specified
                    during creation of the trainer object, and it must have
                    exactly the same architecture as the second network
                    (TNetwork);
    TNetwork    -   neural network being trained;
    State       -   LBFGS optimizer, already initialized; the number of
                    dimensions must be equal to the number of weights in
                    the networks;
    Subset      -   subset of the training set (it stores row numbers);
    SubsetSize  -   size of the subset (SubsetSize<0 means that the full
                    dataset is used);
    NGradBatch  -   number of calls to the MLPGradBatch function; the
                    initial value is zero.

OUTPUT PARAMETERS:
    Network     -   weights of the neural network are rewritten by the
                    current approximation;
    NGradBatch  -   number of calls to the MLPGradBatch function after
                    training.

NOTE: this method uses the sum-of-squares error function for training.

NOTE: it is expected that the trainer object settings are NOT changed
      during step-by-step training, i.e. no one changes stopping criteria
      or the training set during training. It is possible and there is no
      defense against such actions, but algorithm behavior in such cases is
      undefined and can be unpredictable.

NOTE: it is expected that Network is the same one which was passed to the
      MLPStartTraining() function. However, THIS function checks only the
      following:
      * that the number of network inputs is consistent with the trainer
        object settings
      * that the number of network outputs/classes is consistent with the
        trainer object settings
      * that the number of network weights is the same as the number of
        weights in the network passed to the MLPStartTraining() function

      An exception is thrown when these conditions are violated.

      It is also expected that you do not change the state of the network
      on your own - the only party who has the right to change the network
      during its training is the trainer object. Any attempt to interfere
      with the trainer may lead to unpredictable results.
-- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static bool mlpcontinuetrainingx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, int[] subset, int subsetsize, ref int ngradbatch) { bool result = new bool(); int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int ntype = 0; int ttype = 0; double decay = 0; double v = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPContinueTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)."); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the resulting network is not similar to network type in trainer object."); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTrainingX: internal error - type of the training network is not similar to network type in trainer object."); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPContinueTrainingX: internal error - number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPContinueTrainingX: internal error - number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPContinueTrainingX: internal error - number of weights the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(alglib.ap.len(subset)>=subsetsize, "MLPContinueTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)."); for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(subset[i]>=0 && subset[i]<=s.npoints-1, "MLPContinueTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)."); } if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && subsetsize!=0 ) { decay = s.decay; while( minlbfgs.minlbfgsiteration(state) ) { if( state.xupdated ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = tnetwork.weights[i_]; } result = true; return result; } for(i_=0; i_<=wcount-1;i_++) { tnetwork.weights[i_] = state.x[i_]; } if( s.datatype==0 ) { mlpbase.mlpgradbatchsubset(tnetwork, s.densexy, s.npoints, subset, subsetsize, ref state.f, ref state.g); } if( s.datatype==1 ) { mlpbase.mlpgradbatchsparsesubset(tnetwork, s.sparsexy, s.npoints, subset, subsetsize, ref state.f, ref state.g); } // // Increment number of operations performed on batch gradient // ngradbatch = ngradbatch+1; v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += tnetwork.weights[i_]*tnetwork.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*tnetwork.weights[i_]; } } for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = tnetwork.weights[i_]; } } result = false; return result; }
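/*************************************************************************
The inner loop above follows the MinLBFGS reverse-communication pattern:
each time MinLBFGSIteration() returns True, the caller either observes a
newly published point (State.XUpdated; the real code returns True to its
own caller at that point so progress can be monitored) or evaluates the
regularized objective at State.X. The sum-of-squares error is augmented
with the weight-decay term 0.5*Decay*||W||^2, whose gradient contribution
is Decay*W. Stripped of the dense/sparse branching and of the step-by-step
return, the evaluation step looks like this (C# fragment;
ComputeErrorAndGradient() is a hypothetical stand-in for
MLPGradBatchSubset/MLPGradBatchSparseSubset):

    while( minlbfgs.minlbfgsiteration(state) )
    {
        if( state.xupdated )
            continue;                                     // new iterate published in State.X
        for(int i=0; i<wcount; i++)
            tnetwork.weights[i] = state.x[i];
        ComputeErrorAndGradient(tnetwork, ref state.f, ref state.g);  // hypothetical helper
        double v = 0.0;
        for(int i=0; i<wcount; i++)
            v += tnetwork.weights[i]*tnetwork.weights[i];
        state.f = state.f+0.5*decay*v;                    // add weight-decay penalty
        for(int i=0; i<wcount; i++)
            state.g[i] = state.g[i]+decay*tnetwork.weights[i];
    }
*************************************************************************/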
/*************************************************************************
This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with an MLPStartTrainingX call,
and then the user subsequently calls MLPContinueTrainingX to perform one
more iteration of the training.

After a call to this function the trainer object remembers the network and
is ready to train it. However, no training is performed until the first
call to the MLPContinueTraining() function. Subsequent calls to
MLPContinueTraining() will advance training progress one iteration further.

EXAMPLE:
    >
    > ...initialize network and trainer object....
    >
    > MLPStartTraining(Trainer, Network, True)
    > while MLPContinueTraining(Trainer, Network) do
    >     ...visualize training progress...
    >

INPUT PARAMETERS:
    S           -   trainer object;
    Network     -   neural network which receives A COPY of the actual
                    network which is trained by the algorithm. After each
                    training round the state of the network being trained
                    is copied to this variable. It must have the same
                    number of inputs and outputs/classes as was specified
                    during creation of the trainer object, and it must have
                    exactly the same architecture as the second network
                    (TNetwork);
    TNetwork    -   neural network being trained;
    State       -   LBFGS optimizer, already initialized; the number of
                    dimensions must be equal to the number of weights in
                    the networks;
    RandomStart -   randomize network before training or not:
                    * True means that the network is randomized and its
                      initial state (one which was passed to the trainer
                      object) is lost;
                    * False means that training is started from the current
                      state of the network;
    Subset      -   subset of the training set (it stores row numbers);
    SubsetSize  -   size of the subset (SubsetSize<0 means that the full
                    dataset is used).

OUTPUT PARAMETERS:
    Network     -   neural network which is ready for training (weights are
                    initialized, preprocessor is initialized using the
                    current training set).

NOTE: this method uses the sum-of-squares error function for training.

NOTE: it is expected that the trainer object settings are NOT changed
      during step-by-step training, i.e. no one changes stopping criteria
      or the training set during training. It is possible and there is no
      defense against such actions, but algorithm behavior in such cases is
      undefined and can be unpredictable.
-- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static void mlpstarttrainingx(mlptrainer s, mlpbase.multilayerperceptron network, mlpbase.multilayerperceptron tnetwork, minlbfgs.minlbfgsstate state, bool randomstart, int[] subset, int subsetsize) { int nin = 0; int nout = 0; int wcount = 0; int twcount = 0; int ntype = 0; int ttype = 0; int i = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPStartTrainingX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the resulting network is not similar to network type in trainer object"); if( !mlpbase.mlpissoftmax(tnetwork) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTrainingX: internal error - type of the training network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the network."); mlpbase.mlpproperties(tnetwork, ref nin, ref nout, ref twcount); alglib.ap.assert(s.nin==nin, "MLPStartTrainingX: number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPStartTrainingX: number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(twcount==wcount, "MLPStartTrainingX: number of weights the resulting network is not equal to number of weights in the training network."); alglib.ap.assert(alglib.ap.len(subset)>=subsetsize, "MLPStartTrainingX: internal error - parameter SubsetSize more than input subset size(Length(Subset)<SubsetSize)"); for(i=0; i<=subsetsize-1; i++) { alglib.ap.assert(subset[i]>=0 && subset[i]<=s.npoints-1, "MLPStartTrainingX: internal error - parameter Subset contains incorrect index(Subset[I]<0 or Subset[I]>S.NPoints-1)"); } if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && subsetsize!=0 ) { // // Prepare // if( s.datatype==0 ) { mlpbase.mlpinitpreprocessorsubset(network, s.densexy, s.npoints, subset, subsetsize); mlpbase.mlpinitpreprocessorsubset(tnetwork, s.densexy, s.npoints, subset, subsetsize); } if( s.datatype==1 ) { mlpbase.mlpinitpreprocessorsparsesubset(network, s.sparsexy, s.npoints, subset, subsetsize); mlpbase.mlpinitpreprocessorsparsesubset(tnetwork, s.sparsexy, s.npoints, subset, subsetsize); } // // Process // if( randomstart ) { mlpbase.mlprandomize(network); } minlbfgs.minlbfgsrestartfrom(state, network.weights); } else { for(i=0; i<=wcount-1; i++) { network.weights[i] = 0; } } // // Copy weights // for(i_=0; i_<=wcount-1;i_++) { tnetwork.weights[i_] = network.weights[i_]; } }
/************************************************************************* This function initializes temporaries needed for training session. *************************************************************************/ private static void initmlptrnsessions(mlpbase.multilayerperceptron networktrained, bool randomizenetwork, mlptrainer trainer, alglib.smp.shared_pool sessions) { int[] dummysubset = new int[0]; smlptrnsession t = new smlptrnsession(); smlptrnsession p = null; if( alglib.smp.ae_shared_pool_is_initialized(sessions) ) { // // Pool was already initialized. // Clear sessions stored in the pool. // alglib.smp.ae_shared_pool_first_recycled(sessions, ref p); while( p!=null ) { alglib.ap.assert(mlpbase.mlpsamearchitecture(p.network, networktrained), "InitMLPTrnSessions: internal consistency error"); p.bestrmserror = math.maxrealnumber; alglib.smp.ae_shared_pool_next_recycled(sessions, ref p); } } else { // // Prepare session and seed pool // initmlptrnsession(networktrained, randomizenetwork, trainer, t); alglib.smp.ae_shared_pool_set_seed(sessions, t); } }
/************************************************************************* This function trains neural network passed to this function, using current dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset()) and current training settings. Training from NRestarts random starting positions is performed, best network is chosen. Training is performed using current training algorithm. FOR USERS OF COMMERCIAL EDITION: ! Commercial version of ALGLIB includes two important improvements of ! this function: ! * multicore support (C++ and C# computational cores) ! * SSE support (C++ computational core) ! ! Second improvement gives constant speedup (2-3X). First improvement ! gives close-to-linear speedup on multicore systems. Following ! operations can be executed in parallel: ! * NRestarts training sessions performed within each of ! cross-validation rounds (if NRestarts>1) ! * gradient calculation over large dataset (if dataset is large enough) ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! In order to use SSE features you have to: ! * use commercial version of ALGLIB on Intel processors ! * use C++ computational core ! ! This note is given for users of commercial edition; if you use GPL ! edition, you still will be able to call smp-version of this function, ! but all computations will be done serially. ! ! We recommend you to carefully read ALGLIB Reference Manual, section ! called 'SMP support', before using parallel version of this function. INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. NRestarts - number of restarts, >=0: * NRestarts>0 means that specified number of random restarts are performed, best network is chosen after training * NRestarts=0 means that current state of the network is used for training. OUTPUT PARAMETERS: Network - trained network NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(), network is filled by zero values. Same behavior for functions MLPStartTraining and MLPContinueTraining. NOTE: this method uses sum-of-squares error function for training. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlptrainnetwork(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, mlpreport rep) { int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.smp.shared_pool trnpool = new alglib.smp.shared_pool(); alglib.ap.assert(s.npoints>=0, "MLPTrainNetwork: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetwork: type of input network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPTrainNetwork: number of inputs in trainer is not equal to number of inputs in network"); alglib.ap.assert(s.nout==nout, "MLPTrainNetwork: number of outputs in trainer is not equal to number of outputs in network"); alglib.ap.assert(nrestarts>=0, "MLPTrainNetwork: NRestarts<0."); // // Train // mlptrainnetworkx(s, nrestarts, -1, s.subset, -1, s.subset, 0, network, rep, true, trnpool); }
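/*************************************************************************
A minimal usage sketch for MLPTrainNetwork (C# fragment). The trainer is
assumed to be created with MLPCreateTrainer() and filled with
MLPSetDataset(); those functions belong to this unit but their exact
signatures are not shown in this section, so treat the first two calls as
assumptions. XY/NPoints stand for an existing dense dataset:

    mlptrainer trn = new mlptrainer();
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpreport rep = new mlpreport();

    mlpcreatetrainer(2, 1, trn);                          // assumed: 2 inputs, 1 output (regression)
    mlpsetdataset(trn, xy, npoints);                      // assumed: attach dense dataset
    mlpbase.mlpcreate1(2, 10, 1, net);                    // network with one hidden layer

    mlptrainnetwork(trn, net, 5, rep);                    // 5 random restarts, best network kept

    // Rep now holds the training report (error estimates, NGrad counter)
*************************************************************************/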
/************************************************************************* This function initializes temporaries needed for ensemble training. *************************************************************************/ private static void initmlpetrnsession(mlpbase.multilayerperceptron individualnetwork, mlptrainer trainer, mlpetrnsession session) { int[] dummysubset = new int[0]; // // Prepare network: // * copy input network to Session.Network // * re-initialize preprocessor and weights if RandomizeNetwork=True // mlpbase.mlpcopy(individualnetwork, session.network); initmlptrnsessions(individualnetwork, true, trainer, session.mlpsessions); apserv.ivectorsetlengthatleast(ref session.trnsubset, trainer.npoints); apserv.ivectorsetlengthatleast(ref session.valsubset, trainer.npoints); }
/************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static void _pexec_mlptrainnetwork(mlptrainer s, mlpbase.multilayerperceptron network, int nrestarts, mlpreport rep) { mlptrainnetwork(s,network,nrestarts,rep); }
/************************************************************************* Creates ensemble from network. Only network geometry is copied. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatefromnetwork(mlpbase.multilayerperceptron network, int ensemblesize, mlpensemble ensemble) { int i = 0; int ccount = 0; int i_ = 0; int i1_ = 0; ap.assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!"); // // network properties // mlpbase.mlpproperties(network, ref ensemble.nin, ref ensemble.nout, ref ensemble.wcount); if( mlpbase.mlpissoftmax(network) ) { ccount = ensemble.nin; } else { ccount = ensemble.nin+ensemble.nout; } ensemble.postprocessing = false; ensemble.issoftmax = mlpbase.mlpissoftmax(network); ensemble.ensemblesize = ensemblesize; // // structure information // ensemble.structinfo = new int[network.structinfo[0]-1+1]; for(i=0; i<=network.structinfo[0]-1; i++) { ensemble.structinfo[i] = network.structinfo[i]; } // // weights, means, sigmas // ensemble.weights = new double[ensemblesize*ensemble.wcount-1+1]; ensemble.columnmeans = new double[ensemblesize*ccount-1+1]; ensemble.columnsigmas = new double[ensemblesize*ccount-1+1]; for(i=0; i<=ensemblesize*ensemble.wcount-1; i++) { ensemble.weights[i] = math.randomreal()-0.5; } for(i=0; i<=ensemblesize-1; i++) { i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnmeans[i_] = network.columnmeans[i_+i1_]; } i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnsigmas[i_] = network.columnsigmas[i_+i1_]; } } // // serialized part // mlpbase.mlpserializeold(network, ref ensemble.serializedmlp, ref ensemble.serializedlen); // // temporaries, internal buffers // ensemble.tmpweights = new double[ensemble.wcount-1+1]; ensemble.tmpmeans = new double[ccount-1+1]; ensemble.tmpsigmas = new double[ccount-1+1]; ensemble.neurons = new double[ensemble.structinfo[mlpntotaloffset]-1+1]; ensemble.dfdnet = new double[ensemble.structinfo[mlpntotaloffset]-1+1]; ensemble.y = new double[ensemble.nout-1+1]; }
/************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend you to use it unless you are pretty sure that you need ability to monitor training progress. This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. After call to this function trainer object remembers network and is ready to train it. However, no training is performed until first call to MLPContinueTraining() function. Subsequent calls to MLPContinueTraining() will advance training progress one iteration further. EXAMPLE: > > ...initialize network and trainer object.... > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > ...visualize training progress... > INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. RandomStart - randomize network before training or not: * True means that network is randomized and its initial state (one which was passed to the trainer object) is lost. * False means that training is started from the current state of the network OUTPUT PARAMETERS: Network - neural network which is ready to training (weights are initialized, preprocessor is initialized using current training set) NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpstarttraining(mlptrainer s, mlpbase.multilayerperceptron network, bool randomstart) { int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.ap.assert(s.npoints>=0, "MLPStartTraining: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTraining: type of input network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPStartTraining: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPStartTraining: number of outputs in trainer is not equal to number of outputs in the network."); // // Initialize temporaries // initmlptrnsession(network, randomstart, s, s.session); // // Train network // mlpstarttrainingx(s, randomstart, -1, s.subset, -1, s.session); // // Update network // mlpbase.mlpcopytunableparameters(s.session.network, network); }
/************************************************************************* Unsets network (initializes it to the smallest network possible). *************************************************************************/ private static void unsetnetwork(mlpbase.multilayerperceptron network) { mlpbase.mlpcreate0(1, 1, network); }
/************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend you to use it unless you are pretty sure that you need ability to monitor training progress. FOR USERS OF COMMERCIAL EDITION: ! Commercial version of ALGLIB includes two important improvements of ! this function: ! * multicore support (C++ and C# computational cores) ! * SSE support (C++ computational core) ! ! Second improvement gives constant speedup (2-3X). First improvement ! gives close-to-linear speedup on multicore systems. Following ! operations can be executed in parallel: ! * gradient calculation over large dataset (if dataset is large enough) ! ! In order to use multicore features you have to: ! * use commercial version of ALGLIB ! * call this function with "smp_" prefix, which indicates that ! multicore code will be used (for multicore support) ! ! In order to use SSE features you have to: ! * use commercial version of ALGLIB on Intel processors ! * use C++ computational core ! ! This note is given for users of commercial edition; if you use GPL ! edition, you still will be able to call smp-version of this function, ! but all computations will be done serially. ! ! We recommend you to carefully read ALGLIB Reference Manual, section ! called 'SMP support', before using parallel version of this function. This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. This function performs one more iteration of the training and returns either True (training continues) or False (training stopped). In case True was returned, Network weights are updated according to the current state of the optimization progress. In case False was returned, no additional updates is performed (previous update of the network weights moved us to the final point, and no additional updates is needed). EXAMPLE: > > [initialize network and trainer object] > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > [visualize training progress] > INPUT PARAMETERS: S - trainer object Network - neural network structure, which is used to store current state of the training process. OUTPUT PARAMETERS: Network - weights of the neural network are rewritten by the current approximation. NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. NOTE: It is expected that Network is the same one which was passed to MLPStartTraining() function. However, THIS function checks only following: * that number of network inputs is consistent with trainer object settings * that number of network outputs/classes is consistent with trainer object settings * that number of network weights is the same as number of weights in the network passed to MLPStartTraining() function Exception is thrown when these conditions are violated. It is also expected that you do not change state of the network on your own - the only party who has right to change network during its training is a trainer object. 
Any attempt to interfere with trainer may lead to unpredictable results. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static bool mlpcontinuetraining(mlptrainer s, mlpbase.multilayerperceptron network) { bool result = new bool(); int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; int i_ = 0; alglib.ap.assert(s.npoints>=0, "MLPContinueTraining: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPContinueTraining: type of input network is not similar to network type in trainer object."); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPContinueTraining: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPContinueTraining: number of outputs in trainer is not equal to number of outputs in the network."); result = mlpcontinuetrainingx(s, s.subset, -1, ref s.ngradbatch, s.session); if( result ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = s.session.network.weights[i_]; } } return result; }
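/*************************************************************************
Editor-added sketch of the step-by-step workflow described above
(illustrative only; public alglib wrapper assumed, and trainer/net/valxy/
valsize are hypothetical caller-side names). Progress is monitored between
iterations:

    alglib.mlpstarttraining(trainer, net, true);       // randomize and prepare the network
    while( alglib.mlpcontinuetraining(trainer, net) )
    {
        // net already holds the current weight approximation here, so any
        // monitoring function may be applied, e.g. validation-set error:
        double everr = alglib.mlperror(net, valxy, valsize);
    }

Per the notes above, the trainer settings and dataset should not be
changed between MLPStartTraining() and the last MLPContinueTraining()
call, and net must be the same object that was passed to
MLPStartTraining().
*************************************************************************/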
/************************************************************************* Neural network training using modified Levenberg-Marquardt with exact Hessian calculation and regularization. Subroutine trains neural network with restarts from random positions. Algorithm is well suited for small and medium scale problems (hundreds of weights). INPUT PARAMETERS: Network - neural network with initialized geometry XY - training set NPoints - training set size Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts from random position, >0. If you don't know what Restarts to choose, use 2. OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -9, if internal matrix inverse subroutine failed * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (NPoints<0, Restarts<1). * 2, if task has been solved. Rep - training report -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptrainlm(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, ref int info, mlpreport rep) { int nin = 0; int nout = 0; int wcount = 0; double lmftol = 0; double lmsteptol = 0; int i = 0; int k = 0; double v = 0; double e = 0; double enew = 0; double xnorm2 = 0; double stepnorm = 0; double[] g = new double[0]; double[] d = new double[0]; double[,] h = new double[0,0]; double[,] hmod = new double[0,0]; double[,] z = new double[0,0]; bool spd = new bool(); double nu = 0; double lambdav = 0; double lambdaup = 0; double lambdadown = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); double[] x = new double[0]; double[] y = new double[0]; double[] wbase = new double[0]; double[] wdir = new double[0]; double[] wt = new double[0]; double[] wx = new double[0]; int pass = 0; double[] wbest = new double[0]; double ebest = 0; int invinfo = 0; matinv.matinvreport invrep = new matinv.matinvreport(); int solverinfo = 0; densesolver.densesolverreport solverrep = new densesolver.densesolverreport(); int i_ = 0; info = 0; mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); lambdaup = 10; lambdadown = 0.3; lmftol = 0.001; lmsteptol = 0.001; // // Test for inputs // if( npoints<=0 || restarts<1 ) { info = -1; return; } if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=npoints-1; i++) { if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout ) { info = -2; return; } } } decay = Math.Max(decay, mindecay); info = 2; // // Initialize data // rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; // // General case. // Prepare task and network. Allocate space. 
// mlpbase.mlpinitpreprocessor(network, xy, npoints); g = new double[wcount-1+1]; h = new double[wcount-1+1, wcount-1+1]; hmod = new double[wcount-1+1, wcount-1+1]; wbase = new double[wcount-1+1]; wdir = new double[wcount-1+1]; wbest = new double[wcount-1+1]; wt = new double[wcount-1+1]; wx = new double[wcount-1+1]; ebest = math.maxrealnumber; // // Multiple passes // for(pass=1; pass<=restarts; pass++) { // // Initialize weights // mlpbase.mlprandomize(network); // // First stage of the hybrid algorithm: LBFGS // for(i_=0; i_<=wcount-1;i_++) { wbase[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), wbase, state); minlbfgs.minlbfgssetcond(state, 0, 0, 0, Math.Max(25, wcount)); while( minlbfgs.minlbfgsiteration(state) ) { // // gradient // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref state.g); // // weight decay // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*network.weights[i_]; } // // next iteration // rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref wbase, internalrep); for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbase[i_]; } // // Second stage of the hybrid algorithm: LM // // Initialize H with identity matrix, // G with gradient, // E with regularized error. // mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = e+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; } for(k=0; k<=wcount-1; k++) { h[k,k] = h[k,k]+decay; } rep.nhess = rep.nhess+1; lambdav = 0.001; nu = 2; while( true ) { // // 1. HMod = H+lambda*I // 2. Try to solve (H+Lambda*I)*dx = -g. // Increase lambda if left part is not positive definite. // for(i=0; i<=wcount-1; i++) { for(i_=0; i_<=wcount-1;i_++) { hmod[i,i_] = h[i,i_]; } hmod[i,i] = hmod[i,i]+lambdav; } spd = trfac.spdmatrixcholesky(ref hmod, wcount, true); rep.ncholesky = rep.ncholesky+1; if( !spd ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } densesolver.spdmatrixcholeskysolve(hmod, wcount, true, g, ref solverinfo, solverrep, ref wdir); if( solverinfo<0 ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } for(i_=0; i_<=wcount-1;i_++) { wdir[i_] = -1*wdir[i_]; } // // Lambda found. // 1. Save old w in WBase // 1. Test some stopping criterions // 2. 
If error(w+wdir)>error(w), increase lambda // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = network.weights[i_] + wdir[i_]; } xnorm2 = 0.0; for(i_=0; i_<=wcount-1;i_++) { xnorm2 += network.weights[i_]*network.weights[i_]; } stepnorm = 0.0; for(i_=0; i_<=wcount-1;i_++) { stepnorm += wdir[i_]*wdir[i_]; } stepnorm = Math.Sqrt(stepnorm); enew = mlpbase.mlperror(network, xy, npoints)+0.5*decay*xnorm2; if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) ) { break; } if( (double)(enew)>(double)(e) ) { lambdav = lambdav*lambdaup*nu; nu = nu*2; continue; } // // Optimize using inv(cholesky(H)) as preconditioner // matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, invrep); if( invinfo<=0 ) { // // if matrix can't be inverted then exit with errors // TODO: make WCount steps in direction suggested by HMod // info = -9; return; } for(i_=0; i_<=wcount-1;i_++) { wbase[i_] = network.weights[i_]; } for(i=0; i<=wcount-1; i++) { wt[i] = 0; } minlbfgs.minlbfgscreatex(wcount, wcount, wt, 1, 0.0, state); minlbfgs.minlbfgssetcond(state, 0, 0, 0, 5); while( minlbfgs.minlbfgsiteration(state) ) { // // gradient // for(i=0; i<=wcount-1; i++) { v = 0.0; for(i_=i; i_<=wcount-1;i_++) { v += state.x[i_]*hmod[i,i_]; } network.weights[i] = wbase[i]+v; } mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref g); for(i=0; i<=wcount-1; i++) { state.g[i] = 0; } for(i=0; i<=wcount-1; i++) { v = g[i]; for(i_=i; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + v*hmod[i,i_]; } } // // weight decay // grad(x'*x) = A'*(x0+A*t) // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i=0; i<=wcount-1; i++) { v = decay*network.weights[i]; for(i_=i; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + v*hmod[i,i_]; } } // // next iteration // rep.ngrad = rep.ngrad+1; } minlbfgs.minlbfgsresults(state, ref wt, internalrep); // // Accept new position. // Calculate Hessian // for(i=0; i<=wcount-1; i++) { v = 0.0; for(i_=i; i_<=wcount-1;i_++) { v += wt[i_]*hmod[i,i_]; } network.weights[i] = wbase[i]+v; } mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = e+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { g[i_] = g[i_] + decay*network.weights[i_]; } for(k=0; k<=wcount-1; k++) { h[k,k] = h[k,k]+decay; } rep.nhess = rep.nhess+1; // // Update lambda // lambdav = lambdav*lambdadown; nu = 2; } // // update WBest // v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } e = 0.5*decay*v+mlpbase.mlperror(network, xy, npoints); if( (double)(e)<(double)(ebest) ) { ebest = e; for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } } } // // copy WBest to output // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wbest[i_]; } }
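/*************************************************************************
Editor-added usage sketch for MLPTrainLM (illustrative only, public alglib
wrapper assumed; xy and npoints are hypothetical caller-side names):

    alglib.multilayerperceptron net;
    alglib.mlpreport rep;
    int info;
    alglib.mlpcreate1(2, 10, 1, out net);                              // 2-10-1 network
    alglib.mlptrainlm(net, xy, npoints, 0.001, 2, out info, out rep);  // decay=0.001, 2 restarts
    // info=2 on success; rep.ngrad/rep.nhess/rep.ncholesky count the work performed

Regarding the damping schedule in the code above: Lambda is multiplied by
LambdaUp*Nu (and Nu is doubled) whenever the damped Hessian is not
positive definite or the step increases the regularized error, and it is
multiplied by LambdaDown after each accepted step - the usual
Levenberg-Marquardt adaptation, with LambdaUp=10 and LambdaDown=0.3.
*************************************************************************/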
/************************************************************************* Single-threaded stub. HPC ALGLIB replaces it by multithreaded code. *************************************************************************/ public static bool _pexec_mlpcontinuetraining(mlptrainer s, mlpbase.multilayerperceptron network) { return mlpcontinuetraining(s,network); }
/************************************************************************* Neural network training using early stopping (base algorithm - L-BFGS with regularization). INPUT PARAMETERS: Network - neural network with initialized geometry TrnXY - training set TrnSize - training set size, TrnSize>0 ValXY - validation set ValSize - validation set size, ValSize>0 Decay - weight decay constant, >=0.001 Decay term 'Decay*||Weights||^2' is added to error function. If you don't know what Decay to choose, use 0.001. Restarts - number of restarts, either: * strictly positive number - algorithm makes specified number of restarts from random positions. * -1, in which case algorithm makes exactly one run from the initial state of the network (no randomization). If you don't know what Restarts to choose, choose one of the following: * -1 (deterministic start) * +1 (one random restart) * +5 (moderate amount of random restarts) OUTPUT PARAMETERS: Network - trained neural network. Info - return code: * -2, if there is a point with class number outside of [0..NOut-1]. * -1, if wrong parameters specified (TrnSize<=0, ValSize<=0, Restarts<1, ...). * 2, task has been solved, stopping criterion met - sufficiently small step size. Not expected (we use EARLY stopping) but possible and not an error. * 6, task has been solved, stopping criterion met - increase of validation set error. Rep - training report NOTE: Algorithm stops if validation set error increases for long enough or step size becomes small enough (there are tasks where validation set error may decrease indefinitely). In any case, the solution returned corresponds to the minimum of validation set error. -- ALGLIB -- Copyright 10.03.2009 by Bochkanov Sergey *************************************************************************/ public static void mlptraines(mlpbase.multilayerperceptron network, double[,] trnxy, int trnsize, double[,] valxy, int valsize, double decay, int restarts, ref int info, mlpreport rep) { int i = 0; int pass = 0; int nin = 0; int nout = 0; int wcount = 0; double[] w = new double[0]; double[] wbest = new double[0]; double e = 0; double v = 0; double ebest = 0; double[] wfinal = new double[0]; double efinal = 0; int itcnt = 0; int itbest = 0; minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport(); minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate(); double wstep = 0; bool needrandomization = new bool(); int i_ = 0; info = 0; wstep = 0.001; // // Test inputs, parse flags, read network geometry // if( ((trnsize<=0 || valsize<=0) || (restarts<1 && restarts!=-1)) || (double)(decay)<(double)(0) ) { info = -1; return; } if( restarts==-1 ) { needrandomization = false; restarts = 1; } else { needrandomization = true; } mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); if( mlpbase.mlpissoftmax(network) ) { for(i=0; i<=trnsize-1; i++) { if( (int)Math.Round(trnxy[i,nin])<0 || (int)Math.Round(trnxy[i,nin])>=nout ) { info = -2; return; } } for(i=0; i<=valsize-1; i++) { if( (int)Math.Round(valxy[i,nin])<0 || (int)Math.Round(valxy[i,nin])>=nout ) { info = -2; return; } } } info = 2; // // Prepare // mlpbase.mlpinitpreprocessor(network, trnxy, trnsize); w = new double[wcount-1+1]; wbest = new double[wcount-1+1]; wfinal = new double[wcount-1+1]; efinal = math.maxrealnumber; for(i=0; i<=wcount-1; i++) { wfinal[i] = 0; } // // Multiple starts // rep.ncholesky = 0; rep.nhess = 0; rep.ngrad = 0; for(pass=1; pass<=restarts; pass++) { // // Process // if( needrandomization ) { mlpbase.mlprandomize(network); } ebest = mlpbase.mlperror(network,
valxy, valsize); for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } itbest = 0; itcnt = 0; for(i_=0; i_<=wcount-1;i_++) { w[i_] = network.weights[i_]; } minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 10), w, state); minlbfgs.minlbfgssetcond(state, 0.0, 0.0, wstep, 0); minlbfgs.minlbfgssetxrep(state, true); while( minlbfgs.minlbfgsiteration(state) ) { // // Calculate gradient // if( state.needfg ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } mlpbase.mlpgradnbatch(network, trnxy, trnsize, ref state.f, ref state.g); v = 0.0; for(i_=0; i_<=wcount-1;i_++) { v += network.weights[i_]*network.weights[i_]; } state.f = state.f+0.5*decay*v; for(i_=0; i_<=wcount-1;i_++) { state.g[i_] = state.g[i_] + decay*network.weights[i_]; } rep.ngrad = rep.ngrad+1; } // // Validation set // if( state.xupdated ) { for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = state.x[i_]; } e = mlpbase.mlperror(network, valxy, valsize); if( (double)(e)<(double)(ebest) ) { ebest = e; for(i_=0; i_<=wcount-1;i_++) { wbest[i_] = network.weights[i_]; } itbest = itcnt; } if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) ) { info = 6; break; } itcnt = itcnt+1; } } minlbfgs.minlbfgsresults(state, ref w, internalrep); // // Compare with final answer // if( (double)(ebest)<(double)(efinal) ) { for(i_=0; i_<=wcount-1;i_++) { wfinal[i_] = wbest[i_]; } efinal = ebest; } } // // The best network // for(i_=0; i_<=wcount-1;i_++) { network.weights[i_] = wfinal[i_]; } }
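/*************************************************************************
Editor-added usage sketch for MLPTrainES (illustrative only, public alglib
wrapper assumed; trnxy/valxy and their sizes are hypothetical dense
arrays in the same row format as the training set):

    int info;
    alglib.mlpreport rep;
    alglib.mlptraines(net, trnxy, trnsize, valxy, valsize, 0.001, 3, out info, out rep);
    // info=6: stopped because validation error started to grow (the expected case)
    // info=2: stopped because the optimizer step became sufficiently small

As the code above shows, a snapshot of the weights is kept whenever the
validation error reaches a new minimum, and a pass is aborted once
ItCnt>30 and ItCnt>1.5*ItBest, i.e. once no improvement has been seen for
roughly a third of the iterations performed so far; the returned network
always corresponds to the best validation error observed over all passes.
*************************************************************************/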
/************************************************************************* This function trains neural network passed to this function, using current dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset()) and current training settings. Training from NRestarts random starting positions is performed, best network is chosen. This function is inteded to be used internally. It may be used in several settings: * training with ValSubsetSize=0, corresponds to "normal" training with termination criteria based on S.MaxIts (steps count) and S.WStep (step size). Training sample is given by TrnSubset/TrnSubsetSize. * training with ValSubsetSize>0, corresponds to early stopping training with additional MaxIts/WStep stopping criteria. Training sample is given by TrnSubset/TrnSubsetSize, validation sample is given by ValSubset/ ValSubsetSize. -- ALGLIB -- Copyright 13.08.2012 by Bochkanov Sergey *************************************************************************/ private static void mlptrainnetworkx(mlptrainer s, int nrestarts, int algokind, int[] trnsubset, int trnsubsetsize, int[] valsubset, int valsubsetsize, mlpbase.multilayerperceptron network, mlpreport rep, bool isrootcall, alglib.smp.shared_pool sessions) { mlpbase.modelerrors modrep = new mlpbase.modelerrors(); double eval = 0; double ebest = 0; int ngradbatch = 0; int nin = 0; int nout = 0; int wcount = 0; int pcount = 0; int itbest = 0; int itcnt = 0; int ntype = 0; int ttype = 0; bool rndstart = new bool(); int i = 0; int nr0 = 0; int nr1 = 0; mlpreport rep0 = new mlpreport(); mlpreport rep1 = new mlpreport(); bool randomizenetwork = new bool(); double bestrmserror = 0; smlptrnsession psession = null; int i_ = 0; mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); // // Process root call // if( isrootcall ) { // // Check correctness of parameters // alglib.ap.assert(algokind==0 || algokind==-1, "MLPTrainNetworkX: unexpected AlgoKind"); alglib.ap.assert(s.npoints>=0, "MLPTrainNetworkX: internal error - parameter S is not initialized or is spoiled(S.NPoints<0)"); if( s.rcpar ) { ttype = 0; } else { ttype = 1; } if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } alglib.ap.assert(ntype==ttype, "MLPTrainNetworkX: internal error - type of the training network is not similar to network type in trainer object"); alglib.ap.assert(s.nin==nin, "MLPTrainNetworkX: internal error - number of inputs in trainer is not equal to number of inputs in the training network."); alglib.ap.assert(s.nout==nout, "MLPTrainNetworkX: internal error - number of outputs in trainer is not equal to number of outputs in the training network."); alglib.ap.assert(nrestarts>=0, "MLPTrainNetworkX: internal error - NRestarts<0."); alglib.ap.assert(alglib.ap.len(trnsubset)>=trnsubsetsize, "MLPTrainNetworkX: internal error - parameter TrnSubsetSize more than input subset size(Length(TrnSubset)<TrnSubsetSize)"); for(i=0; i<=trnsubsetsize-1; i++) { alglib.ap.assert(trnsubset[i]>=0 && trnsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter TrnSubset contains incorrect index(TrnSubset[I]<0 or TrnSubset[I]>S.NPoints-1)"); } alglib.ap.assert(alglib.ap.len(valsubset)>=valsubsetsize, "MLPTrainNetworkX: internal error - parameter ValSubsetSize more than input subset size(Length(ValSubset)<ValSubsetSize)"); for(i=0; i<=valsubsetsize-1; i++) { alglib.ap.assert(valsubset[i]>=0 && valsubset[i]<=s.npoints-1, "MLPTrainNetworkX: internal error - parameter ValSubset contains incorrect index(ValSubset[I]<0 or 
ValSubset[I]>S.NPoints-1)"); } // // Train // randomizenetwork = nrestarts>0; initmlptrnsessions(network, randomizenetwork, s, sessions); mlptrainnetworkx(s, nrestarts, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep, false, sessions); // // Choose best network // bestrmserror = math.maxrealnumber; alglib.smp.ae_shared_pool_first_recycled(sessions, ref psession); while( psession!=null ) { if( (double)(psession.bestrmserror)<(double)(bestrmserror) ) { mlpbase.mlpimporttunableparameters(network, psession.bestparameters); bestrmserror = psession.bestrmserror; } alglib.smp.ae_shared_pool_next_recycled(sessions, ref psession); } // // Calculate errors // if( s.datatype==0 ) { mlpbase.mlpallerrorssubset(network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep); } if( s.datatype==1 ) { mlpbase.mlpallerrorssparsesubset(network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep); } rep.relclserror = modrep.relclserror; rep.avgce = modrep.avgce; rep.rmserror = modrep.rmserror; rep.avgerror = modrep.avgerror; rep.avgrelerror = modrep.avgrelerror; // // Done // return; } // // Split problem, if we have more than 1 restart // if( nrestarts>=2 ) { // // Divide problem with NRestarts into two: NR0 and NR1. // nr0 = nrestarts/2; nr1 = nrestarts-nr0; mlptrainnetworkx(s, nr0, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep0, false, sessions); mlptrainnetworkx(s, nr1, algokind, trnsubset, trnsubsetsize, valsubset, valsubsetsize, network, rep1, false, sessions); // // Aggregate results // rep.ngrad = rep0.ngrad+rep1.ngrad; rep.nhess = rep0.nhess+rep1.nhess; rep.ncholesky = rep0.ncholesky+rep1.ncholesky; // // Done :) // return; } // // Execution with NRestarts=1 or NRestarts=0: // * NRestarts=1 means that network is restarted from random position // * NRestarts=0 means that network is not randomized // alglib.ap.assert(nrestarts==0 || nrestarts==1, "MLPTrainNetworkX: internal error"); rep.ngrad = 0; rep.nhess = 0; rep.ncholesky = 0; alglib.smp.ae_shared_pool_retrieve(sessions, ref psession); if( ((s.datatype==0 || s.datatype==1) && s.npoints>0) && trnsubsetsize!=0 ) { // // Train network using combination of early stopping and step-size // and step-count based criteria. Network state with best value of // validation set error is stored in WBuf0. When validation set is // zero, most recent state of network is stored. 
// rndstart = nrestarts!=0; ngradbatch = 0; eval = 0; ebest = 0; itbest = 0; itcnt = 0; mlpstarttrainingx(s, rndstart, algokind, trnsubset, trnsubsetsize, psession); if( s.datatype==0 ) { ebest = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { ebest = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } for(i_=0; i_<=wcount-1;i_++) { psession.wbuf0[i_] = psession.network.weights[i_]; } while( mlpcontinuetrainingx(s, trnsubset, trnsubsetsize, ref ngradbatch, psession) ) { if( s.datatype==0 ) { eval = mlpbase.mlperrorsubset(psession.network, s.densexy, s.npoints, valsubset, valsubsetsize); } if( s.datatype==1 ) { eval = mlpbase.mlperrorsparsesubset(psession.network, s.sparsexy, s.npoints, valsubset, valsubsetsize); } if( (double)(eval)<=(double)(ebest) || valsubsetsize==0 ) { for(i_=0; i_<=wcount-1;i_++) { psession.wbuf0[i_] = psession.network.weights[i_]; } ebest = eval; itbest = itcnt; } if( itcnt>30 && (double)(itcnt)>(double)(1.5*itbest) ) { break; } itcnt = itcnt+1; } for(i_=0; i_<=wcount-1;i_++) { psession.network.weights[i_] = psession.wbuf0[i_]; } rep.ngrad = ngradbatch; } else { for(i=0; i<=wcount-1; i++) { psession.network.weights[i] = 0; } } // // Evaluate network performance and update PSession.BestParameters/BestRMSError // (if needed). // if( s.datatype==0 ) { mlpbase.mlpallerrorssubset(psession.network, s.densexy, s.npoints, trnsubset, trnsubsetsize, modrep); } if( s.datatype==1 ) { mlpbase.mlpallerrorssparsesubset(psession.network, s.sparsexy, s.npoints, trnsubset, trnsubsetsize, modrep); } if( (double)(modrep.rmserror)<(double)(psession.bestrmserror) ) { mlpbase.mlpexporttunableparameters(psession.network, ref psession.bestparameters, ref pcount); psession.bestrmserror = modrep.rmserror; } // // Move session back to pool // alglib.smp.ae_shared_pool_recycle(sessions, ref psession); }
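/*************************************************************************
Editor-added note on the recursion above (illustrative only). NRestarts is
split into two halves so that, in the commercial SMP build, the recursive
calls can be scheduled as parallel tasks sharing the same session pool;
the GPL build executes them serially. A minimal standalone sketch of the
same divide-and-conquer pattern, with a hypothetical trainOne() standing
in for the NRestarts<=1 branch:

    void trainRecursive(int nrestarts)
    {
        if( nrestarts>=2 )
        {
            int nr0 = nrestarts/2;     // first half of the restarts
            int nr1 = nrestarts-nr0;   // remaining restarts
            trainRecursive(nr0);       // these two calls are the parallelization points
            trainRecursive(nr1);
            return;
        }
        trainOne(nrestarts);           // nrestarts is now 0 (no randomization) or 1
    }

Each leaf retrieves a session from the shared pool, trains from either a
random or the current starting point, records its best RMS error, and
recycles the session; the root call then scans the recycled sessions and
imports the parameters with the smallest RMS error into Network.
*************************************************************************/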
/************************************************************************* Cross-validation estimate of generalization error. Base algorithm - Levenberg-Marquardt. INPUT PARAMETERS: Network - neural network with initialized geometry. Network is not changed during cross-validation - it is used only as a representative of its architecture. XY - training set. NPoints - training set size. Decay - weight decay, same as in MLPTrainLBFGS Restarts - number of restarts, >0. Restarts are counted for each partition separately, so total number of restarts will be Restarts*FoldsCount. FoldsCount - number of folds in k-fold cross-validation, 2<=FoldsCount<=NPoints. Recommended value: 10. OUTPUT PARAMETERS: Info - return code, same as in MLPTrainLBFGS Rep - report, same as in MLPTrainLM/MLPTrainLBFGS CVRep - generalization error estimates -- ALGLIB -- Copyright 09.12.2007 by Bochkanov Sergey *************************************************************************/ public static void mlpkfoldcvlm(mlpbase.multilayerperceptron network, double[,] xy, int npoints, double decay, int restarts, int foldscount, ref int info, mlpreport rep, mlpcvreport cvrep) { info = 0; mlpkfoldcvgeneral(network, xy, npoints, decay, restarts, foldscount, true, 0.0, 0, ref info, rep, cvrep); }
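/*************************************************************************
Editor-added usage sketch for MLPKFoldCVLM (illustrative only, public
alglib wrapper assumed; xy and npoints are hypothetical caller-side names):

    int info;
    alglib.mlpreport rep;
    alglib.mlpcvreport cvrep;
    alglib.mlpkfoldcvlm(net, xy, npoints, 0.001, 2, 10, out info, out rep, out cvrep);
    // cvrep.rmserror, cvrep.avgerror, cvrep.relclserror etc. estimate the
    // generalization error; net itself is used only as an architecture
    // template and is not trained by this call.
*************************************************************************/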
/************************************************************************* This function initializes temporaries needed for training session. -- ALGLIB -- Copyright 01.07.2013 by Bochkanov Sergey *************************************************************************/ private static void initmlptrnsession(mlpbase.multilayerperceptron networktrained, bool randomizenetwork, mlptrainer trainer, smlptrnsession session) { int nin = 0; int nout = 0; int wcount = 0; int pcount = 0; int[] dummysubset = new int[0]; // // Prepare network: // * copy input network to Session.Network // * re-initialize preprocessor and weights if RandomizeNetwork=True // mlpbase.mlpcopy(networktrained, session.network); if( randomizenetwork ) { alglib.ap.assert(trainer.datatype==0 || trainer.datatype==1, "InitTemporaries: unexpected Trainer.DataType"); if( trainer.datatype==0 ) { mlpbase.mlpinitpreprocessorsubset(session.network, trainer.densexy, trainer.npoints, dummysubset, -1); } if( trainer.datatype==1 ) { mlpbase.mlpinitpreprocessorsparsesubset(session.network, trainer.sparsexy, trainer.npoints, dummysubset, -1); } mlpbase.mlprandomize(session.network); session.randomizenetwork = true; } else { session.randomizenetwork = false; } // // Determine network geometry and initialize optimizer // mlpbase.mlpproperties(session.network, ref nin, ref nout, ref wcount); minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, trainer.lbfgsfactor), session.network.weights, session.optimizer); minlbfgs.minlbfgssetxrep(session.optimizer, true); // // Create buffers // session.wbuf0 = new double[wcount]; session.wbuf1 = new double[wcount]; // // Initialize session result // mlpbase.mlpexporttunableparameters(session.network, ref session.bestparameters, ref pcount); session.bestrmserror = math.maxrealnumber; }
/************************************************************************* Creates ensemble from network. Only network geometry is copied. -- ALGLIB -- Copyright 17.02.2009 by Bochkanov Sergey *************************************************************************/ public static void mlpecreatefromnetwork(mlpbase.multilayerperceptron network, int ensemblesize, mlpensemble ensemble) { int i = 0; int ccount = 0; int wcount = 0; int i_ = 0; int i1_ = 0; alglib.ap.assert(ensemblesize>0, "MLPECreate: incorrect ensemble size!"); // // Copy network // mlpbase.mlpcopy(network, ensemble.network); // // network properties // if( mlpbase.mlpissoftmax(network) ) { ccount = mlpbase.mlpgetinputscount(ensemble.network); } else { ccount = mlpbase.mlpgetinputscount(ensemble.network)+mlpbase.mlpgetoutputscount(ensemble.network); } wcount = mlpbase.mlpgetweightscount(ensemble.network); ensemble.ensemblesize = ensemblesize; // // weights, means, sigmas // ensemble.weights = new double[ensemblesize*wcount]; ensemble.columnmeans = new double[ensemblesize*ccount]; ensemble.columnsigmas = new double[ensemblesize*ccount]; for(i=0; i<=ensemblesize*wcount-1; i++) { ensemble.weights[i] = math.randomreal()-0.5; } for(i=0; i<=ensemblesize-1; i++) { i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnmeans[i_] = network.columnmeans[i_+i1_]; } i1_ = (0) - (i*ccount); for(i_=i*ccount; i_<=(i+1)*ccount-1;i_++) { ensemble.columnsigmas[i_] = network.columnsigmas[i_+i1_]; } } // // temporaries, internal buffers // ensemble.y = new double[mlpbase.mlpgetoutputscount(ensemble.network)]; }
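/*************************************************************************
Editor-added usage sketch for MLPECreateFromNetwork (illustrative only,
public alglib wrapper assumed; names are hypothetical):

    alglib.multilayerperceptron net;
    alglib.mlpensemble ensemble;
    double[] x = new double[]{0.1, 0.2, 0.3, 0.4};
    double[] y = new double[0];
    alglib.mlpcreatec1(4, 10, 3, out net);               // 4 inputs, 3 classes, softmax outputs
    alglib.mlpecreatefromnetwork(net, 20, out ensemble); // ensemble of 20 networks with this geometry
    alglib.mlpeprocess(ensemble, x, ref y);              // averaged class posteriors (still untrained here)

As the code above shows, only the geometry and per-member copies of
ColumnMeans/ColumnSigmas are taken from the source network; the ensemble
weights are re-initialized randomly and still have to be trained, e.g.
with the ensemble training routines of this unit.
*************************************************************************/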
/************************************************************************* IMPORTANT: this is an "expert" version of the MLPTrain() function. We do not recommend you to use it unless you are pretty sure that you need ability to monitor training progress. This function performs step-by-step training of the neural network. Here "step-by-step" means that training starts with MLPStartTraining() call, and then user subsequently calls MLPContinueTraining() to perform one more iteration of the training. After call to this function trainer object remembers network and is ready to train it. However, no training is performed until first call to MLPContinueTraining() function. Subsequent calls to MLPContinueTraining() will advance training progress one iteration further. EXAMPLE: > > ...initialize network and trainer object.... > > MLPStartTraining(Trainer, Network, True) > while MLPContinueTraining(Trainer, Network) do > ...visualize training progress... > INPUT PARAMETERS: S - trainer object Network - neural network. It must have same number of inputs and output/classes as was specified during creation of the trainer object. RandomStart - randomize network before training or not: * True means that network is randomized and its initial state (one which was passed to the trainer object) is lost. * False means that training is started from the current state of the network OUTPUT PARAMETERS: Network - neural network which is ready to training (weights are initialized, preprocessor is initialized using current training set) NOTE: this method uses sum-of-squares error function for training. NOTE: it is expected that trainer object settings are NOT changed during step-by-step training, i.e. no one changes stopping criteria or training set during training. It is possible and there is no defense against such actions, but algorithm behavior in such cases is undefined and can be unpredictable. -- ALGLIB -- Copyright 23.07.2012 by Bochkanov Sergey *************************************************************************/ public static void mlpstarttraining(mlptrainer s, mlpbase.multilayerperceptron network, bool randomstart) { int nin = 0; int nout = 0; int wcount = 0; int ntype = 0; int ttype = 0; alglib.ap.assert(s.npoints>=0, "MLPStartTraining: parameter S is not initialized or is spoiled(S.NPoints<0)"); if( !mlpbase.mlpissoftmax(network) ) { ntype = 0; } else { ntype = 1; } if( s.rcpar ) { ttype = 0; } else { ttype = 1; } alglib.ap.assert(ntype==ttype, "MLPStartTraining: type of input network is not similar to network type in trainer object"); mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount); alglib.ap.assert(s.nin==nin, "MLPStartTraining: number of inputs in trainer is not equal to number of inputs in the network."); alglib.ap.assert(s.nout==nout, "MLPStartTraining: number of outputs in trainer is not equal to number of outputs in the network."); // // Create LBFGS optimizer // minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, s.lbfgsfactor), network.weights, s.tstate); minlbfgs.minlbfgssetcond(s.tstate, 0.0, 0.0, s.wstep, s.maxits); minlbfgs.minlbfgssetxrep(s.tstate, true); // // Create duplicate of the network // mlpbase.mlpcopy(network, s.tnetwork); // // Train network // mlpstarttrainingx(s, network, s.tnetwork, s.tstate, randomstart, s.subset, -1); }