/*************************************************************************
Like MLPCreate0, but for ensembles.

A prototype network is created with mlpbase.mlpcreate0(NIn, NOut) and then
passed, together with EnsembleSize and Ensemble, to mlpecreatefromnetwork.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpecreate0(int nin, int nout, int ensemblesize, ref mlpensemble ensemble)
{
    //
    // Prototype network the ensemble is built from
    //
    mlpbase.multilayerperceptron prototype = new mlpbase.multilayerperceptron();

    mlpbase.mlpcreate0(nin, nout, ref prototype);
    mlpecreatefromnetwork(ref prototype, ensemblesize, ref ensemble);
}
/*************************************************************************
Like MLPCreateR2, but for ensembles.

A prototype network is created with
mlpbase.mlpcreater2(NIn, NHid1, NHid2, NOut, A, B) and then passed,
together with EnsembleSize and Ensemble, to mlpecreatefromnetwork.

  -- ALGLIB --
     Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpecreater2(int nin, int nhid1, int nhid2, int nout, double a, double b, int ensemblesize, ref mlpensemble ensemble)
{
    //
    // Prototype network the ensemble is built from
    //
    mlpbase.multilayerperceptron prototype = new mlpbase.multilayerperceptron();

    mlpbase.mlpcreater2(nin, nhid1, nhid2, nout, a, b, ref prototype);
    mlpecreatefromnetwork(ref prototype, ensemblesize, ref ensemble);
}
/*************************************************************************
Internal cross-validation subroutine

The dataset is split into FoldsCount folds by mlpkfoldsplit. For every
fold a private copy of N is trained on the rows outside the fold and the
error metrics are accumulated on the rows inside the fold.

INPUT PARAMETERS:
    N           -   network. It is only copied (mlpcopy); training is
                    performed on the copy.
    XY          -   dataset, NPoints rows. Row layout read by this routine:
                    * softmax network: NIn inputs, then the class index
                      (rounded to the nearest integer)
                    * otherwise: NIn inputs, then NOut target values
    NPoints     -   number of rows, NPoints>0
    Decay       -   passed unchanged to the trainer
    Restarts    -   passed unchanged to the trainer
    FoldsCount  -   number of folds, 2<=FoldsCount<=NPoints
    LMAlgorithm -   True:  mlptrainlm is used
                    False: mlptrainlbfgs is used (with WStep, MaxIts)
    WStep       -   passed to mlptrainlbfgs only
    MaxIts      -   passed to mlptrainlbfgs only

OUTPUT PARAMETERS:
    Info        -   * -1, if NPoints<=0, FoldsCount<2 or FoldsCount>NPoints
                    * negative code reported by the trainer, if training
                      failed on some fold (CVRep is zeroed in this case)
                    *  1, on success
    Rep         -   NGrad, NHess, NCholesky summed over all folds
    CVRep       -   cross-validation estimates. RelCLSError and AvgCE are
                    filled for softmax networks only. AvgRelError is
                    averaged over the terms which were actually accumulated
                    (for regression: outputs with non-zero target); when
                    there are no such terms it is left equal to zero.
*************************************************************************/
private static void mlpkfoldcvgeneral(ref mlpbase.multilayerperceptron n, ref double[,] xy, int npoints, double decay, int restarts, int foldscount, bool lmalgorithm, double wstep, int maxits, ref int info, ref mlpreport rep, ref mlpcvreport cvrep)
{
    int i = 0;
    int fold = 0;
    int j = 0;
    int k = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int rowlen = 0;
    int wcount = 0;
    int nclasses = 0;
    int tssize = 0;
    int cvssize = 0;
    double[,] cvset = new double[0,0];
    double[,] testset = new double[0,0];
    int[] folds = new int[0];
    int relcnt = 0;
    mlpreport internalrep = new mlpreport();
    double[] x = new double[0];
    double[] y = new double[0];
    int i_ = 0;

    //
    // Read network geometry, test parameters
    //
    mlpbase.mlpproperties(ref n, ref nin, ref nout, ref wcount);
    if( mlpbase.mlpissoftmax(ref n) )
    {
        nclasses = nout;
        rowlen = nin+1;
    }
    else
    {
        nclasses = -nout;
        rowlen = nin+nout;
    }
    if( npoints<=0 | foldscount<2 | foldscount>npoints )
    {
        info = -1;
        return;
    }
    mlpbase.mlpcopy(ref n, ref network);

    //
    // K-fold out cross-validation.
    // First, estimate generalization error
    //
    testset = new double[npoints, rowlen];
    cvset = new double[npoints, rowlen];
    x = new double[nin];
    y = new double[nout];
    mlpkfoldsplit(ref xy, npoints, nclasses, foldscount, false, ref folds);
    cvrep.relclserror = 0;
    cvrep.avgce = 0;
    cvrep.rmserror = 0;
    cvrep.avgerror = 0;
    cvrep.avgrelerror = 0;
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    relcnt = 0;
    for(fold=0; fold<=foldscount-1; fold++)
    {
        //
        // Separate set: rows of the current fold go to the test set,
        // all other rows go to the training set
        //
        tssize = 0;
        cvssize = 0;
        for(i=0; i<=npoints-1; i++)
        {
            if( folds[i]==fold )
            {
                for(i_=0; i_<=rowlen-1; i_++)
                {
                    testset[tssize,i_] = xy[i,i_];
                }
                tssize = tssize+1;
            }
            else
            {
                for(i_=0; i_<=rowlen-1; i_++)
                {
                    cvset[cvssize,i_] = xy[i,i_];
                }
                cvssize = cvssize+1;
            }
        }

        //
        // Train on CV training set
        //
        if( lmalgorithm )
        {
            mlptrainlm(ref network, ref cvset, cvssize, decay, restarts, ref info, ref internalrep);
        }
        else
        {
            mlptrainlbfgs(ref network, ref cvset, cvssize, decay, restarts, wstep, maxits, ref info, ref internalrep);
        }
        if( info<0 )
        {
            //
            // Training failed: report no estimates, keep trainer's code in Info
            //
            cvrep.relclserror = 0;
            cvrep.avgce = 0;
            cvrep.rmserror = 0;
            cvrep.avgerror = 0;
            cvrep.avgrelerror = 0;
            return;
        }
        rep.ngrad = rep.ngrad+internalrep.ngrad;
        rep.nhess = rep.nhess+internalrep.nhess;
        rep.ncholesky = rep.ncholesky+internalrep.ncholesky;

        //
        // Estimate error using CV test set
        //
        if( mlpbase.mlpissoftmax(ref network) )
        {
            //
            // classification-only code
            //
            cvrep.relclserror = cvrep.relclserror+mlpbase.mlpclserror(ref network, ref testset, tssize);
            cvrep.avgce = cvrep.avgce+mlpbase.mlperrorn(ref network, ref testset, tssize);
        }
        for(i=0; i<=tssize-1; i++)
        {
            for(i_=0; i_<=nin-1; i_++)
            {
                x[i_] = testset[i,i_];
            }
            mlpbase.mlpprocess(ref network, ref x, ref y);
            if( mlpbase.mlpissoftmax(ref network) )
            {
                //
                // Classification-specific code:
                // desired output is 1 for the true class K and 0 otherwise
                //
                k = (int)Math.Round(testset[i,nin]);
                for(j=0; j<=nout-1; j++)
                {
                    if( j==k )
                    {
                        cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-1);
                        cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-1);
                        cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs(y[j]-1);
                        relcnt = relcnt+1;
                    }
                    else
                    {
                        cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]);
                        cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]);
                    }
                }
            }
            else
            {
                //
                // Regression-specific code:
                // relative error is accumulated for non-zero targets only
                //
                for(j=0; j<=nout-1; j++)
                {
                    cvrep.rmserror = cvrep.rmserror+AP.Math.Sqr(y[j]-testset[i,nin+j]);
                    cvrep.avgerror = cvrep.avgerror+Math.Abs(y[j]-testset[i,nin+j]);
                    if( (double)(testset[i,nin+j])!=(double)(0) )
                    {
                        cvrep.avgrelerror = cvrep.avgrelerror+Math.Abs((y[j]-testset[i,nin+j])/testset[i,nin+j]);
                        relcnt = relcnt+1;
                    }
                }
            }
        }
    }
    if( mlpbase.mlpissoftmax(ref network) )
    {
        cvrep.relclserror = cvrep.relclserror/npoints;
        cvrep.avgce = cvrep.avgce/(Math.Log(2)*npoints);
    }
    cvrep.rmserror = Math.Sqrt(cvrep.rmserror/(npoints*nout));
    cvrep.avgerror = cvrep.avgerror/(npoints*nout);

    //
    // RelCnt is the number of terms accumulated in AvgRelError. It is zero
    // when every regression target is exactly zero; dividing in that case
    // would turn the (zero) sum into NaN, so the sum is left as is.
    //
    if( relcnt>0 )
    {
        cvrep.avgrelerror = cvrep.avgrelerror/relcnt;
    }
    info = 1;
}
/*************************************************************************
Internal bagging subroutine.

For every ensemble member a bootstrap sample of NPoints rows is drawn with
replacement from XY, the network unserialized from Ensemble.SerializedMLP
is trained on it, and its weights/column means/column sigmas are stored in
the corresponding slice of the ensemble arrays. Rows which were not drawn
for a member ("out-of-bag" rows) are processed by that member; averaged
out-of-bag predictions are then compared with the true targets.

INPUT PARAMETERS:
    Ensemble    -   ensemble to train
    XY          -   dataset, NPoints rows. Row layout read by this routine:
                    * softmax ensemble: NIn inputs, then the class index
                      (rounded to the nearest integer)
                    * otherwise: NIn inputs, then NOut target values
    NPoints     -   number of rows, NPoints>0
    Decay       -   passed unchanged to the trainer
    Restarts    -   passed unchanged to the trainer, Restarts>=1
    WStep       -   passed to mlptrainlbfgs, WStep>=0
    MaxIts      -   passed to mlptrainlbfgs, MaxIts>=0
    LMAlgorithm -   True:  mlptrain.mlptrainlm is used
                    False: mlptrain.mlptrainlbfgs is used

OUTPUT PARAMETERS:
    Info        -   * -8, if LMAlgorithm=False and both WStep=0, MaxIts=0
                    * -1, if NPoints<=0, Restarts<1, WStep<0 or MaxIts<0
                    * -2, if softmax ensemble is given a class index
                          outside of [0,NOut-1]
                    * otherwise Info is set to 2 before training starts and
                      is then overwritten by every training call; training
                      stops immediately on a negative code.
    Rep         -   NGrad, NHess, NCholesky summed over all members
    OOBErrors   -   out-of-bag estimates taken from the first five entries
                    of the buffer maintained by bdss.dserr* functions

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
private static void mlpebagginginternal(ref mlpensemble ensemble, ref double[,] xy, int npoints, double decay, int restarts, double wstep, int maxits, bool lmalgorithm, ref int info, ref mlptrain.mlpreport rep, ref mlptrain.mlpcvreport ooberrors)
{
    double[,] xys = new double[0,0];
    bool[] s = new bool[0];
    double[,] oobbuf = new double[0,0];
    int[] oobcntbuf = new int[0];
    double[] x = new double[0];
    double[] y = new double[0];
    double[] dy = new double[0];
    double[] dsbuf = new double[0];
    int nin = 0;
    int nout = 0;
    int ccnt = 0;
    int pcnt = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int offs = 0;
    double v = 0;
    mlptrain.mlpreport tmprep = new mlptrain.mlpreport();
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int i_ = 0;

    //
    // Test for inputs
    //
    if( !lmalgorithm & (double)(wstep)==(double)(0) & maxits==0 )
    {
        info = -8;
        return;
    }
    if( npoints<=0 | restarts<1 | (double)(wstep)<(double)(0) | maxits<0 )
    {
        info = -1;
        return;
    }
    if( ensemble.issoftmax )
    {
        for(i=0; i<=npoints-1; i++)
        {
            if( (int)Math.Round(xy[i,ensemble.nin])<0 | (int)Math.Round(xy[i,ensemble.nin])>=ensemble.nout )
            {
                info = -2;
                return;
            }
        }
    }

    //
    // allocate temporaries
    //
    // CCnt is the number of dataset columns,
    // PCnt is the length of one member's slice in ColumnMeans/ColumnSigmas
    //
    info = 2;
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    ooberrors.relclserror = 0;
    ooberrors.avgce = 0;
    ooberrors.rmserror = 0;
    ooberrors.avgerror = 0;
    ooberrors.avgrelerror = 0;
    nin = ensemble.nin;
    nout = ensemble.nout;
    if( ensemble.issoftmax )
    {
        ccnt = nin+1;
        pcnt = nin;
    }
    else
    {
        ccnt = nin+nout;
        pcnt = nin+nout;
    }
    xys = new double[npoints, ccnt];
    s = new bool[npoints];
    oobbuf = new double[npoints, nout];
    oobcntbuf = new int[npoints];
    x = new double[nin];
    y = new double[nout];
    if( ensemble.issoftmax )
    {
        dy = new double[1];
    }
    else
    {
        dy = new double[nout];
    }
    for(i=0; i<=npoints-1; i++)
    {
        for(j=0; j<=nout-1; j++)
        {
            oobbuf[i,j] = 0;
        }
    }
    for(i=0; i<=npoints-1; i++)
    {
        oobcntbuf[i] = 0;
    }
    mlpbase.mlpunserialize(ref ensemble.serializedmlp, ref network);

    //
    // main bagging cycle
    //
    for(k=0; k<=ensemble.ensemblesize-1; k++)
    {
        //
        // prepare dataset: bootstrap sample, S[] marks rows which were drawn
        //
        for(i=0; i<=npoints-1; i++)
        {
            s[i] = false;
        }
        for(i=0; i<=npoints-1; i++)
        {
            j = AP.Math.RandomInteger(npoints);
            s[j] = true;
            for(i_=0; i_<=ccnt-1; i_++)
            {
                xys[i,i_] = xy[j,i_];
            }
        }

        //
        // train
        //
        if( lmalgorithm )
        {
            mlptrain.mlptrainlm(ref network, ref xys, npoints, decay, restarts, ref info, ref tmprep);
        }
        else
        {
            mlptrain.mlptrainlbfgs(ref network, ref xys, npoints, decay, restarts, wstep, maxits, ref info, ref tmprep);
        }
        if( info<0 )
        {
            return;
        }

        //
        // save results into K-th slice of the ensemble arrays
        //
        rep.ngrad = rep.ngrad+tmprep.ngrad;
        rep.nhess = rep.nhess+tmprep.nhess;
        rep.ncholesky = rep.ncholesky+tmprep.ncholesky;
        offs = k*ensemble.wcount;
        for(i_=0; i_<=ensemble.wcount-1; i_++)
        {
            ensemble.weights[offs+i_] = network.weights[i_];
        }
        offs = k*pcnt;
        for(i_=0; i_<=pcnt-1; i_++)
        {
            ensemble.columnmeans[offs+i_] = network.columnmeans[i_];
        }
        for(i_=0; i_<=pcnt-1; i_++)
        {
            ensemble.columnsigmas[offs+i_] = network.columnsigmas[i_];
        }

        //
        // OOB estimates: accumulate predictions for rows
        // which were not used to train this member
        //
        for(i=0; i<=npoints-1; i++)
        {
            if( !s[i] )
            {
                for(i_=0; i_<=nin-1; i_++)
                {
                    x[i_] = xy[i,i_];
                }
                mlpbase.mlpprocess(ref network, ref x, ref y);
                for(i_=0; i_<=nout-1; i_++)
                {
                    oobbuf[i,i_] = oobbuf[i,i_]+y[i_];
                }
                oobcntbuf[i] = oobcntbuf[i]+1;
            }
        }
    }

    //
    // OOB estimates
    //
    if( ensemble.issoftmax )
    {
        bdss.dserrallocate(nout, ref dsbuf);
    }
    else
    {
        bdss.dserrallocate(-nout, ref dsbuf);
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( oobcntbuf[i]!=0 )
        {
            //
            // Y is the prediction averaged over members which did not see row I
            //
            v = (double)(1)/(double)(oobcntbuf[i]);
            for(i_=0; i_<=nout-1; i_++)
            {
                y[i_] = v*oobbuf[i,i_];
            }
            if( ensemble.issoftmax )
            {
                dy[0] = xy[i,nin];
            }
            else
            {
                //
                // Desired outputs are the targets themselves. Only the
                // accumulated predictions are averaged; scaling targets by
                // 1/OOBCnt would compare predictions against wrong values.
                //
                for(i_=0; i_<=nout-1; i_++)
                {
                    dy[i_] = xy[i,nin+i_];
                }
            }
            bdss.dserraccumulate(ref dsbuf, ref y, ref dy);
        }
    }
    bdss.dserrfinish(ref dsbuf);
    ooberrors.relclserror = dsbuf[0];
    ooberrors.avgce = dsbuf[1];
    ooberrors.rmserror = dsbuf[2];
    ooberrors.avgerror = dsbuf[3];
    ooberrors.avgrelerror = dsbuf[4];
}
/*************************************************************************
This subroutine trains logit model.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars]
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=NVars+2
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1, NClasses<2).
                    *  1, if task has been solved
    LM          -   model built
    Rep         -   training report (NGrad, NHess counters)

ALGORITHM OUTLINE (as implemented below):
    * when all points belong to the same class, coefficients are written
      directly and no optimization is performed
    * otherwise a classifier network created by mlpbase.mlpcreatec0 is
      optimized with weight decay 0.001: first by WCount/3+11 line
      searches along the antigradient, then by line searches along the
      Newton direction (antigradient is used instead when Cholesky
      factorization or the triangular solve fails); iterations stop after
      a Newton step whose line search returns code 2, 4 or 6
    * network weights are converted to the MNL coefficient layout

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnltrainh(ref double[,] xy, int npoints, int nvars, int nclasses, ref int info, ref logitmodel lm, ref mnlreport rep)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int ssize = 0;
    bool allsame = new bool();
    int offs = 0;
    double decay = 0;
    int wdim = 0;
    double v = 0;
    double s = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double e = 0;
    double[] g = new double[0];
    double[,] h = new double[0,0];
    bool spd = new bool();
    double wstep = 0;
    double[] wdir = new double[0];
    double[] work = new double[0];
    int mcstage = 0;
    logitmcstate mcstate = new logitmcstate();
    int mcinfo = 0;
    int mcnfev = 0;
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;

    decay = 0.001;

    //
    // Test for inputs
    //
    if( npoints<nvars+2 | nvars<1 | nclasses<2 )
    {
        info = -1;
        return;
    }
    for(i=0; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])<0 | (int)Math.Round(xy[i,nvars])>=nclasses )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;

    //
    // Allocate array:
    // 5 header entries, WDim coefficients, NClasses trailing entries
    //
    wdim = (nvars+1)*(nclasses-1);
    offs = 5;
    ssize = 5+wdim+nclasses;
    lm.w = new double[ssize];
    lm.w[0] = ssize;
    lm.w[1] = logitvnum;
    lm.w[2] = nvars;
    lm.w[3] = nclasses;
    lm.w[4] = offs;

    //
    // Degenerate case: all outputs are equal
    //
    allsame = true;
    for(i=1; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])!=(int)Math.Round(xy[i-1,nvars]) )
        {
            allsame = false;
        }
    }
    if( allsame )
    {
        for(i=0; i<=wdim-1; i++)
        {
            lm.w[offs+i] = 0;
        }
        v = -(2*Math.Log(AP.Math.MinRealNumber));
        k = (int)Math.Round(xy[0,nvars]);
        if( k==nclasses-1 )
        {
            for(i=0; i<=nclasses-2; i++)
            {
                lm.w[offs+i*(nvars+1)+nvars] = -v;
            }
        }
        else
        {
            for(i=0; i<=nclasses-2; i++)
            {
                if( i==k )
                {
                    lm.w[offs+i*(nvars+1)+nvars] = +v;
                }
                else
                {
                    lm.w[offs+i*(nvars+1)+nvars] = 0;
                }
            }
        }
        return;
    }

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpcreatec0(nvars, nclasses, ref network);
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    for(i=0; i<=wcount-1; i++)
    {
        network.weights[i] = (2*AP.Math.RandomReal()-1)/nvars;
    }
    g = new double[wcount];
    h = new double[wcount, wcount];
    wdir = new double[wcount];
    work = new double[wcount];

    //
    // First stage: optimize in gradient direction.
    //
    for(k=0; k<=wcount/3+10; k++)
    {
        //
        // Calculate gradient in starting point
        // (error and gradient are augmented with the decay term)
        //
        mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref e, ref g);
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1; i_++)
        {
            g[i_] = g[i_]+decay*network.weights[i_];
        }
        rep.ngrad = rep.ngrad+1;

        //
        // Setup optimization scheme:
        // WDir is the normalized antigradient, WStep is its original length
        //
        for(i_=0; i_<=wcount-1; i_++)
        {
            wdir[i_] = -g[i_];
        }
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1; i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, ref wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, ref mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                g[i_] = g[i_]+decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, ref wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, ref mcstate, ref mcstage);
        }
    }

    //
    // Second stage: use Hessian when we are close to the minimum
    //
    while( true )
    {
        //
        // Calculate and update E/G/H
        //
        mlpbase.mlphessiannbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1; i_++)
        {
            g[i_] = g[i_]+decay*network.weights[i_];
        }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;

        //
        // Select step direction
        // NOTE: it is important to use lower-triangle Cholesky
        // factorization since it is much faster than higher-triangle version.
        //
        // The solver is called only when factorization succeeded: after a
        // failed factorization H does not hold a valid Cholesky factor, so
        // a "successful" solve on it would give a meaningless direction.
        //
        spd = trfac.spdmatrixcholesky(ref h, wcount, false);
        if( spd )
        {
            densesolver.spdmatrixcholeskysolve(ref h, wcount, false, ref g, ref solverinfo, ref solverrep, ref wdir);
            spd = solverinfo>0;
        }
        if( spd )
        {
            //
            // H is positive definite.
            // Step in Newton direction.
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                wdir[i_] = -1*wdir[i_];
            }
        }
        else
        {
            //
            // H is indefinite.
            // Step in gradient direction.
            //
            for(i_=0; i_<=wcount-1; i_++)
            {
                wdir[i_] = -g[i_];
            }
        }

        //
        // Optimize in WDir direction
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1; i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1; i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, ref wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, ref mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1; i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1; i_++)
            {
                g[i_] = g[i_]+decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, ref wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, ref mcstate, ref mcstage);
        }

        //
        // Stop only after a Newton step with line search code 2, 4 or 6
        //
        if( spd & (mcinfo==2 | mcinfo==4 | mcinfo==6) )
        {
            break;
        }
    }

    //
    // Convert from NN format to MNL format:
    // * copy weights
    // * undo input standardization using ColumnMeans/ColumnSigmas
    //   (zero sigma is treated as 1)
    // * change sign of the constant term of every class
    //
    for(i_=0; i_<=wcount-1; i_++)
    {
        lm.w[offs+i_] = network.weights[i_];
    }
    for(k=0; k<=nvars-1; k++)
    {
        for(i=0; i<=nclasses-2; i++)
        {
            s = network.columnsigmas[k];
            if( (double)(s)==(double)(0) )
            {
                s = 1;
            }
            j = offs+(nvars+1)*i;
            v = lm.w[j+k];
            lm.w[j+k] = v/s;
            lm.w[j+nvars] = lm.w[j+nvars]+v*network.columnmeans[k]/s;
        }
    }
    for(k=0; k<=nclasses-2; k++)
    {
        lm.w[offs+(nvars+1)*k+nvars] = -lm.w[offs+(nvars+1)*k+nvars];
    }
}
/*************************************************************************
MLP unit test.

Runs informational/processing/gradient/Hessian checks over a grid of
network geometries, trains a small network on XOR data (training is
reported as failed when RMS error exceeds 0.1) and calls K-fold
cross-validation on a random noisy classification set (its results are
not inspected). Unless Silent is set, a report is written to console.

Returns True when no check reported an error.
*************************************************************************/
public static bool testmlp(bool silent)
{
    const int passcount = 10;
    const int maxn = 4;
    const int maxhid = 4;
    const int cvpoints = 100;

    int info = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    mlptrain.mlpreport rep = new mlptrain.mlpreport();
    mlptrain.mlpcvreport cvrep = new mlptrain.mlpcvreport();
    bool inferrors = false;
    bool procerrors = false;
    bool graderrors = false;
    bool hesserrors = false;
    bool trnerrors = false;

    //
    // General multilayer network tests
    //
    for(int nf=1; nf<=maxn; nf++)
    {
        for(int nl=1; nl<=maxn; nl++)
        {
            for(int nhid1=0; nhid1<=maxhid; nhid1++)
            {
                for(int nhid2=0; nhid2<=0; nhid2++)
                {
                    for(int nkind=0; nkind<=3; nkind++)
                    {
                        //
                        // Skip meaningless parameters combinations
                        //
                        bool skip = (nkind==1 && nl<2) || (nhid1==0 && nhid2!=0);
                        if( skip )
                        {
                            continue;
                        }

                        //
                        // Tests
                        //
                        testinformational(nkind, nf, nhid1, nhid2, nl, passcount, ref inferrors);
                        testprocessing(nkind, nf, nhid1, nhid2, nl, passcount, ref procerrors);
                        testgradient(nkind, nf, nhid1, nhid2, nl, passcount, ref graderrors);
                        testhessian(nkind, nf, nhid1, nhid2, nl, passcount, ref hesserrors);
                    }
                }
            }
        }
    }

    //
    // Test network training on simple XOR problem
    // (two inputs followed by the desired output)
    //
    double[,] xorset = new double[,]
    {
        { -1, -1, -1 },
        { +1, -1, +1 },
        { -1, +1, +1 },
        { +1, +1, -1 }
    };
    mlpbase.mlpcreate1(2, 2, 1, ref network);
    mlptrain.mlptrainlm(ref network, ref xorset, 4, 0.001, 10, ref info, ref rep);
    trnerrors |= mlpbase.mlprmserror(ref network, ref xorset, 4)>0.1;

    //
    // Test CV on random noisy problem
    //
    double[,] noisyset = new double[cvpoints, 2];
    for(int row=0; row<cvpoints; row++)
    {
        noisyset[row,0] = 2*AP.Math.RandomReal()-1;
        noisyset[row,1] = AP.Math.RandomInteger(4);
    }
    mlpbase.mlpcreatec0(1, 4, ref network);
    mlptrain.mlpkfoldcvlm(ref network, ref noisyset, cvpoints, 0.001, 5, 10, ref info, ref rep, ref cvrep);

    //
    // Final report
    //
    bool waserrors = inferrors | procerrors | graderrors | hesserrors | trnerrors;
    if( !silent )
    {
        string[] captions = new string[]
        {
            "INFORMATIONAL FUNCTIONS: ",
            "BASIC PROCESSING: ",
            "GRADIENT CALCULATION: ",
            "HESSIAN CALCULATION: ",
            "TRAINING: "
        };
        bool[] failed = new bool[] { inferrors, procerrors, graderrors, hesserrors, trnerrors };

        System.Console.Write("MLP TEST");
        System.Console.WriteLine();
        for(int idx=0; idx<captions.Length; idx++)
        {
            System.Console.Write(captions[idx]);
            System.Console.Write(failed[idx] ? "FAILED" : "OK");
            System.Console.WriteLine();
        }
        System.Console.Write(waserrors ? "TEST SUMMARY: FAILED" : "TEST SUMMARY: PASSED");
        System.Console.WriteLine();
        System.Console.WriteLine();
        System.Console.WriteLine();
    }
    return !waserrors;
}
/*************************************************************************
Gradient functions test

For PassCount randomized networks of the requested kind/geometry this
routine checks that:
* MLPGrad/MLPError agree with the half sum of squared residuals computed
  from MLPProcess, and the gradient agrees with a 4-point finite
  difference approximation
* MLPGradN/MLPErrorN agree with the "natural" error computed here (least
  squares for NKind<>1, cross-entropy style sum for NKind=1) and with a
  2-point finite difference approximation
* MLPGradBatch/MLPGradNBatch agree with sums of per-sample calls

Err is OR-ed with the outcome of every comparison; it is never reset.
*************************************************************************/
private static void testgradient(int nkind, int nin, int nhid1, int nhid2, int nout, int passcount, ref bool err)
{
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    int n1 = 0;
    int n2 = 0;
    int wcount = 0;
    double a1 = 0;
    double a2 = 0;
    double e = 0;
    double e2 = 0;
    double v = 0;
    double[,] xy = new double[0,0];

    System.Diagnostics.Debug.Assert(passcount>=2, "PassCount<2!");

    //
    // Network parameters depend on its kind
    //
    switch( nkind )
    {
        case 2:
            a1 = 1000*AP.Math.RandomReal()-500;
            a2 = 2*AP.Math.RandomReal()-1;
            break;
        case 3:
            a1 = 1000*AP.Math.RandomReal()-500;
            a2 = a1+(2*AP.Math.RandomInteger(2)-1)*(0.1+0.9*AP.Math.RandomReal());
            break;
    }
    createnetwork(ref net, nkind, a1, a2, nin, nhid1, nhid2, nout);
    mlpbase.mlpproperties(ref net, ref n1, ref n2, ref wcount);

    double step = 0.0001;
    double etol = 0.01;

    //
    // Initialize
    //
    double[] x = new double[nin];
    double[] x1 = new double[nin];
    double[] y = new double[nout];
    double[] y1 = new double[nout];
    double[] y2 = new double[nout];
    double[] numgrad = new double[wcount];
    double[] grad = new double[wcount];

    //
    // Process
    //
    for(int pass=1; pass<=passcount; pass++)
    {
        mlpbase.mlprandomizefull(ref net);

        //
        // Test error/gradient calculation (least squares)
        //
        xy = new double[1, nin+nout];
        gradtestmakesample(ref net, nin, nout, x, y, xy, 0);
        mlpbase.mlpgrad(ref net, ref x, ref y, ref e, ref grad);
        v = gradtesthalfsqerr(ref net, ref x, y, ref y2, nout);
        err |= Math.Abs((v-e)/v)>etol;
        err |= Math.Abs((mlpbase.mlperror(ref net, ref xy, 1)-v)/v)>etol;
        for(int w=0; w<wcount; w++)
        {
            //
            // 4-point finite difference formula
            //
            double saved = net.weights[w];
            net.weights[w] = saved-2*step;
            double em2 = gradtesthalfsqerr(ref net, ref x, y, ref y1, nout);
            net.weights[w] = saved-step;
            double em1 = gradtesthalfsqerr(ref net, ref x, y, ref y1, nout);
            net.weights[w] = saved+step;
            double ep1 = gradtesthalfsqerr(ref net, ref x, y, ref y1, nout);
            net.weights[w] = saved+2*step;
            double ep2 = gradtesthalfsqerr(ref net, ref x, y, ref y1, nout);
            net.weights[w] = saved;
            numgrad[w] = (em2-8*em1+8*ep1-ep2)/(12*step);
            err |= gradtestmismatch(numgrad[w], grad[w], etol);
        }

        //
        // Test error/gradient calculation (natural).
        // Testing on non-random structure networks
        // (because NKind is representative only in that case).
        //
        xy = new double[1, nin+nout];
        gradtestmakesample(ref net, nin, nout, x, y, xy, 0);
        mlpbase.mlpgradn(ref net, ref x, ref y, ref e, ref grad);
        mlpbase.mlpprocess(ref net, ref x, ref y2);
        v = 0;
        if( nkind!=1 )
        {
            for(int c=0; c<nout; c++)
            {
                v += 0.5*AP.Math.Sqr(y2[c]-y[c]);
            }
        }
        else
        {
            for(int c=0; c<nout; c++)
            {
                if( y[c]==0 )
                {
                    continue;
                }
                if( y2[c]==0 )
                {
                    v += y[c]*Math.Log(AP.Math.MaxRealNumber);
                }
                else
                {
                    v += y[c]*Math.Log(y[c]/y2[c]);
                }
            }
        }
        err |= Math.Abs((v-e)/v)>etol;
        err |= Math.Abs((mlpbase.mlperrorn(ref net, ref xy, 1)-v)/v)>etol;
        for(int w=0; w<wcount; w++)
        {
            //
            // 2-point finite difference formula:
            // Y2 holds outputs at W+Step, Y1 holds outputs at W-Step
            //
            double saved = net.weights[w];
            net.weights[w] = saved+step;
            mlpbase.mlpprocess(ref net, ref x, ref y2);
            net.weights[w] = saved-step;
            mlpbase.mlpprocess(ref net, ref x, ref y1);
            net.weights[w] = saved;
            v = 0;
            if( nkind!=1 )
            {
                for(int c=0; c<nout; c++)
                {
                    v += 0.5*(AP.Math.Sqr(y2[c]-y[c])-AP.Math.Sqr(y1[c]-y[c]))/(2*step);
                }
            }
            else
            {
                for(int c=0; c<nout; c++)
                {
                    if( y[c]==0 )
                    {
                        continue;
                    }
                    if( y2[c]==0 )
                    {
                        v += y[c]*Math.Log(AP.Math.MaxRealNumber);
                    }
                    else
                    {
                        v += y[c]*Math.Log(y[c]/y2[c]);
                    }
                    if( y1[c]==0 )
                    {
                        v -= y[c]*Math.Log(AP.Math.MaxRealNumber);
                    }
                    else
                    {
                        v -= y[c]*Math.Log(y[c]/y1[c]);
                    }
                }
                v = v/(2*step);
            }
            numgrad[w] = v;
            err |= gradtestmismatch(numgrad[w], grad[w], etol);
        }

        //
        // Test gradient calculation: batch.
        // First round uses least squares error, second one - natural error func.
        //
        for(int round=0; round<2; round++)
        {
            bool natural = round==1;
            int ssize = 1+AP.Math.RandomInteger(10);
            xy = new double[ssize, nin+nout];
            for(int w=0; w<wcount; w++)
            {
                numgrad[w] = 0;
            }
            double e1 = 0;
            for(int row=0; row<ssize; row++)
            {
                gradtestmakesample(ref net, nin, nout, x1, y1, xy, row);
                if( natural )
                {
                    mlpbase.mlpgradn(ref net, ref x1, ref y1, ref v, ref grad);
                }
                else
                {
                    mlpbase.mlpgrad(ref net, ref x1, ref y1, ref v, ref grad);
                }
                e1 += v;
                for(int w=0; w<wcount; w++)
                {
                    numgrad[w] += grad[w];
                }
            }
            if( natural )
            {
                mlpbase.mlpgradnbatch(ref net, ref xy, ssize, ref e2, ref grad);
            }
            else
            {
                mlpbase.mlpgradbatch(ref net, ref xy, ssize, ref e2, ref grad);
            }
            err |= Math.Abs(e1-e2)/e1>etol;
            err |= gradtestbatchmismatch(numgrad, grad, wcount, etol);
        }
    }
}

/*************************************************************************
Helper of TestGradient: fills X with random inputs from [-2,2), copies
them to row Row of XY, then generates desired outputs Y and stores them
to XY (class index in column NIn for softmax networks, NOut real values
starting from column NIn otherwise).
*************************************************************************/
private static void gradtestmakesample(ref mlpbase.multilayerperceptron net, int nin, int nout, double[] x, double[] y, double[,] xy, int row)
{
    for(int c=0; c<nin; c++)
    {
        x[c] = 4*AP.Math.RandomReal()-2;
    }
    for(int c=0; c<nin; c++)
    {
        xy[row,c] = x[c];
    }
    if( mlpbase.mlpissoftmax(ref net) )
    {
        for(int c=0; c<nout; c++)
        {
            y[c] = 0;
        }
        xy[row,nin] = AP.Math.RandomInteger(nout);
        y[(int)Math.Round(xy[row,nin])] = 1;
    }
    else
    {
        for(int c=0; c<nout; c++)
        {
            y[c] = 4*AP.Math.RandomReal()-2;
        }
        for(int c=0; c<nout; c++)
        {
            xy[row,nin+c] = y[c];
        }
    }
}

/*************************************************************************
Helper of TestGradient: processes X, stores residuals (output minus
Target) to Buf and returns half of the sum of their squares.
*************************************************************************/
private static double gradtesthalfsqerr(ref mlpbase.multilayerperceptron net, ref double[] x, double[] target, ref double[] buf, int nout)
{
    mlpbase.mlpprocess(ref net, ref x, ref buf);
    for(int c=0; c<nout; c++)
    {
        buf[c] = buf[c]-target[c];
    }
    double sum = 0.0;
    for(int c=0; c<nout; c++)
    {
        sum += buf[c]*buf[c];
    }
    return sum/2;
}

/*************************************************************************
Helper of TestGradient: True when Actual differs from Expected by more
than Tol (relative difference for |Expected|>1.0E-3, absolute otherwise).
*************************************************************************/
private static bool gradtestmismatch(double expected, double actual, double tol)
{
    if( Math.Abs(expected)>1.0E-3 )
    {
        return Math.Abs((actual-expected)/expected)>tol;
    }
    return Math.Abs(actual-expected)>tol;
}

/*************************************************************************
Helper of TestGradient: True when some of the first Count entries of
Actual differs from Expected (relative difference above Tol for non-zero
Expected[i], any difference for zero Expected[i]).
*************************************************************************/
private static bool gradtestbatchmismatch(double[] expected, double[] actual, int count, double tol)
{
    bool mismatch = false;
    for(int w=0; w<count; w++)
    {
        if( expected[w]!=0 )
        {
            mismatch |= Math.Abs((actual[w]-expected[w])/expected[w])>tol;
        }
        else
        {
            mismatch |= actual[w]!=expected[w];
        }
    }
    return mismatch;
}
/*************************************************************************
Processing functions test

For PassCount randomized networks (on the last pass all weights are
multiplied by zero) this routine checks that:
* same inputs give same outputs on the network itself, on its copy made
  by MLPCopy and on its copy made by MLPSerialize/MLPUnserialize
* on non-zero networks different inputs give different outputs, and both
  MLPRandomize and MLPRandomizeFull change outputs
* outputs of NKind=1 networks are non-negative and sum to one, outputs of
  NKind=2 networks are on the proper side of A1, outputs of NKind=3
  networks are between A1 and A2

Err is OR-ed with the outcome of every check; it is never reset.
*************************************************************************/
private static void testprocessing(int nkind, int nin, int nhid1, int nhid2, int nout, int passcount, ref bool err)
{
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.multilayerperceptron twin = new mlpbase.multilayerperceptron();
    int n1 = 0;
    int n2 = 0;
    int wcount = 0;
    int rlen = 0;
    double a1 = 0;
    double a2 = 0;
    double[] ra = new double[0];
    double[] ra2 = new double[0];

    System.Diagnostics.Debug.Assert(passcount>=2, "PassCount<2!");

    //
    // Prepare network
    //
    switch( nkind )
    {
        case 2:
            a1 = 1000*AP.Math.RandomReal()-500;
            a2 = 2*AP.Math.RandomReal()-1;
            break;
        case 3:
            a1 = 1000*AP.Math.RandomReal()-500;
            a2 = a1+(2*AP.Math.RandomInteger(2)-1)*(0.1+0.9*AP.Math.RandomReal());
            break;
    }
    createnetwork(ref net, nkind, a1, a2, nin, nhid1, nhid2, nout);
    mlpbase.mlpproperties(ref net, ref n1, ref n2, ref wcount);

    //
    // Initialize arrays
    //
    double[] x1 = new double[nin];
    double[] x2 = new double[nin];
    double[] y1 = new double[nout];
    double[] y2 = new double[nout];

    //
    // Main cycle
    //
    for(int pass=1; pass<=passcount; pass++)
    {
        //
        // Last run is made on zero network
        //
        mlpbase.mlprandomizefull(ref net);
        bool zeronet = pass==passcount;
        if( zeronet )
        {
            for(int w=0; w<wcount; w++)
            {
                net.weights[w] = 0*net.weights[w];
            }
        }

        //
        // Same inputs leads to same outputs
        //
        proctestfillpair(x1, x2, nin, false);
        proctestfillpair(y1, y2, nout, true);
        mlpbase.mlpprocess(ref net, ref x1, ref y1);
        mlpbase.mlpprocess(ref net, ref x2, ref y2);
        err |= !proctestsameoutputs(y1, y2, nout);

        //
        // Same inputs on original network leads to same outputs
        // on copy created using MLPCopy
        //
        unsetnetwork(ref twin);
        mlpbase.mlpcopy(ref net, ref twin);
        proctestfillpair(x1, x2, nin, false);
        proctestfillpair(y1, y2, nout, true);
        mlpbase.mlpprocess(ref net, ref x1, ref y1);
        mlpbase.mlpprocess(ref twin, ref x2, ref y2);
        err |= !proctestsameoutputs(y1, y2, nout);

        //
        // Same inputs on original network leads to same outputs
        // on copy created using MLPSerialize
        //
        unsetnetwork(ref twin);
        mlpbase.mlpserialize(ref net, ref ra, ref rlen);
        ra2 = new double[rlen];
        for(int c=0; c<rlen; c++)
        {
            ra2[c] = ra[c];
        }
        mlpbase.mlpunserialize(ref ra2, ref twin);
        proctestfillpair(x1, x2, nin, false);
        proctestfillpair(y1, y2, nout, true);
        mlpbase.mlpprocess(ref net, ref x1, ref y1);
        mlpbase.mlpprocess(ref twin, ref x2, ref y2);
        err |= !proctestsameoutputs(y1, y2, nout);

        if( !zeronet )
        {
            //
            // Different inputs leads to different outputs (non-zero network)
            //
            proctestfillpair(x1, x2, nin, true);
            proctestfillpair(y1, y2, nout, false);
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            mlpbase.mlpprocess(ref net, ref x2, ref y2);
            err |= proctestsameoutputs(y1, y2, nout);

            //
            // Randomization changes outputs (when inputs are unchanged, non-zero network)
            //
            proctestfillpair(x1, x2, nin, true);
            proctestfillpair(y1, y2, nout, false);
            mlpbase.mlpcopy(ref net, ref twin);
            mlpbase.mlprandomize(ref twin);
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            mlpbase.mlpprocess(ref twin, ref x1, ref y2);
            err |= proctestsameoutputs(y1, y2, nout);

            //
            // Full randomization changes outputs (when inputs are unchanged, non-zero network)
            //
            proctestfillpair(x1, x2, nin, true);
            proctestfillpair(y1, y2, nout, false);
            mlpbase.mlpcopy(ref net, ref twin);
            mlpbase.mlprandomizefull(ref twin);
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            mlpbase.mlpprocess(ref twin, ref x1, ref y2);
            err |= proctestsameoutputs(y1, y2, nout);
        }

        //
        // Normalization properties
        //
        if( nkind==1 )
        {
            //
            // Classifier network outputs are normalized
            //
            for(int c=0; c<nin; c++)
            {
                x1[c] = 2*AP.Math.RandomReal()-1;
            }
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            double total = 0;
            for(int c=0; c<nout; c++)
            {
                total += y1[c];
                err |= y1[c]<0;
            }
            err |= Math.Abs(total-1)>1000*AP.Math.MachineEpsilon;
        }
        if( nkind==2 )
        {
            //
            // B-type network outputs are bounded from above/below
            //
            for(int c=0; c<nin; c++)
            {
                x1[c] = 2*AP.Math.RandomReal()-1;
            }
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            for(int c=0; c<nout; c++)
            {
                if( a2>=0 )
                {
                    err |= y1[c]<a1;
                }
                else
                {
                    err |= y1[c]>a1;
                }
            }
        }
        if( nkind==3 )
        {
            //
            // R-type network outputs are within [A1,A2] (or [A2,A1])
            //
            for(int c=0; c<nin; c++)
            {
                x1[c] = 2*AP.Math.RandomReal()-1;
            }
            mlpbase.mlpprocess(ref net, ref x1, ref y1);
            for(int c=0; c<nout; c++)
            {
                bool below = y1[c]<Math.Min(a1, a2);
                bool above = y1[c]>Math.Max(a1, a2);
                err = err | below | above;
            }
        }
    }
}

/*************************************************************************
Helper of TestProcessing: fills first Count entries of A with random
values from [-1,1). B[i] gets its own random value when Independent is
True (drawn right after A[i]) and a copy of A[i] otherwise.
*************************************************************************/
private static void proctestfillpair(double[] a, double[] b, int count, bool independent)
{
    for(int c=0; c<count; c++)
    {
        a[c] = 2*AP.Math.RandomReal()-1;
        if( independent )
        {
            b[c] = 2*AP.Math.RandomReal()-1;
        }
        else
        {
            b[c] = a[c];
        }
    }
}

/*************************************************************************
Helper of TestProcessing: True when first Count entries of A and B are
exactly equal.
*************************************************************************/
private static bool proctestsameoutputs(double[] a, double[] b, int count)
{
    bool same = true;
    for(int c=0; c<count; c++)
    {
        same = same & a[c]==b[c];
    }
    return same;
}
/*************************************************************************
Informational functions test.

Creates a network of kind NKind (with zero construction parameters) and
checks that MLPProperties reports NIn inputs, NOut outputs and a positive
weight count.  Any mismatch raises Err.  PassCount is not used here.
*************************************************************************/
private static void testinformational(int nkind,
    int nin,
    int nhid1,
    int nhid2,
    int nout,
    int passcount,
    ref bool err)
{
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    int reportedInputs = 0;
    int reportedOutputs = 0;
    int reportedWeights = 0;

    createnetwork(ref net, nkind, 0.0, 0.0, nin, nhid1, nhid2, nout);
    mlpbase.mlpproperties(ref net, ref reportedInputs, ref reportedOutputs, ref reportedWeights);

    bool geometryOk = reportedInputs==nin && reportedOutputs==nout && reportedWeights>0;
    if( !geometryOk )
    {
        err = true;
    }
}
/*************************************************************************
Hessian functions test.

For every pass and for both HKind values (0: MLPGrad + MLPHessianBatch,
1: MLPGradN + MLPHessianNBatch) a random dataset of 1..10 samples is built
and two sets of quantities are compared:

  * reference error, gradient and Hessian, accumulated sample by sample;
    the Hessian rows come from a 4-point central difference of the gradient,
        ( g(w-2h) - 8 g(w-h) + 8 g(w+h) - g(w+2h) ) / (12 h),  h = 0.0001
  * error, gradient and Hessian returned by the batch Hessian routine.

Entries are compared with a 5% tolerance: relative when the reference
magnitude exceeds a threshold (1e-2 for gradient, 5e-2 for Hessian),
absolute otherwise.  Err is OR-ed with every detected failure.
*************************************************************************/
private static void testhessian(int nkind,
    int nin,
    int nhid1,
    int nhid2,
    int nout,
    int passcount,
    ref bool err)
{
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    int reportedIn = 0;
    int reportedOut = 0;
    int weightCount = 0;
    double scratch = 0;
    double refError = 0;
    double batchError = 0;

    System.Diagnostics.Debug.Assert(passcount>=2, "PassCount<2!");

    //
    // Construction parameters (only kinds 2 and 3 use non-zero values)
    //
    double paramA = 0;
    double paramB = 0;
    switch( nkind )
    {
        case 2:
            paramA = 1000*AP.Math.RandomReal()-500;
            paramB = 2*AP.Math.RandomReal()-1;
            break;
        case 3:
            paramA = 1000*AP.Math.RandomReal()-500;
            paramB = paramA+(2*AP.Math.RandomInteger(2)-1)*(0.1+0.9*AP.Math.RandomReal());
            break;
    }
    createnetwork(ref net, nkind, paramA, paramB, nin, nhid1, nhid2, nout);
    mlpbase.mlpproperties(ref net, ref reportedIn, ref reportedOut, ref weightCount);
    double step = 0.0001;
    double tolerance = 0.05;

    //
    // Work buffers
    //
    double[] sampleX = new double[nin];
    double[] sampleY = new double[nout];
    double[] refGrad = new double[weightCount];
    double[] workGrad = new double[weightCount];
    double[] probeGrad = new double[weightCount];
    double[,] refHess = new double[weightCount, weightCount];
    double[,] batchHess = new double[weightCount, weightCount];

    for(int pass=1; pass<=passcount; pass++)
    {
        mlpbase.mlprandomizefull(ref net);
        for(int hkind=0; hkind<=1; hkind++)
        {
            int sampleCount = 1+AP.Math.RandomInteger(10);
            double[,] xy = new double[sampleCount, nin+nout];

            //
            // Reset reference accumulators
            //
            refError = 0;
            for(int r=0; r<weightCount; r++)
            {
                refGrad[r] = 0;
            }
            for(int r=0; r<weightCount; r++)
            {
                for(int c=0; c<weightCount; c++)
                {
                    refHess[r,c] = 0;
                }
            }

            for(int s=0; s<sampleCount; s++)
            {
                //
                // Random input, mirrored into the dataset row
                //
                for(int c=0; c<nin; c++)
                {
                    sampleX[c] = 4*AP.Math.RandomReal()-2;
                }
                for(int c=0; c<nin; c++)
                {
                    xy[s,c] = sampleX[c];
                }

                //
                // Target: one-hot vector plus class index in column NIn for
                // softmax networks, random real vector otherwise
                //
                if( mlpbase.mlpissoftmax(ref net) )
                {
                    for(int c=0; c<nout; c++)
                    {
                        sampleY[c] = 0;
                    }
                    xy[s,nin] = AP.Math.RandomInteger(nout);
                    sampleY[(int)Math.Round(xy[s,nin])] = 1;
                }
                else
                {
                    for(int c=0; c<nout; c++)
                    {
                        sampleY[c] = 4*AP.Math.RandomReal()-2;
                    }
                    for(int c=0; c<nout; c++)
                    {
                        xy[s,nin+c] = sampleY[c];
                    }
                }

                //
                // Reference error and gradient
                //
                hessiantestgrad(hkind, ref net, ref sampleX, ref sampleY, ref scratch, ref workGrad);
                refError += scratch;
                for(int c=0; c<weightCount; c++)
                {
                    refGrad[c] += workGrad[c];
                }

                //
                // Reference Hessian: perturb one weight at a time and
                // combine the four gradients.  Scratch serves both as the
                // (ignored) error output and as the 1/(12h) scale.
                //
                for(int w=0; w<weightCount; w++)
                {
                    double saved = net.weights[w];

                    net.weights[w] = saved-2*step;
                    hessiantestgrad(hkind, ref net, ref sampleX, ref sampleY, ref scratch, ref workGrad);

                    net.weights[w] = saved-step;
                    hessiantestgrad(hkind, ref net, ref sampleX, ref sampleY, ref scratch, ref probeGrad);
                    for(int c=0; c<weightCount; c++)
                    {
                        workGrad[c] -= 8*probeGrad[c];
                    }

                    net.weights[w] = saved+step;
                    hessiantestgrad(hkind, ref net, ref sampleX, ref sampleY, ref scratch, ref probeGrad);
                    for(int c=0; c<weightCount; c++)
                    {
                        workGrad[c] += 8*probeGrad[c];
                    }

                    net.weights[w] = saved+2*step;
                    hessiantestgrad(hkind, ref net, ref sampleX, ref sampleY, ref scratch, ref probeGrad);
                    for(int c=0; c<weightCount; c++)
                    {
                        workGrad[c] -= probeGrad[c];
                    }

                    scratch = 1/(12*step);
                    for(int c=0; c<weightCount; c++)
                    {
                        refHess[w,c] += scratch*workGrad[c];
                    }

                    // restore the weight before moving on
                    net.weights[w] = saved;
                }
            }

            //
            // Batch routine under test (overwrites workGrad)
            //
            if( hkind==0 )
            {
                mlpbase.mlphessianbatch(ref net, ref xy, sampleCount, ref batchError, ref workGrad, ref batchHess);
            }
            else
            {
                mlpbase.mlphessiannbatch(ref net, ref xy, sampleCount, ref batchError, ref workGrad, ref batchHess);
            }

            //
            // Compare error, gradient, Hessian
            //
            err |= (double)(Math.Abs(refError-batchError)/refError)>(double)(tolerance);
            for(int r=0; r<weightCount; r++)
            {
                double gradDeviation;
                if( (double)(Math.Abs(refGrad[r]))>1.0E-2 )
                {
                    gradDeviation = Math.Abs((workGrad[r]-refGrad[r])/refGrad[r]);
                }
                else
                {
                    gradDeviation = Math.Abs(workGrad[r]-refGrad[r]);
                }
                err |= (double)(gradDeviation)>(double)(tolerance);
            }
            for(int r=0; r<weightCount; r++)
            {
                for(int c=0; c<weightCount; c++)
                {
                    double hessDeviation;
                    if( (double)(Math.Abs(refHess[r,c]))>5.0E-2 )
                    {
                        hessDeviation = Math.Abs((refHess[r,c]-batchHess[r,c])/refHess[r,c]);
                    }
                    else
                    {
                        hessDeviation = Math.Abs(batchHess[r,c]-refHess[r,c]);
                    }
                    err |= (double)(hessDeviation)>(double)(tolerance);
                }
            }
        }
    }
}

/*************************************************************************
Gradient dispatch used by TestHessian: calls MLPGrad when HKind=0 and
MLPGradN otherwise, forwarding every argument unchanged.
*************************************************************************/
private static void hessiantestgrad(int hkind,
    ref mlpbase.multilayerperceptron net,
    ref double[] x,
    ref double[] desiredy,
    ref double e,
    ref double[] grad)
{
    if( hkind==0 )
    {
        mlpbase.mlpgrad(ref net, ref x, ref desiredy, ref e, ref grad);
    }
    else
    {
        mlpbase.mlpgradn(ref net, ref x, ref desiredy, ref e, ref grad);
    }
}
/*************************************************************************
Training neural networks ensemble using early stopping.

Each ensemble member is trained by MLPTrainES on its own random split of
XY: every row goes to the training subset with probability 0.66 and to the
validation subset otherwise.  The split is redrawn until both subsets are
non-empty.  After training, the member's weights, column means and column
sigmas are stored in the ensemble's arrays at that member's offset.

INPUT PARAMETERS:
    Ensemble    -   model with initialized geometry
    XY          -   training set
    NPoints     -   training set size, >=2
    Decay       -   weight decay coefficient; Decay<0 is rejected here
                    (MLPTrainES may impose a stricter bound - not checked
                    in this routine)
    Restarts    -   restarts, >0.

OUTPUT PARAMETERS:
    Ensemble    -   trained model
    Info        -   return code:
                    * -2, if the ensemble is a softmax one and there is a
                          point with class number outside of [0..NOut-1].
                    * -1, if incorrect parameters was passed
                          (NPoints<2, Restarts<1, Decay<0).
                    *  6, if task has been solved.
                    A negative code returned by MLPTrainES for any member
                    is passed through and training stops at that member.
    Rep         -   training report; NGrad, NHess and NCholesky are summed
                    over all trained members.  Not touched when Info is
                    -1 or -2.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlpetraines(ref mlpensemble ensemble,
    ref double[,] xy,
    int npoints,
    double decay,
    int restarts,
    ref int info,
    ref mlptrain.mlpreport rep)
{
    //
    // Parameter validation
    //
    if( npoints<2 || restarts<1 || (double)(decay)<0 )
    {
        info = -1;
        return;
    }
    if( ensemble.issoftmax )
    {
        for(int row=0; row<npoints; row++)
        {
            int label = (int)Math.Round(xy[row,ensemble.nin]);
            if( label<0 || label>=ensemble.nout )
            {
                info = -2;
                return;
            }
        }
    }
    info = 6;

    //
    // Row width of the dataset and number of per-column statistics:
    // softmax rows carry NIn inputs plus one class column, and only the
    // inputs have statistics; otherwise both span NIn+NOut columns.
    //
    int rowWidth = ensemble.issoftmax ? ensemble.nin+1 : ensemble.nin+ensemble.nout;
    int statCount = ensemble.issoftmax ? ensemble.nin : ensemble.nin+ensemble.nout;

    double[,] trainRows = new double[npoints, rowWidth];
    double[,] validationRows = new double[npoints, rowWidth];
    mlpbase.multilayerperceptron net = new mlpbase.multilayerperceptron();
    mlpbase.mlpunserialize(ref ensemble.serializedmlp, ref net);

    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    int memberInfo = 0;
    mlptrain.mlpreport memberRep = new mlptrain.mlpreport();

    for(int member=0; member<ensemble.ensemblesize; member++)
    {
        //
        // Random train/validation split, repeated until neither part is empty
        //
        int trainCount;
        int validationCount;
        do
        {
            trainCount = 0;
            validationCount = 0;
            for(int row=0; row<npoints; row++)
            {
                if( (double)(AP.Math.RandomReal())<0.66 )
                {
                    for(int col=0; col<rowWidth; col++)
                    {
                        trainRows[trainCount,col] = xy[row,col];
                    }
                    trainCount++;
                }
                else
                {
                    for(int col=0; col<rowWidth; col++)
                    {
                        validationRows[validationCount,col] = xy[row,col];
                    }
                    validationCount++;
                }
            }
        }
        while( trainCount==0 || validationCount==0 );

        //
        // Train this member; abort on failure
        //
        mlptrain.mlptraines(ref net, ref trainRows, trainCount, ref validationRows, validationCount, decay, restarts, ref memberInfo, ref memberRep);
        if( memberInfo<0 )
        {
            info = memberInfo;
            return;
        }

        //
        // Store the member's weights and column statistics at its offset
        //
        int weightBase = member*ensemble.wcount;
        for(int w=0; w<ensemble.wcount; w++)
        {
            ensemble.weights[weightBase+w] = net.weights[w];
        }
        int statBase = member*statCount;
        for(int s=0; s<statCount; s++)
        {
            ensemble.columnmeans[statBase+s] = net.columnmeans[s];
        }
        for(int s=0; s<statCount; s++)
        {
            ensemble.columnsigmas[statBase+s] = net.columnsigmas[s];
        }

        rep.ngrad += memberRep.ngrad;
        rep.nhess += memberRep.nhess;
        rep.ncholesky += memberRep.ncholesky;
    }
}