/*************************************************************************
This subroutine trains logit model.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars]
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if there is a point with class number outside of
                          [0..NClasses-1].
                    * -1, if incorrect parameters was passed (NPoints<NVars+2,
                          NVars<1, NClasses<2).
                    *  1, if task has been solved
    LM          -   model built
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnltrainh(double[,] xy, int npoints, int nvars, int nclasses, ref int info, logitmodel lm, mnlreport rep)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int ssize = 0;
    bool allsame = new bool();
    int offs = 0;
    // NOTE(review): threshold and wminstep are assigned below but never read
    // in this routine (likely leftovers from an earlier revision).
    double threshold = 0;
    double wminstep = 0;
    double decay = 0;                       // L2 regularization coefficient
    int wdim = 0;                           // number of model coefficients: (NVars+1)*(NClasses-1)
    int expoffs = 0;                        // computed but not used further in this routine
    double v = 0;
    double s = 0;
    // Internal neural network used as the optimization vehicle: a linear
    // network with softmax-style output (mlpcreatec0) is trained, then its
    // weights are converted to MNL format at the end.
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int wcount = 0;                         // total number of network weights
    double e = 0;                           // regularized error value
    double[] g = new double[0];             // gradient
    double[,] h = new double[0,0];          // Hessian
    bool spd = new bool();                  // true when H is positive definite
    // NOTE(review): x, y and wbase are declared/allocated but never used here.
    double[] x = new double[0];
    double[] y = new double[0];
    double[] wbase = new double[0];
    double wstep = 0;                       // line-search step length
    double[] wdir = new double[0];          // normalized search direction
    double[] work = new double[0];          // scratch buffer for mnlmcsrch
    int mcstage = 0;                        // line-search state-machine stage (0 = done)
    logitmcstate mcstate = new logitmcstate();
    int mcinfo = 0;                         // line-search termination code
    int mcnfev = 0;                         // line-search function-evaluation count
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;
    int i1_ = 0;

    info = 0;
    threshold = 1000*math.machineepsilon;
    wminstep = 0.001;
    decay = 0.001;

    //
    // Test for inputs
    //
    if( (npoints<nvars+2 || nvars<1) || nclasses<2 )
    {
        info = -1;
        return;
    }
    // Class labels are stored as doubles in the last column; they must round
    // to an integer inside [0, NClasses-1].
    for(i=0; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses )
        {
            info = -2;
            return;
        }
    }
    info = 1;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;

    //
    // Allocate array
    //
    // Model is serialized into LM.W as a flat array:
    //   W[0..4]  header (size, version, NVars, NClasses, coefficient offset)
    //   W[Offs..Offs+WDim-1]  coefficients, (NVars+1) per class row
    //   remaining NClasses entries: workspace (see SSize formula)
    wdim = (nvars+1)*(nclasses-1);
    offs = 5;
    expoffs = offs+wdim;
    ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
    lm.w = new double[ssize-1+1];
    lm.w[0] = ssize;
    lm.w[1] = logitvnum;
    lm.w[2] = nvars;
    lm.w[3] = nclasses;
    lm.w[4] = offs;

    //
    // Degenerate case: all outputs are equal.
    // No optimization needed: build a model that always predicts class K
    // by saturating the constant terms (derived from log(MinRealNumber)).
    //
    allsame = true;
    for(i=1; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])!=(int)Math.Round(xy[i-1,nvars]) )
        {
            allsame = false;
        }
    }
    if( allsame )
    {
        for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
        {
            lm.w[offs+i] = 0;
        }
        v = -(2*Math.Log(math.minrealnumber));
        k = (int)Math.Round(xy[0,nvars]);
        if( k==nclasses-1 )
        {
            // Last class is the reference class: push all other classes down.
            for(i=0; i<=nclasses-2; i++)
            {
                lm.w[offs+i*(nvars+1)+nvars] = -v;
            }
        }
        else
        {
            // Push class K's constant term up, leave the rest at zero.
            for(i=0; i<=nclasses-2; i++)
            {
                if( i==k )
                {
                    lm.w[offs+i*(nvars+1)+nvars] = v;
                }
                else
                {
                    lm.w[offs+i*(nvars+1)+nvars] = 0;
                }
            }
        }
        return;
    }

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpcreatec0(nvars, nclasses, network);
    mlpbase.mlpinitpreprocessor(network, xy, npoints);
    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);
    // Random small initial weights, scaled by 1/NVars.
    for(i=0; i<=wcount-1; i++)
    {
        network.weights[i] = (2*math.randomreal()-1)/nvars;
    }
    g = new double[wcount-1+1];
    h = new double[wcount-1+1, wcount-1+1];
    wbase = new double[wcount-1+1];
    wdir = new double[wcount-1+1];
    work = new double[wcount-1+1];

    //
    // First stage: optimize in gradient direction
    // (steepest descent with line search, WCount/3+11 iterations).
    //
    for(k=0; k<=wcount/3+10; k++)
    {
        //
        // Calculate gradient in starting point
        // (regularized: E += 0.5*decay*|W|^2, G += decay*W)
        //
        mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        rep.ngrad = rep.ngrad+1;

        //
        // Setup optimization scheme: search along -G, normalized to unit
        // length, with the initial step equal to |G|.
        //
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = -g[i_];
        }
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        // Drive the reverse-communication line search: MCStage!=0 means it
        // wants E/G recomputed at the trial point it stored in the weights.
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
    }

    //
    // Second stage: use Hessian when we are close to the minimum
    // (Newton steps; falls back to the gradient direction when the
    // regularized Hessian is not positive definite).
    //
    while( true )
    {
        //
        // Calculate and update E/G/H
        // (regularization adds decay to the Hessian diagonal too)
        //
        mlpbase.mlphessiannbatch(network, xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;

        //
        // Select step direction
        // NOTE: it is important to use lower-triangle Cholesky
        // factorization since it is much faster than higher-triangle version.
        //
        // NOTE(review): the boolean returned by spdmatrixcholesky is
        // immediately overwritten by "spd = solverinfo>0" below; positive
        // definiteness is effectively judged by the solver's return code.
        spd = trfac.spdmatrixcholesky(ref h, wcount, false);
        densesolver.spdmatrixcholeskysolve(h, wcount, false, g, ref solverinfo, solverrep, ref wdir);
        spd = solverinfo>0;
        if( spd )
        {
            //
            // H is positive definite.
            // Step in Newton direction: WDir = -H^(-1)*G.
            //
            for(i_=0; i_<=wcount-1;i_++)
            {
                wdir[i_] = -1*wdir[i_];
            }
            spd = true;
        }
        else
        {
            //
            // H is indefinite.
            // Step in gradient direction.
            //
            for(i_=0; i_<=wcount-1;i_++)
            {
                wdir[i_] = -g[i_];
            }
            spd = false;
        }

        //
        // Optimize in WDir direction (same normalize-then-line-search
        // pattern as the first stage).
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
        // Stop only after a Newton step that the line search terminated with
        // one of the "converged/stalled" codes (2, 4 or 6 — presumably
        // Wolfe-condition / step-bound / tolerance termination; semantics
        // live in mnlmcsrch).
        if( spd && ((mcinfo==2 || mcinfo==4) || mcinfo==6) )
        {
            break;
        }
    }

    //
    // Convert from NN format to MNL format: copy raw network weights into
    // LM.W, then fold the preprocessor's per-column mean/sigma scaling into
    // the coefficients so the model works on unscaled inputs.
    //
    i1_ = (0) - (offs);
    for(i_=offs; i_<=offs+wcount-1;i_++)
    {
        lm.w[i_] = network.weights[i_+i1_];
    }
    for(k=0; k<=nvars-1; k++)
    {
        for(i=0; i<=nclasses-2; i++)
        {
            s = network.columnsigmas[k];
            if( (double)(s)==(double)(0) )
            {
                // Constant column: avoid division by zero.
                s = 1;
            }
            j = offs+(nvars+1)*i;
            v = lm.w[j+k];
            lm.w[j+k] = v/s;
            // Shift the mean compensation into the constant term.
            lm.w[j+nvars] = lm.w[j+nvars]+v*network.columnmeans[k]/s;
        }
    }
    // Flip the sign of all constant terms (MNL sign convention).
    for(k=0; k<=nclasses-2; k++)
    {
        lm.w[offs+(nvars+1)*k+nvars] = -lm.w[offs+(nvars+1)*k+nvars];
    }
}
public override alglib.apobject make_copy()
{
    // Return an independent mnlreport carrying the same training counters.
    var clone = new mnlreport
    {
        ngrad = ngrad,
        nhess = nhess,
    };
    return clone;
}