Example #1
        /*************************************************************************
        This subroutine trains a logit model.

        INPUT PARAMETERS:
            XY          -   training set, array[0..NPoints-1,0..NVars].
                            The first NVars columns store values of the
                            independent variables; the next column stores the
                            class number (from 0 to NClasses-1) to which the
                            dataset element belongs. Fractional values are
                            rounded to the nearest integer.
            NPoints     -   training set size, NPoints>=1
            NVars       -   number of independent variables, NVars>=1
            NClasses    -   number of classes, NClasses>=2

        OUTPUT PARAMETERS:
            Info        -   return code:
                            * -2, if there is a point with a class number
                                  outside of [0..NClasses-1].
                            * -1, if incorrect parameters were passed
                                  (NPoints<NVars+2, NVars<1, NClasses<2).
                            *  1, if the task has been solved
            LM          -   model built
            Rep         -   training report

          -- ALGLIB --
             Copyright 10.09.2008 by Bochkanov Sergey
        *************************************************************************/
        public static void mnltrainh(double[,] xy,
            int npoints,
            int nvars,
            int nclasses,
            ref int info,
            logitmodel lm,
            mnlreport rep)
        {
            int i = 0;
            int j = 0;
            int k = 0;
            int ssize = 0;
            bool allsame = new bool();
            int offs = 0;
            double threshold = 0;
            double wminstep = 0;
            double decay = 0;
            int wdim = 0;
            int expoffs = 0;
            double v = 0;
            double s = 0;
            mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            double e = 0;
            double[] g = new double[0];
            double[,] h = new double[0,0];
            bool spd = new bool();
            double[] x = new double[0];
            double[] y = new double[0];
            double[] wbase = new double[0];
            double wstep = 0;
            double[] wdir = new double[0];
            double[] work = new double[0];
            int mcstage = 0;
            logitmcstate mcstate = new logitmcstate();
            int mcinfo = 0;
            int mcnfev = 0;
            int solverinfo = 0;
            densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
            int i_ = 0;
            int i1_ = 0;

            info = 0;

            //
            // Decay is the coefficient of the quadratic penalty (weight decay)
            // that is added to E, G and H below; Threshold and WMinStep are
            // small tuning constants.
            //
            threshold = 1000*math.machineepsilon;
            wminstep = 0.001;
            decay = 0.001;
            
            //
            // Validate inputs
            //
            if( (npoints<nvars+2 || nvars<1) || nclasses<2 )
            {
                info = -1;
                return;
            }
            for(i=0; i<=npoints-1; i++)
            {
                if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses )
                {
                    info = -2;
                    return;
                }
            }
            info = 1;
            
            //
            // Initialize data
            //
            rep.ngrad = 0;
            rep.nhess = 0;
            
            //
            // Allocate array
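            // Model vector layout: a 5-element header (size, version number,
            // NVars, NClasses, weight offset) followed by (NVars+1)*(NClasses-1)
            // weights and NClasses workspace cells.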
            //
            wdim = (nvars+1)*(nclasses-1);
            offs = 5;
            expoffs = offs+wdim;
            ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
            lm.w = new double[ssize-1+1];
            lm.w[0] = ssize;
            lm.w[1] = logitvnum;
            lm.w[2] = nvars;
            lm.w[3] = nclasses;
            lm.w[4] = offs;
            
            //
            // Degenerate case: all outputs are equal
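            // All points belong to a single class, so the weights are set to
            // report that class with probability ~1 (V is a large positive logit).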
            //
            allsame = true;
            for(i=1; i<=npoints-1; i++)
            {
                if( (int)Math.Round(xy[i,nvars])!=(int)Math.Round(xy[i-1,nvars]) )
                {
                    allsame = false;
                }
            }
            if( allsame )
            {
                for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
                {
                    lm.w[offs+i] = 0;
                }
                v = -(2*Math.Log(math.minrealnumber));
                k = (int)Math.Round(xy[0,nvars]);
                if( k==nclasses-1 )
                {
                    for(i=0; i<=nclasses-2; i++)
                    {
                        lm.w[offs+i*(nvars+1)+nvars] = -v;
                    }
                }
                else
                {
                    for(i=0; i<=nclasses-2; i++)
                    {
                        if( i==k )
                        {
                            lm.w[offs+i*(nvars+1)+nvars] = v;
                        }
                        else
                        {
                            lm.w[offs+i*(nvars+1)+nvars] = 0;
                        }
                    }
                }
                return;
            }
            
            //
            // General case.
            // Prepare task and network. Allocate space.
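            // MLPCreateC0 builds a network with no hidden layers and softmax
            // outputs, i.e. exactly the multinomial logit model; its weights
            // are randomly initialized below.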
            //
            mlpbase.mlpcreatec0(nvars, nclasses, network);
            mlpbase.mlpinitpreprocessor(network, xy, npoints);
            mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);
            for(i=0; i<=wcount-1; i++)
            {
                network.weights[i] = (2*math.randomreal()-1)/nvars;
            }
            g = new double[wcount-1+1];
            h = new double[wcount-1+1, wcount-1+1];
            wbase = new double[wcount-1+1];
            wdir = new double[wcount-1+1];
            work = new double[wcount-1+1];
            
            //
            // First stage: optimize in gradient direction.
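            // A fixed number of steepest descent steps with the MNLMCSRCH
            // (More-Thuente) line search moves the weights into the vicinity
            // of the minimum.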
            //
            for(k=0; k<=wcount/3+10; k++)
            {
                
                //
                // Calculate gradient in starting point
                //
                mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = e+0.5*decay*v;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    g[i_] = g[i_] + decay*network.weights[i_];
                }
                rep.ngrad = rep.ngrad+1;
                
                //
                // Setup optimization scheme
                //
                for(i_=0; i_<=wcount-1;i_++)
                {
                    wdir[i_] = -g[i_];
                }
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += wdir[i_]*wdir[i_];
                }
                wstep = Math.Sqrt(v);
                v = 1/Math.Sqrt(v);
                for(i_=0; i_<=wcount-1;i_++)
                {
                    wdir[i_] = v*wdir[i_];
                }
                mcstage = 0;
                mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
                while( mcstage!=0 )
                {
                    mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    e = e+0.5*decay*v;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        g[i_] = g[i_] + decay*network.weights[i_];
                    }
                    rep.ngrad = rep.ngrad+1;
                    mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
                }
            }
            
            //
            // Second stage: use Hessian when we are close to the minimum
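            // Damped Newton iterations: solve H*WDir = -G via Cholesky, falling
            // back to the gradient direction when H is not positive definite.
            // The loop exits after a successful Newton step (MCInfo 2, 4 or 6).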
            //
            while( true )
            {
                
                //
                // Calculate and update E/G/H
                //
                mlpbase.mlphessiannbatch(network, xy, npoints, ref e, ref g, ref h);
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = e+0.5*decay*v;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    g[i_] = g[i_] + decay*network.weights[i_];
                }
                for(k=0; k<=wcount-1; k++)
                {
                    h[k,k] = h[k,k]+decay;
                }
                rep.nhess = rep.nhess+1;
                
                //
                // Select step direction
                // NOTE: it is important to use the lower-triangle Cholesky
                // factorization since it is much faster than the upper-triangle version.
                //
                spd = trfac.spdmatrixcholesky(ref h, wcount, false);
                densesolver.spdmatrixcholeskysolve(h, wcount, false, g, ref solverinfo, solverrep, ref wdir);
                spd = solverinfo>0;
                if( spd )
                {
                    
                    //
                    // H is positive definite.
                    // Step in Newton direction.
                    //
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        wdir[i_] = -1*wdir[i_];
                    }
                    spd = true;
                }
                else
                {
                    
                    //
                    // H is indefinite.
                    // Step in gradient direction.
                    //
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        wdir[i_] = -g[i_];
                    }
                    spd = false;
                }
                
                //
                // Optimize in WDir direction
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += wdir[i_]*wdir[i_];
                }
                wstep = Math.Sqrt(v);
                v = 1/Math.Sqrt(v);
                for(i_=0; i_<=wcount-1;i_++)
                {
                    wdir[i_] = v*wdir[i_];
                }
                mcstage = 0;
                mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
                while( mcstage!=0 )
                {
                    mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    e = e+0.5*decay*v;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        g[i_] = g[i_] + decay*network.weights[i_];
                    }
                    rep.ngrad = rep.ngrad+1;
                    mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
                }
                if( spd && ((mcinfo==2 || mcinfo==4) || mcinfo==6) )
                {
                    break;
                }
            }
            
            //
            // Convert from NN format to MNL format
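            // The preprocessor's per-column means and sigmas are folded into
            // the weights so that the stored model accepts raw, unscaled inputs.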
            //
            i1_ = (0) - (offs);
            for(i_=offs; i_<=offs+wcount-1;i_++)
            {
                lm.w[i_] = network.weights[i_+i1_];
            }
            for(k=0; k<=nvars-1; k++)
            {
                for(i=0; i<=nclasses-2; i++)
                {
                    s = network.columnsigmas[k];
                    if( (double)(s)==(double)(0) )
                    {
                        s = 1;
                    }
                    j = offs+(nvars+1)*i;
                    v = lm.w[j+k];
                    lm.w[j+k] = v/s;
                    lm.w[j+nvars] = lm.w[j+nvars]+v*network.columnmeans[k]/s;
                }
            }
            for(k=0; k<=nclasses-2; k++)
            {
                lm.w[offs+(nvars+1)*k+nvars] = -lm.w[offs+(nvars+1)*k+nvars];
            }
        }
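
A minimal calling sketch (hypothetical toy data; it assumes mnltrainh and the logitmodel/mnlreport types above are accessible from the calling scope):

        // Four points, two independent variables, class label in the last column.
        double[,] xy = new double[,]
        {
            { 0.0, 0.1, 0 },
            { 0.1, 0.0, 0 },
            { 0.9, 1.0, 1 },
            { 1.0, 0.9, 1 }
        };
        int info = 0;
        logitmodel lm = new logitmodel();
        mnlreport rep = new mnlreport();
        mnltrainh(xy, 4, 2, 2, ref info, lm, rep);
        // info==1 on success; rep.ngrad and rep.nhess report how many gradient
        // and Hessian evaluations the two optimization stages performed.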
Example #2
 public override alglib.apobject make_copy()
 {
     mnlreport _result = new mnlreport();
     _result.ngrad = ngrad;
     _result.nhess = nhess;
     return _result;
 }
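
A usage sketch for make_copy, assuming the ngrad/nhess counters are publicly accessible (as the method body suggests); the copy is independent, so mutating it leaves the original report untouched:

 mnlreport rep = new mnlreport();
 rep.ngrad = 5;
 rep.nhess = 2;
 mnlreport copy = (mnlreport)rep.make_copy();
 copy.ngrad = 0;   // rep.ngrad is still 5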