private void LBFGSalg()
        {
            // Box-constrained L-BFGS-B minimization of a two-parameter functional.
            // ALGLIB's lbfgsb routines use 1-based indexing, so element [0] of
            // each array below is an unused placeholder.
            double[] aprx = new double[] { 0, 1.5, 100 };    // initial guess
            lbfgsb.funcgrad = gradfunc;                      // callback computing f and its gradient
            int[]    nb   = new int[] { 0, 2, 2 };           // 2 = bounded below and above
            double[] l    = new double[] { 0, 1.3, 0 };      // lower bounds
            double[] u    = new double[] { 0, 1.7, 100 };    // upper bounds
            int      info = 0;

            lbfgsb.lbfgsbminimize(2, 2, ref aprx, 1e-11, 1e-11, 1e-11, 10000, ref nb, ref l, ref u, ref info);
            // textBox1.Text += "n = " + aprx[1] + "\td = " + aprx[2] + "\tf = " + func.functional(aprx[1], aprx[2]) + "\r\n";
        }
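The gradfunc callback referenced above is not shown on this page. As a rough sketch of what lbfgsb.funcgrad expects (a routine that fills in the function value and gradient at the current point, 1-based arrays as in the rest of this example; the exact delegate signature depends on the lbfgsb translation in use, and the quadratic objective here is purely hypothetical):

        // Hypothetical funcgrad-style callback: writes f(x) and its gradient.
        // Index 0 of x and g is unused (ALGLIB 1-based convention).
        private static void gradfuncSketch(ref double[] x, ref double f, ref double[] g)
        {
            f    = (x[1] - 1.5) * (x[1] - 1.5) + 0.01 * (x[2] - 50) * (x[2] - 50);
            g[1] = 2 * (x[1] - 1.5);
            g[2] = 0.02 * (x[2] - 50);
        }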
Example #2
        public static bool testminlbfgs(bool silent)
        {
            bool waserrors = false;
            bool referror  = false;
            bool lin1error = false;
            bool lin2error = false;
            bool eqerror   = false;
            bool converror = false;
            int  n         = 0;
            int  m         = 0;

            double[] x  = new double[0];
            double[] xe = new double[0];
            double[] b  = new double[0];
            int      i  = 0;
            int      j  = 0;
            double   v  = 0;

            double[,] a = new double[0, 0];
            lbfgs.lbfgsstate  state = new lbfgs.lbfgsstate();
            lbfgs.lbfgsreport rep   = new lbfgs.lbfgsreport();
            int i_ = 0;

            waserrors = false;

            //
            // Reference problem
            //
            x    = new double[2 + 1];
            n    = 3;
            m    = 2;
            x[0] = 100 * AP.Math.RandomReal() - 50;
            x[1] = 100 * AP.Math.RandomReal() - 50;
            x[2] = 100 * AP.Math.RandomReal() - 50;
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                // f(x) = (x0 - 2)^2 + x1^2 + (x2 - x0)^2; minimum at (2, 0, 2)
                state.f    = AP.Math.Sqr(state.x[0] - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
                state.g[0] = 2 * (state.x[0] - 2) + 2 * (state.x[0] - state.x[2]);
                state.g[1] = 2 * state.x[1];
                state.g[2] = 2 * (state.x[2] - state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            Console.WriteLine("Problem 1: " + state.f.ToString() + " punto: " + x[0].ToString() + " " + x[1].ToString() + " " + x[2].ToString());
            referror = rep.terminationtype <= 0 | Math.Abs(x[0] - 2) > 0.001 | Math.Abs(x[1]) > 0.001 | Math.Abs(x[2] - 2) > 0.001;

            //
            // 1D problem #1
            //
            x    = new double[0 + 1];
            n    = 1;
            m    = 1;
            x[0] = 100 * AP.Math.RandomReal() - 50;
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                // f(x) = -cos(x); stationary points at integer multiples of pi
                state.f    = -Math.Cos(state.x[0]);
                state.g[0] = Math.Sin(state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            lin1error = rep.terminationtype <= 0 | Math.Abs(x[0] / Math.PI - (int)Math.Round(x[0] / Math.PI)) > 0.001;

            //
            // 1D problem #2
            //
            x    = new double[0 + 1];
            n    = 1;
            m    = 1;
            x[0] = 100 * AP.Math.RandomReal() - 50;
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                // f(x) = x^2 / (1 + x^2); unique minimum at x = 0
                state.f    = AP.Math.Sqr(state.x[0]) / (1 + AP.Math.Sqr(state.x[0]));
                state.g[0] = (2 * state.x[0] * (1 + AP.Math.Sqr(state.x[0])) - AP.Math.Sqr(state.x[0]) * 2 * state.x[0]) / AP.Math.Sqr(1 + AP.Math.Sqr(state.x[0]));
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            lin2error = rep.terminationtype <= 0 | Math.Abs(x[0]) > 0.001;

            //
            // Linear equations
            //
            eqerror = false;
            for (n = 1; n <= 10; n++)
            {
                //
                // Prepare task
                //
                a  = new double[n - 1 + 1, n - 1 + 1];
                x  = new double[n - 1 + 1];
                xe = new double[n - 1 + 1];
                b  = new double[n - 1 + 1];
                for (i = 0; i <= n - 1; i++)
                {
                    xe[i] = 2 * AP.Math.RandomReal() - 1;
                }
                for (i = 0; i <= n - 1; i++)
                {
                    for (j = 0; j <= n - 1; j++)
                    {
                        a[i, j] = 2 * AP.Math.RandomReal() - 1;
                    }
                }
                for (i = 0; i <= n - 1; i++)
                {
                    v = 0.0;
                    for (i_ = 0; i_ <= n - 1; i_++)
                    {
                        v += a[i, i_] * xe[i_];
                    }
                    b[i] = v;
                }

                //
                // Test different M
                //
                for (m = 1; m <= n; m++)
                {
                    //
                    // Solve task
                    //
                    for (i = 0; i <= n - 1; i++)
                    {
                        x[i] = 2 * AP.Math.RandomReal() - 1;
                    }
                    lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0, 0, 0, ref state);
                    while (lbfgs.minlbfgsiteration(ref state))
                    {
                        // f(x) = ||A*x - b||^2; gradient 2*A'*(A*x - b)
                        state.f = 0;
                        for (i = 0; i <= n - 1; i++)
                        {
                            state.g[i] = 0;
                        }
                        for (i = 0; i <= n - 1; i++)
                        {
                            v = 0.0;
                            for (i_ = 0; i_ <= n - 1; i_++)
                            {
                                v += a[i, i_] * state.x[i_];
                            }
                            state.f = state.f + AP.Math.Sqr(v - b[i]);
                            for (j = 0; j <= n - 1; j++)
                            {
                                state.g[j] = state.g[j] + 2 * (v - b[i]) * a[i, j];
                            }
                        }
                    }
                    lbfgs.minlbfgsresults(ref state, ref x, ref rep);
                    eqerror = eqerror | rep.terminationtype <= 0;
                    for (i = 0; i <= n - 1; i++)
                    {
                        eqerror = eqerror | Math.Abs(x[i] - xe[i]) > 0.001;
                    }
                }
            }

            //
            // Testing convergence properties
            //
            converror = false;
            x         = new double[2 + 1];
            n         = 3;
            m         = 2;
            for (i = 0; i <= 2; i++)
            {
                x[i] = 6 * AP.Math.RandomReal() - 3;
            }
            // EpsG = 0.0001: stop on small gradient norm (termination type 4)
            lbfgs.minlbfgs(n, m, ref x, 0.0001, 0.0, 0.0, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                // f(x) = (exp(x0) - 2)^2 + x1^2 + (x2 - x0)^2; minimum at (ln 2, 0, ln 2)
                state.f    = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
                state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
                state.g[1] = 2 * state.x[1];
                state.g[2] = 2 * (state.x[2] - state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            converror = converror | Math.Abs(x[0] - Math.Log(2)) > 0.05;
            converror = converror | Math.Abs(x[1]) > 0.05;
            converror = converror | Math.Abs(x[2] - Math.Log(2)) > 0.05;
            converror = converror | rep.terminationtype != 4;
            for (i = 0; i <= 2; i++)
            {
                x[i] = 6 * AP.Math.RandomReal() - 3;
            }
            // EpsF = 0.0001: stop on small function change (termination type 1)
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0001, 0.0, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                state.f    = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
                state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
                state.g[1] = 2 * state.x[1];
                state.g[2] = 2 * (state.x[2] - state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            converror = converror | Math.Abs(x[0] - Math.Log(2)) > 0.05;
            converror = converror | Math.Abs(x[1]) > 0.05;
            converror = converror | Math.Abs(x[2] - Math.Log(2)) > 0.05;
            converror = converror | rep.terminationtype != 1;
            for (i = 0; i <= 2; i++)
            {
                x[i] = 6 * AP.Math.RandomReal() - 3;
            }
            // EpsX = 0.0001: stop on small step (termination type 2)
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0001, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                state.f    = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
                state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
                state.g[1] = 2 * state.x[1];
                state.g[2] = 2 * (state.x[2] - state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            converror = converror | Math.Abs(x[0] - Math.Log(2)) > 0.05;
            converror = converror | Math.Abs(x[1]) > 0.05;
            converror = converror | Math.Abs(x[2] - Math.Log(2)) > 0.05;
            converror = converror | rep.terminationtype != 2;
            for (i = 0; i <= 2; i++)
            {
                x[i] = 2 * AP.Math.RandomReal() - 1;
            }
            // MaxIts = 10: stop after exactly 10 iterations (termination type 5)
            lbfgs.minlbfgs(n, m, ref x, 0.0, 0.0, 0.0, 10, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                state.f    = AP.Math.Sqr(Math.Exp(state.x[0]) - 2) + AP.Math.Sqr(state.x[1]) + AP.Math.Sqr(state.x[2] - state.x[0]);
                state.g[0] = 2 * (Math.Exp(state.x[0]) - 2) * Math.Exp(state.x[0]) + 2 * (state.x[0] - state.x[2]);
                state.g[1] = 2 * state.x[1];
                state.g[2] = 2 * (state.x[2] - state.x[0]);
            }
            lbfgs.minlbfgsresults(ref state, ref x, ref rep);
            converror = converror | rep.terminationtype != 5 | rep.iterationscount != 10;

            //
            // end
            //
            waserrors = referror | lin1error | lin2error | eqerror | converror;
            if (!silent)
            {
                System.Console.Write("TESTING L-BFGS OPTIMIZATION");
                System.Console.WriteLine();
                System.Console.Write("REFERENCE PROBLEM:                        ");
                if (referror)
                {
                    System.Console.Write("FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("OK");
                    System.Console.WriteLine();
                }
                System.Console.Write("1-D PROBLEM #1:                           ");
                if (lin1error)
                {
                    System.Console.Write("FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("OK");
                    System.Console.WriteLine();
                }
                System.Console.Write("1-D PROBLEM #2:                           ");
                if (lin2error)
                {
                    System.Console.Write("FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("OK");
                    System.Console.WriteLine();
                }
                System.Console.Write("LINEAR EQUATIONS:                         ");
                if (eqerror)
                {
                    System.Console.Write("FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("OK");
                    System.Console.WriteLine();
                }
                System.Console.Write("CONVERGENCE PROPERTIES:                   ");
                if (converror)
                {
                    System.Console.Write("FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("OK");
                    System.Console.WriteLine();
                }
                if (waserrors)
                {
                    System.Console.Write("TEST FAILED");
                    System.Console.WriteLine();
                }
                else
                {
                    System.Console.Write("TEST PASSED");
                    System.Console.WriteLine();
                }
                System.Console.WriteLine();
                System.Console.WriteLine();
            }
            return !waserrors;
        }
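Every test above uses the same reverse-communication pattern: minlbfgs initializes the state, each minlbfgsiteration call hands back a trial point in state.x, and the caller fills in state.f and state.g before the next call. A minimal standalone sketch of that loop (same ALGLIB 2.x-style API as in the tests; the quadratic objective is chosen only for illustration):

        // Minimize f(x) = (x0 - 1)^2 + (x1 + 2)^2; the minimizer is (1, -2).
        double[] x = new double[] { 0, 0 };
        lbfgs.lbfgsstate  state = new lbfgs.lbfgsstate();
        lbfgs.lbfgsreport rep   = new lbfgs.lbfgsreport();
        lbfgs.minlbfgs(2, 2, ref x, 0.0, 0.0, 1e-8, 0, 0, ref state);
        while (lbfgs.minlbfgsiteration(ref state))
        {
            // the optimizer requests f and g at state.x
            state.f    = AP.Math.Sqr(state.x[0] - 1) + AP.Math.Sqr(state.x[1] + 2);
            state.g[0] = 2 * (state.x[0] - 1);
            state.g[1] = 2 * (state.x[1] + 2);
        }
        lbfgs.minlbfgsresults(ref state, ref x, ref rep);  // x now holds the minimizer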
Example #3
    /*************************************************************************
    *  Neural network training using early stopping (base algorithm - L-BFGS with
    *  regularization).
    *
    *  INPUT PARAMETERS:
    *   Network     -   neural network with initialized geometry
    *   TrnXY       -   training set
    *   TrnSize     -   training set size
    *   ValXY       -   validation set
    *   ValSize     -   validation set size
    *   Decay       -   weight decay constant, >=0.001.
    *                   Decay term 'Decay*||Weights||^2' is added to the error
    *                   function.
    *                   If you don't know what Decay to choose, use 0.001.
    *   Restarts    -   number of restarts from random positions, >0.
    *                   If you don't know what Restarts to choose, use 2.
    *
    *  OUTPUT PARAMETERS:
    *   Network     -   trained neural network.
    *   Info        -   return code:
    *                   -2, if there is a point with class number outside of
    *                       [0..NOut-1].
    *                   -1, if wrong parameters were specified
    *                       (NPoints<0, Restarts<1, ...).
    *                    2, task has been solved, stopping criterion met -
    *                       sufficiently small step size. Not expected (we use
    *                       EARLY stopping) but possible and not an error.
    *                    6, task has been solved, stopping criterion met -
    *                       increase in validation set error.
    *   Rep         -   training report
    *
    *  NOTE:
    *
    *  The algorithm stops when the validation set error has been increasing for
    *  long enough or when the step size becomes small enough (there are tasks
    *  where the validation set error may decrease indefinitely). In either case
    *  the returned solution corresponds to the minimum of the validation set
    *  error.
    *
    *  -- ALGLIB --
    *    Copyright 10.03.2009 by Bochkanov Sergey
    *************************************************************************/
    public static void mlptraines(ref mlpbase.multilayerperceptron network,
                                  ref double[,] trnxy,
                                  int trnsize,
                                  ref double[,] valxy,
                                  int valsize,
                                  double decay,
                                  int restarts,
                                  ref int info,
                                  ref mlpreport rep)
    {
        int i = 0;
        //int j = 0;
        int pass   = 0;
        int nin    = 0;
        int nout   = 0;
        int wcount = 0;

        double[] w     = new double[0];
        double[] wbest = new double[0];
        double   e     = 0;
        double   v     = 0;
        double   ebest = 0;

        double[] wfinal = new double[0];
        double   efinal = 0;
        int      itbest = 0;
        int      itcnt  = 0;           // iterations completed in the current pass

        lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
        lbfgs.lbfgsstate  state       = new lbfgs.lbfgsstate();
        double            wstep       = 0;
        int i_ = 0;

        wstep = 0.001;

        //
        // Test inputs, parse flags, read network geometry
        //
        if (trnsize <= 0 | valsize <= 0 | restarts < 1 | decay < 0)
        {
            info = -1;
            return;
        }
        mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
        if (mlpbase.mlpissoftmax(ref network))
        {
            for (i = 0; i <= trnsize - 1; i++)
            {
                if ((int)Math.Round(trnxy[i, nin]) < 0 | (int)Math.Round(trnxy[i, nin]) >= nout)
                {
                    info = -2;
                    return;
                }
            }
            for (i = 0; i <= valsize - 1; i++)
            {
                if ((int)Math.Round(valxy[i, nin]) < 0 | (int)Math.Round(valxy[i, nin]) >= nout)
                {
                    info = -2;
                    return;
                }
            }
        }
        info = 2;

        //
        // Prepare
        //
        mlpbase.mlpinitpreprocessor(ref network, ref trnxy, trnsize);
        w      = new double[wcount - 1 + 1];
        wbest  = new double[wcount - 1 + 1];
        wfinal = new double[wcount - 1 + 1];
        efinal = AP.Math.MaxRealNumber;
        for (i = 0; i <= wcount - 1; i++)
        {
            wfinal[i] = 0;
        }

        //
        // Multiple starts
        //
        rep.ncholesky = 0;
        rep.nhess     = 0;
        rep.ngrad     = 0;
        for (pass = 1; pass <= restarts; pass++)
        {
            //
            // Process
            //
            mlpbase.mlprandomize(ref network);
            ebest = mlpbase.mlperror(ref network, ref valxy, valsize);
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
            itbest = 0;
            itcnt  = 0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                w[i_] = network.weights[i_];
            }
            lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, 0, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                //
                // Calculate gradient
                //
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = state.x[i_];
                }
                mlpbase.mlpgradnbatch(ref network, ref trnxy, trnsize, ref state.f, ref state.g);
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                state.f = state.f + 0.5 * decay * v;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    state.g[i_] = state.g[i_] + decay * network.weights[i_];
                }
                rep.ngrad = rep.ngrad + 1;

                //
                // Validation set
                //
                if (state.xupdated)
                {
                    //
                    // Evaluate the validation error at the current iterate
                    // (state.x holds the updated point) and track the best one.
                    //
                    itcnt = itcnt + 1;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    e = mlpbase.mlperror(ref network, ref valxy, valsize);
                    if (e < ebest)
                    {
                        ebest = e;
                        for (i_ = 0; i_ <= wcount - 1; i_++)
                        {
                            wbest[i_] = network.weights[i_];
                        }
                        itbest = itcnt;
                    }
                    // early stopping: validation error has not improved for long enough
                    if (itcnt > 30 & itcnt > 1.5 * itbest)
                    {
                        info = 6;
                        break;
                    }
                }
            }
            lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);

            //
            // Compare with final answer
            //
            if (ebest < efinal)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wfinal[i_] = wbest[i_];
                }
                efinal = ebest;
            }
        }

        //
        // The best network
        //
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            network.weights[i_] = wfinal[i_];
        }
    }
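A usage sketch for mlptraines (the mlpbase.mlpcreate1 constructor and the enclosing mlptrain class are assumed from the ALGLIB 2.x API; the data arrays are placeholders to be filled with real samples):

    // 2 inputs, 5 hidden neurons, 1 output; rows of trnxy/valxy are (inputs..., target).
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(2, 5, 1, ref network);
    double[,] trnxy = new double[100, 3];   // training set (placeholder)
    double[,] valxy = new double[30, 3];    // validation set (placeholder)
    int info = 0;
    mlpreport rep = new mlpreport();
    mlptrain.mlptraines(ref network, ref trnxy, 100, ref valxy, 30, 0.001, 2, ref info, ref rep);
    // info = 6 (validation error grew) or 2 (small step) on success; negative on bad inputs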
Example #4
    /*************************************************************************
    *  Neural network training using modified Levenberg-Marquardt with exact
    *  Hessian calculation and regularization. The subroutine trains the neural
    *  network with restarts from random positions. The algorithm is well suited
    *  for small and medium-scale problems (hundreds of weights).
    *
    *  INPUT PARAMETERS:
    *   Network     -   neural network with initialized geometry
    *   XY          -   training set
    *   NPoints     -   training set size
    *   Decay       -   weight decay constant, >=0.001.
    *                   Decay term 'Decay*||Weights||^2' is added to the error
    *                   function.
    *                   If you don't know what Decay to choose, use 0.001.
    *   Restarts    -   number of restarts from random positions, >0.
    *                   If you don't know what Restarts to choose, use 2.
    *
    *  OUTPUT PARAMETERS:
    *   Network     -   trained neural network.
    *   Info        -   return code:
    *                   -9, if the internal matrix inverse subroutine failed
    *                   -2, if there is a point with class number outside of
    *                       [0..NOut-1].
    *                   -1, if wrong parameters were specified
    *                       (NPoints<0, Restarts<1).
    *                    2, if the task has been solved.
    *   Rep         -   training report
    *
    *  -- ALGLIB --
    *    Copyright 10.03.2009 by Bochkanov Sergey
    *************************************************************************/
    public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
                                  ref double[,] xy,
                                  int npoints,
                                  double decay,
                                  int restarts,
                                  ref int info,
                                  ref mlpreport rep)
    {
        int nin    = 0;
        int nout   = 0;
        int wcount = 0;
        //double lmftol = 0;
        double lmsteptol = 0;
        int    i         = 0;
        //int j = 0;
        int k = 0;
        //int mx = 0;
        double v        = 0;
        double e        = 0;
        double enew     = 0;
        double xnorm2   = 0;
        double stepnorm = 0;

        double[] g = new double[0];
        double[] d = new double[0];
        double[,] h    = new double[0, 0];
        double[,] hmod = new double[0, 0];
        double[,] z    = new double[0, 0];
        bool   spd        = false;
        double nu         = 0;
        double lambda     = 0;
        double lambdaup   = 0;
        double lambdadown = 0;

        //int cvcnt = 0;
        //double cvrelcnt = 0;
        lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
        lbfgs.lbfgsstate  state       = new lbfgs.lbfgsstate();
        double[]          x           = new double[0];
        double[]          y           = new double[0];
        double[]          wbase       = new double[0];
        //double wstep = 0;
        double[] wdir = new double[0];
        double[] wt   = new double[0];
        double[] wx   = new double[0];
        int      pass = 0;

        double[] wbest = new double[0];
        double   ebest = 0;
        int      i_    = 0;

        mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
        lambdaup   = 10;
        lambdadown = 0.3;
        //lmftol = 0.001;
        lmsteptol = 0.001;

        //
        // Test for inputs
        //
        if (npoints <= 0 | restarts < 1)
        {
            info = -1;
            return;
        }
        if (mlpbase.mlpissoftmax(ref network))
        {
            for (i = 0; i <= npoints - 1; i++)
            {
                if ((int)Math.Round(xy[i, nin]) < 0 | (int)Math.Round(xy[i, nin]) >= nout)
                {
                    info = -2;
                    return;
                }
            }
        }
        decay = Math.Max(decay, mindecay);
        info  = 2;

        //
        // Initialize data
        //
        rep.ngrad     = 0;
        rep.nhess     = 0;
        rep.ncholesky = 0;

        //
        // General case.
        // Prepare task and network. Allocate space.
        //
        mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
        g     = new double[wcount - 1 + 1];
        h     = new double[wcount - 1 + 1, wcount - 1 + 1];
        hmod  = new double[wcount - 1 + 1, wcount - 1 + 1];
        wbase = new double[wcount - 1 + 1];
        wdir  = new double[wcount - 1 + 1];
        wbest = new double[wcount - 1 + 1];
        wt    = new double[wcount - 1 + 1];
        wx    = new double[wcount - 1 + 1];
        ebest = AP.Math.MaxRealNumber;

        //
        // Multiple passes
        //
        for (pass = 1; pass <= restarts; pass++)
        {
            //
            // Initialize weights
            //
            mlpbase.mlprandomize(ref network);

            //
            // First stage of the hybrid algorithm: LBFGS
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            lbfgs.minlbfgs(wcount, Math.Min(wcount, 5), ref wbase, 0.0, 0.0, 0.0, Math.Max(25, wcount), 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                //
                // gradient
                //
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = state.x[i_];
                }
                mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

                //
                // weight decay
                //
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                state.f = state.f + 0.5 * decay * v;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    state.g[i_] = state.g[i_] + decay * network.weights[i_];
                }

                //
                // next iteration
                //
                rep.ngrad = rep.ngrad + 1;
            }
            lbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = wbase[i_];
            }

            //
            // Second stage of the hybrid algorithm: LM
            //
            // Initialize H with identity matrix,
            // G with gradient,
            // E with regularized error.
            //
            mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            e = e + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                g[i_] = g[i_] + decay * network.weights[i_];
            }
            for (k = 0; k <= wcount - 1; k++)
            {
                h[k, k] = h[k, k] + decay;
            }
            rep.nhess = rep.nhess + 1;
            lambda    = 0.001;
            nu        = 2;
            while (true)
            {
                //
                // 1. HMod = H+lambda*I
                // 2. Try to solve (H+Lambda*I)*dx = -g.
                //    Increase lambda if left part is not positive definite.
                //
                for (i = 0; i <= wcount - 1; i++)
                {
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        hmod[i, i_] = h[i, i_];
                    }
                    hmod[i, i] = hmod[i, i] + lambda;
                }
                spd           = cholesky.spdmatrixcholesky(ref hmod, wcount, true);
                rep.ncholesky = rep.ncholesky + 1;
                if (!spd)
                {
                    lambda = lambda * lambdaup * nu;
                    nu     = nu * 2;
                    continue;
                }
                if (!spdsolve.spdmatrixcholeskysolve(ref hmod, g, wcount, true, ref wdir))
                {
                    lambda = lambda * lambdaup * nu;
                    nu     = nu * 2;
                    continue;
                }
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wdir[i_] = -1 * wdir[i_];
                }

                //
                // Lambda found.
                // 1. Save old weights in WBase.
                // 2. Test stopping criteria.
                // 3. If error(w+wdir) > error(w), increase lambda.
                //
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = network.weights[i_] + wdir[i_];
                }
                xnorm2 = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    xnorm2 += network.weights[i_] * network.weights[i_];
                }
                stepnorm = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    stepnorm += wdir[i_] * wdir[i_];
                }
                stepnorm = Math.Sqrt(stepnorm);
                enew     = mlpbase.mlperror(ref network, ref xy, npoints) + 0.5 * decay * xnorm2;
                if (stepnorm < lmsteptol * (1 + Math.Sqrt(xnorm2)))
                {
                    break;
                }
                if (enew > e)
                {
                    lambda = lambda * lambdaup * nu;
                    nu     = nu * 2;
                    continue;
                }

                //
                // Optimize using inv(cholesky(H)) as preconditioner
                //
                if (!trinverse.rmatrixtrinverse(ref hmod, wcount, true, false))
                {
                    //
                    // if matrix can't be inverted then exit with errors
                    // TODO: make WCount steps in direction suggested by HMod
                    //
                    info = -9;
                    return;
                }
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wbase[i_] = network.weights[i_];
                }
                for (i = 0; i <= wcount - 1; i++)
                {
                    wt[i] = 0;
                }
                lbfgs.minlbfgs(wcount, wcount, ref wt, 0.0, 0.0, 0.0, 5, 0, ref state);
                while (lbfgs.minlbfgsiteration(ref state))
                {
                    //
                    // gradient
                    //
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        v = 0.0;
                        for (i_ = i; i_ <= wcount - 1; i_++)
                        {
                            v += state.x[i_] * hmod[i, i_];
                        }
                        network.weights[i] = wbase[i] + v;
                    }
                    mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        state.g[i] = 0;
                    }
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        v = g[i];
                        for (i_ = i; i_ <= wcount - 1; i_++)
                        {
                            state.g[i_] = state.g[i_] + v * hmod[i, i_];
                        }
                    }

                    //
                    // weight decay: with w = w0 + A*t (A = the HMod rows used above),
                    // grad_t(0.5*decay*||w||^2) = decay*A'*w
                    //
                    v = 0.0;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        v += network.weights[i_] * network.weights[i_];
                    }
                    state.f = state.f + 0.5 * decay * v;
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        v = decay * network.weights[i];
                        for (i_ = i; i_ <= wcount - 1; i_++)
                        {
                            state.g[i_] = state.g[i_] + v * hmod[i, i_];
                        }
                    }

                    //
                    // next iteration
                    //
                    rep.ngrad = rep.ngrad + 1;
                }
                lbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

                //
                // Accept new position.
                // Calculate Hessian
                //
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = 0.0;
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        v += wt[i_] * hmod[i, i_];
                    }
                    network.weights[i] = wbase[i] + v;
                }
                mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                e = e + 0.5 * decay * v;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    g[i_] = g[i_] + decay * network.weights[i_];
                }
                for (k = 0; k <= wcount - 1; k++)
                {
                    h[k, k] = h[k, k] + decay;
                }
                rep.nhess = rep.nhess + 1;

                //
                // Update lambda
                //
                lambda = lambda * lambdadown;
                nu     = 2;
            }

            //
            // update WBest
            //
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            e = 0.5 * decay * v + mlpbase.mlperror(ref network, ref xy, npoints);
            if (e < ebest)
            {
                ebest = e;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wbest[i_] = network.weights[i_];
                }
            }
        }

        //
        // copy WBest to output
        //
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            network.weights[i_] = wbest[i_];
        }
    }
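Calling mlptrainlm follows the same shape as the other trainers on this page; a short sketch (placeholder data, ALGLIB 2.x-style mlpbase/mlptrain classes assumed):

    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(2, 5, 1, ref network);
    double[,] xy = new double[100, 3];      // rows are (inputs..., target); placeholder
    int info = 0;
    mlpreport rep = new mlpreport();
    mlptrain.mlptrainlm(ref network, ref xy, 100, 0.001, 2, ref info, ref rep);
    // rep.ngrad, rep.nhess and rep.ncholesky count the work done; info = 2 on success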
Example #5
    /*************************************************************************
    *  Neural network training using the L-BFGS algorithm with regularization.
    *  The subroutine trains the neural network with restarts from random
    *  positions. The algorithm is well suited for problems of any dimensionality
    *  (memory requirements and step complexity are linear in the number of
    *  weights).
    *
    *  INPUT PARAMETERS:
    *   Network     -   neural network with initialized geometry
    *   XY          -   training set
    *   NPoints     -   training set size
    *   Decay       -   weight decay constant, >=0.001.
    *                   Decay term 'Decay*||Weights||^2' is added to the error
    *                   function.
    *                   If you don't know what Decay to choose, use 0.001.
    *   Restarts    -   number of restarts from random positions, >0.
    *                   If you don't know what Restarts to choose, use 2.
    *   WStep       -   stopping criterion. The algorithm stops if the step size
    *                   is less than WStep. Recommended value - 0.01. A zero
    *                   WStep means stopping after MaxIts iterations.
    *   MaxIts      -   stopping criterion. The algorithm stops after MaxIts
    *                   iterations (NOT gradient calculations). Zero MaxIts
    *                   means stopping when the step is sufficiently small.
    *
    *  OUTPUT PARAMETERS:
    *   Network     -   trained neural network.
    *   Info        -   return code:
    *                   -8, if both WStep=0 and MaxIts=0
    *                   -2, if there is a point with class number outside of
    *                       [0..NOut-1].
    *                   -1, if wrong parameters were specified
    *                       (NPoints<0, Restarts<1).
    *                    2, if the task has been solved.
    *   Rep         -   training report
    *
    *  -- ALGLIB --
    *    Copyright 09.12.2007 by Bochkanov Sergey
    *************************************************************************/
    public static void mlptrainlbfgs(ref mlpbase.multilayerperceptron network,
                                     ref double[,] xy,
                                     int npoints,
                                     double decay,
                                     int restarts,
                                     double wstep,
                                     int maxits,
                                     ref int info,
                                     ref mlpreport rep)
    {
        int i = 0;
        //int j = 0;
        int pass   = 0;
        int nin    = 0;
        int nout   = 0;
        int wcount = 0;

        double[] w     = new double[0];
        double[] wbest = new double[0];
        double   e     = 0;
        double   v     = 0;
        double   ebest = 0;

        lbfgs.lbfgsreport internalrep = new lbfgs.lbfgsreport();
        lbfgs.lbfgsstate  state       = new lbfgs.lbfgsstate();
        int i_ = 0;


        //
        // Test inputs, parse flags, read network geometry
        //
        if (wstep == 0 & maxits == 0)
        {
            info = -8;
            return;
        }
        if (npoints <= 0 | restarts < 1 | wstep < 0 | maxits < 0)
        {
            info = -1;
            return;
        }
        mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
        if (mlpbase.mlpissoftmax(ref network))
        {
            for (i = 0; i <= npoints - 1; i++)
            {
                if ((int)Math.Round(xy[i, nin]) < 0 | (int)Math.Round(xy[i, nin]) >= nout)
                {
                    info = -2;
                    return;
                }
            }
        }
        decay = Math.Max(decay, mindecay);
        info  = 2;

        //
        // Prepare
        //
        mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
        w     = new double[wcount - 1 + 1];
        wbest = new double[wcount - 1 + 1];
        ebest = AP.Math.MaxRealNumber;

        //
        // Multiple starts
        //
        rep.ncholesky = 0;
        rep.nhess     = 0;
        rep.ngrad     = 0;
        for (pass = 1; pass <= restarts; pass++)
        {
            //
            // Process
            //
            mlpbase.mlprandomize(ref network);
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                w[i_] = network.weights[i_];
            }
            lbfgs.minlbfgs(wcount, Math.Min(wcount, 50), ref w, 0.0, 0.0, wstep, maxits, 0, ref state);
            while (lbfgs.minlbfgsiteration(ref state))
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = state.x[i_];
                }
                mlpbase.mlpgradnbatch(ref network, ref xy, npoints, ref state.f, ref state.g);
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                state.f = state.f + 0.5 * decay * v;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    state.g[i_] = state.g[i_] + decay * network.weights[i_];
                }
                rep.ngrad = rep.ngrad + 1;
            }
            lbfgs.minlbfgsresults(ref state, ref w, ref internalrep);
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = w[i_];
            }

            //
            // Compare with best
            //
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            e = mlpbase.mlperrorn(ref network, ref xy, npoints) + 0.5 * decay * v;
            if (e < ebest)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wbest[i_] = network.weights[i_];
                }
                ebest = e;
            }
        }

        //
        // The best network
        //
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            network.weights[i_] = wbest[i_];
        }
    }
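And a matching sketch for mlptrainlbfgs, showing the two stopping criteria described in the comment above (placeholder data, ALGLIB 2.x-style classes assumed):

    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    mlpbase.mlpcreate1(2, 5, 1, ref network);
    double[,] xy = new double[100, 3];      // rows are (inputs..., target); placeholder
    int info = 0;
    mlpreport rep = new mlpreport();
    // WStep = 0.01 (recommended), MaxIts = 0: stop when the step becomes small enough
    mlptrain.mlptrainlbfgs(ref network, ref xy, 100, 0.001, 2, 0.01, 0, ref info, ref rep);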