Example #1
        Neural network training  using  modified  Levenberg-Marquardt  with  exact
        Hessian calculation and regularization. Subroutine trains  neural  network
        with restarts from random positions. Algorithm is well  suited  for  small
        and medium scale problems (hundreds of weights).

            Network     -   neural network with initialized geometry
            XY          -   training set
            NPoints     -   training set size
            Decay       -   weight decay constant, >=0.001
                            Decay term 'Decay*||Weights||^2' is added to error
                            If you don't know what Decay to choose, use 0.001.
            Restarts    -   number of restarts from random position, >0.
                            If you don't know what Restarts to choose, use 2.

            Network     -   trained neural network.
            Info        -   return code:
                            * -9, if internal matrix inverse subroutine failed
                            * -2, if there is a point with class number
                                  outside of [0..NOut-1].
                            * -1, if wrong parameters specified
                                  (NPoints<=0, Restarts<1).
                            *  2, if task has been solved.
            Rep         -   training report

          -- ALGLIB --
             Copyright 10.03.2009 by Bochkanov Sergey
        // Trains a perceptron with a hybrid scheme: a short L-BFGS warm-up followed by
        // Levenberg-Marquardt steps on the exact, decay-regularized Hessian. The whole
        // procedure is repeated 'restarts' times and the best weights found are kept.
        //
        // network  - perceptron to train; on success its weights are replaced by the best set.
        // xy       - training set; column 'nin' holds the class label for softmax networks.
        // npoints  - number of rows of xy to use, must be > 0.
        // decay    - weight decay coefficient; raised to mindecay when smaller.
        // restarts - number of training passes, must be >= 1.
        // info     - out:  2 on success,
        //                 -1 if npoints <= 0 or restarts < 1,
        //                 -2 if a softmax class label is outside [0, nout),
        //                 -9 if the triangular inverse of the Cholesky factor fails.
        // rep      - out: counters of gradient, Hessian and Cholesky evaluations.
        public static void mlptrainlm(mlpbase.multilayerperceptron network,
            double[,] xy,
            int npoints,
            double decay,
            int restarts,
            ref int info,
            mlpreport rep)
        {
            int nin = 0;
            int nout = 0;
            int wcount = 0;
            double lmsteptol = 0;
            int i = 0;
            int k = 0;
            double v = 0;
            double e = 0;
            double enew = 0;
            double xnorm2 = 0;
            double stepnorm = 0;
            double[] g = new double[0];
            double[,] h = new double[0,0];
            double[,] hmod = new double[0,0];
            bool spd = false;
            double nu = 0;
            double lambdav = 0;
            double lambdaup = 0;
            double lambdadown = 0;
            minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
            minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
            double[] wbase = new double[0];
            double[] wdir = new double[0];
            double[] wt = new double[0];
            int pass = 0;
            double[] wbest = new double[0];
            double ebest = 0;
            int invinfo = 0;
            matinv.matinvreport invrep = new matinv.matinvreport();
            int solverinfo = 0;
            densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
            int i_ = 0;

            info = 0;

            mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);
            lambdaup = 10;
            lambdadown = 0.3;
            lmsteptol = 0.001;

            //
            // Test for inputs.
            // Each failure must leave immediately, otherwise the error code
            // would be overwritten by the success code assigned below.
            //
            if( npoints<=0 || restarts<1 )
            {
                info = -1;
                return;
            }
            if( mlpbase.mlpissoftmax(network) )
            {
                for(i=0; i<=npoints-1; i++)
                {
                    if( (int)Math.Round(xy[i,nin])<0 || (int)Math.Round(xy[i,nin])>=nout )
                    {
                        info = -2;
                        return;
                    }
                }
            }
            decay = Math.Max(decay, mindecay);
            info = 2;

            //
            // Initialize data
            //
            rep.ngrad = 0;
            rep.nhess = 0;
            rep.ncholesky = 0;

            //
            // General case.
            // Prepare task and network. Allocate space.
            //
            mlpbase.mlpinitpreprocessor(network, xy, npoints);
            g = new double[wcount];
            h = new double[wcount, wcount];
            hmod = new double[wcount, wcount];
            wbase = new double[wcount];
            wdir = new double[wcount];
            wbest = new double[wcount];
            wt = new double[wcount];
            ebest = math.maxrealnumber;

            //
            // Multiple passes
            //
            for(pass=1; pass<=restarts; pass++)
            {

                //
                // Initialize weights.
                // NOTE(review): each pass is documented as a restart from a random
                // position, so the weights are re-randomized here - confirm that
                // mlpbase.mlprandomize has this signature in the library version in use.
                //
                mlpbase.mlprandomize(network);

                //
                // First stage of the hybrid algorithm: LBFGS
                //
                for(i_=0; i_<=wcount-1;i_++)
                {
                    wbase[i_] = network.weights[i_];
                }
                minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), wbase, state);
                minlbfgs.minlbfgssetcond(state, 0, 0, 0, Math.Max(25, wcount));
                while( minlbfgs.minlbfgsiteration(state) )
                {

                    //
                    // gradient
                    //
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref state.g);

                    //
                    // weight decay
                    //
                    v = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    state.f = state.f+0.5*decay*v;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        state.g[i_] = state.g[i_] + decay*network.weights[i_];
                    }

                    //
                    // next iteration
                    //
                    rep.ngrad = rep.ngrad+1;
                }
                minlbfgs.minlbfgsresults(state, ref wbase, internalrep);
                for(i_=0; i_<=wcount-1;i_++)
                {
                    network.weights[i_] = wbase[i_];
                }

                //
                // Second stage of the hybrid algorithm: LM
                //
                // Initialize H with the regularized Hessian,
                // G with the regularized gradient,
                // E with regularized error.
                //
                mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h);
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = e+0.5*decay*v;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    g[i_] = g[i_] + decay*network.weights[i_];
                }
                for(k=0; k<=wcount-1; k++)
                {
                    h[k,k] = h[k,k]+decay;
                }
                rep.nhess = rep.nhess+1;
                lambdav = 0.001;
                nu = 2;
                while( true )
                {

                    //
                    // 1. HMod = H+lambda*I
                    // 2. Try to solve (H+Lambda*I)*dx = -g.
                    //    Increase lambda if left part is not positive definite.
                    //
                    for(i=0; i<=wcount-1; i++)
                    {
                        for(i_=0; i_<=wcount-1;i_++)
                        {
                            hmod[i,i_] = h[i,i_];
                        }
                        hmod[i,i] = hmod[i,i]+lambdav;
                    }
                    spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
                    rep.ncholesky = rep.ncholesky+1;
                    if( !spd )
                    {
                        // retry with a stronger damping term
                        lambdav = lambdav*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }
                    densesolver.spdmatrixcholeskysolve(hmod, wcount, true, g, ref solverinfo, solverrep, ref wdir);
                    if( solverinfo<0 )
                    {
                        lambdav = lambdav*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        wdir[i_] = -1*wdir[i_];
                    }

                    //
                    // Lambda found.
                    // 1. Take the step
                    // 2. Test stopping criterion (step is small relative to the weights)
                    // 3. If error(w+wdir)>error(w), increase lambda
                    //
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        network.weights[i_] = network.weights[i_] + wdir[i_];
                    }
                    xnorm2 = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        xnorm2 += network.weights[i_]*network.weights[i_];
                    }
                    stepnorm = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        stepnorm += wdir[i_]*wdir[i_];
                    }
                    stepnorm = Math.Sqrt(stepnorm);
                    enew = mlpbase.mlperror(network, xy, npoints)+0.5*decay*xnorm2;
                    if( (double)(stepnorm)<(double)(lmsteptol*(1+Math.Sqrt(xnorm2))) )
                    {
                        // the only exit from the LM loop besides the -9 failure below
                        break;
                    }
                    if( (double)(enew)>(double)(e) )
                    {
                        lambdav = lambdav*lambdaup*nu;
                        nu = nu*2;
                        continue;
                    }

                    //
                    // Optimize using inv(cholesky(H)) as preconditioner
                    //
                    matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, invrep);
                    if( invinfo<=0 )
                    {

                        //
                        // if matrix can't be inverted then exit with errors
                        // TODO: make WCount steps in direction suggested by HMod
                        //
                        info = -9;
                        return;
                    }
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        wbase[i_] = network.weights[i_];
                    }
                    for(i=0; i<=wcount-1; i++)
                    {
                        wt[i] = 0;
                    }
                    minlbfgs.minlbfgscreatex(wcount, wcount, wt, 1, 0.0, state);
                    minlbfgs.minlbfgssetcond(state, 0, 0, 0, 5);
                    while( minlbfgs.minlbfgsiteration(state) )
                    {

                        //
                        // gradient: weights = wbase + A*t, where A is the upper
                        // triangle of hmod and t is the L-BFGS variable
                        //
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = 0.0;
                            for(i_=i; i_<=wcount-1;i_++)
                            {
                                v += state.x[i_]*hmod[i,i_];
                            }
                            network.weights[i] = wbase[i]+v;
                        }
                        mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref g);
                        for(i=0; i<=wcount-1; i++)
                        {
                            state.g[i] = 0;
                        }
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = g[i];
                            for(i_=i; i_<=wcount-1;i_++)
                            {
                                state.g[i_] = state.g[i_] + v*hmod[i,i_];
                            }
                        }

                        //
                        // weight decay
                        // grad(x'*x) = A'*(x0+A*t)
                        //
                        v = 0.0;
                        for(i_=0; i_<=wcount-1;i_++)
                        {
                            v += network.weights[i_]*network.weights[i_];
                        }
                        state.f = state.f+0.5*decay*v;
                        for(i=0; i<=wcount-1; i++)
                        {
                            v = decay*network.weights[i];
                            for(i_=i; i_<=wcount-1;i_++)
                            {
                                state.g[i_] = state.g[i_] + v*hmod[i,i_];
                            }
                        }

                        //
                        // next iteration
                        //
                        rep.ngrad = rep.ngrad+1;
                    }
                    minlbfgs.minlbfgsresults(state, ref wt, internalrep);

                    //
                    // Accept new position.
                    // Calculate Hessian
                    //
                    for(i=0; i<=wcount-1; i++)
                    {
                        v = 0.0;
                        for(i_=i; i_<=wcount-1;i_++)
                        {
                            v += wt[i_]*hmod[i,i_];
                        }
                        network.weights[i] = wbase[i]+v;
                    }
                    mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h);
                    v = 0.0;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        v += network.weights[i_]*network.weights[i_];
                    }
                    e = e+0.5*decay*v;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        g[i_] = g[i_] + decay*network.weights[i_];
                    }
                    for(k=0; k<=wcount-1; k++)
                    {
                        h[k,k] = h[k,k]+decay;
                    }
                    rep.nhess = rep.nhess+1;

                    //
                    // Update lambda
                    //
                    lambdav = lambdav*lambdadown;
                    nu = 2;
                }

                //
                // update WBest
                //
                v = 0.0;
                for(i_=0; i_<=wcount-1;i_++)
                {
                    v += network.weights[i_]*network.weights[i_];
                }
                e = 0.5*decay*v+mlpbase.mlperror(network, xy, npoints);
                if( (double)(e)<(double)(ebest) )
                {
                    ebest = e;
                    for(i_=0; i_<=wcount-1;i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                }
            }

            //
            // copy WBest to output
            //
            for(i_=0; i_<=wcount-1;i_++)
            {
                network.weights[i_] = wbest[i_];
            }
        }
        HPD test
        // Tests inversion of Hermitian positive definite matrices.
        //
        // For every pass and every size n in [1, maxn] it checks
        //   * hpdmatrixinverse / hpdmatrixcholeskyinverse on a random well conditioned matrix,
        //   * hpdmatrixcholeskyinverse on three kinds of degenerate input.
        //
        // maxn      - largest matrix size to test.
        // passcount - number of repetitions.
        // threshold - tolerance forwarded to hpdmatrixcheckinverse.
        // hpderrors - in/out flag; set to true when a check fails, never cleared here.
        private static void testhpdinv(int maxn,
            int passcount,
            double threshold,
            ref bool hpderrors)
        {
            complex[,] a = new complex[0,0];
            complex[,] cha = new complex[0,0];
            complex[,] inva = new complex[0,0];
            complex[,] invcha = new complex[0,0];
            bool isupper = false;
            int i = 0;
            int j = 0;
            int k = 0;
            int n = 0;
            int pass = 0;
            int taskkind = 0;
            int info = 0;
            matinv.matinvreport rep = new matinv.matinvreport();
            int i_ = 0;

            for(pass=1; pass<=passcount; pass++)
            {
                for(n=1; n<=maxn; n++)
                {
                    isupper = (double)(math.randomreal())>(double)(0.5);

                    //
                    // ********************************************************
                    // WELL CONDITIONED TASKS
                    // ability to find correct inverse is tested
                    // ********************************************************
                    //
                    // 1. generate random well conditioned HPD matrix A,
                    //    keep only the triangle selected by IsUpper
                    // 2. factorize a copy of A
                    // 3. invert both the original matrix and the factor
                    //
                    matgen.hpdmatrixrndcond(n, 1000, ref a);
                    cmatrixdrophalf(ref a, n, isupper);
                    cmatrixmakeacopy(a, n, n, ref cha);
                    if( !trfac.hpdmatrixcholesky(ref cha, n, isupper) )
                    {
                        // no factor - nothing to invert for this size
                        continue;
                    }
                    cmatrixmakeacopy(a, n, n, ref inva);
                    cmatrixmakeacopy(cha, n, n, ref invcha);
                    info = 0;
                    matinv.hpdmatrixinverse(ref inva, n, isupper, ref info, rep);
                    hpderrors = hpderrors | !hpdmatrixcheckinverse(a, inva, isupper, n, threshold, info, rep);
                    info = 0;
                    matinv.hpdmatrixcholeskyinverse(ref invcha, n, isupper, ref info, rep);
                    hpderrors = hpderrors | !hpdmatrixcheckinverse(a, invcha, isupper, n, threshold, info, rep);

                    //
                    // ********************************************************
                    // EXACTLY SINGULAR MATRICES
                    // ability to detect singularity is tested
                    // ********************************************************
                    //
                    // 1. generate different types of singular matrices:
                    //    * zero
                    //    * with zero columns
                    //    * with zero rows
                    // 2. test different methods
                    //
                    for(taskkind=0; taskkind<=2; taskkind++)
                    {
                        cunset2d(ref a);
                        if( taskkind==0 )
                        {

                            //
                            // all zeros
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j] = 0;
                                }
                            }
                        }
                        if( taskkind==1 )
                        {

                            //
                            // there is zero column (and the matching row)
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = math.randominteger(n);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[i_,k] = 0*a[i_,k];
                            }
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[k,i_] = 0*a[k,i_];
                            }
                        }
                        if( taskkind==2 )
                        {

                            //
                            // there is zero row (and the matching column)
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = math.randominteger(n);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[k,i_] = 0*a[k,i_];
                            }
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[i_,k] = 0*a[i_,k];
                            }
                        }
                        info = 0;
                        matinv.hpdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep);
                        if( info!=-3 & info!=1 )
                        {
                            // only "singular" (-3) and "solved" (1) are acceptable outcomes
                            hpderrors = true;
                        }
                        else
                        {
                            // reported condition numbers must lie in [0, 1000*eps]
                            hpderrors = (hpderrors | (double)(rep.r1)<(double)(0)) | (double)(rep.r1)>(double)(1000*math.machineepsilon);
                            hpderrors = (hpderrors | (double)(rep.rinf)<(double)(0)) | (double)(rep.rinf)>(double)(1000*math.machineepsilon);
                        }
                    }
                }
            }
        }
 // Creates a state object with every array field set to an empty array and
 // every nested report/state field set to a fresh default instance.
 public minlmstate()
 {
     x = new double[0];
     fi = new double[0];
     j = new double[0,0];
     h = new double[0,0];
     g = new double[0];
     internalstate = new minlbfgs.minlbfgsstate();
     internalrep = new minlbfgs.minlbfgsreport();
     xprec = new double[0];
     xbase = new double[0];
     xdir = new double[0];
     gbase = new double[0];
     xprev = new double[0];
     rawmodel = new double[0,0];
     model = new double[0,0];
     work = new double[0];
     rstate = new rcommstate();
     solverrep = new densesolver.densesolverreport();
     invrep = new matinv.matinvreport();
 }
        Complex TR inverse
        // Tests inversion of complex triangular matrices.
        //
        // For every size n in [1, maxn], every combination of upper/lower and
        // unit/non-unit diagonal, and every pass, a random matrix is inverted with
        // cmatrixtrinverse and two properties are checked:
        //   * entries outside the processed triangle (and the diagonal of a unit
        //     matrix) are left untouched,
        //   * the product of the matrix and its inverse is the identity within 'threshold'.
        //
        // maxn      - largest matrix size to test.
        // passcount - repetitions per configuration.
        // threshold - tolerance for the identity check.
        // ctrerrors - in/out flag; set to true when a check fails, never cleared here.
        private static void testctrinv(int maxn,
            int passcount,
            double threshold,
            ref bool ctrerrors)
        {
            complex[,] a = new complex[0,0];
            complex[,] b = new complex[0,0];
            int n = 0;
            int pass = 0;
            int i = 0;
            int j = 0;
            int task = 0;
            bool isupper = false;
            bool isunit = false;
            complex v = 0;
            int info = 0;
            matinv.matinvreport rep = new matinv.matinvreport();
            int i_ = 0;

            //
            // Test
            //
            for(n=1; n<=maxn; n++)
            {
                a = new complex[n, n];
                b = new complex[n, n];
                for(task=0; task<=3; task++)
                {
                    for(pass=1; pass<=passcount; pass++)
                    {

                        //
                        // Determine task
                        //
                        isupper = task%2==0;
                        isunit = task/2%2==0;

                        //
                        // Generate matrix: dominant diagonal, small off-diagonal entries
                        //
                        for(i=0; i<=n-1; i++)
                        {
                            for(j=0; j<=n-1; j++)
                            {
                                if( i==j )
                                {
                                    a[i,i].x = 1+math.randomreal();
                                    a[i,i].y = 1+math.randomreal();
                                }
                                else
                                {
                                    a[i,j].x = 0.2*math.randomreal()-0.1;
                                    a[i,j].y = 0.2*math.randomreal()-0.1;
                                }
                                b[i,j] = a[i,j];
                            }
                        }

                        //
                        // Inverse
                        //
                        matinv.cmatrixtrinverse(ref b, n, isupper, isunit, ref info, rep);
                        if( info<=0 )
                        {
                            // the flag is already raised, further checks cannot change it
                            ctrerrors = true;
                            return;
                        }

                        //
                        // Structural test: the part of B that does not belong to
                        // the triangular matrix must be identical to A
                        //
                        if( isunit )
                        {
                            for(i=0; i<=n-1; i++)
                            {
                                ctrerrors = ctrerrors | a[i,i]!=b[i,i];
                            }
                        }
                        if( isupper )
                        {
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=i-1; j++)
                                {
                                    ctrerrors = ctrerrors | a[i,j]!=b[i,j];
                                }
                            }
                        }
                        else
                        {
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=i+1; j<=n-1; j++)
                                {
                                    ctrerrors = ctrerrors | a[i,j]!=b[i,j];
                                }
                            }
                        }

                        //
                        // Inverse test: clear the unused triangle, force the unit
                        // diagonal when requested, then compare A*B with identity
                        //
                        for(i=0; i<=n-1; i++)
                        {
                            for(j=0; j<=n-1; j++)
                            {
                                if( (j<i & isupper) | (j>i & !isupper) )
                                {
                                    a[i,j] = 0;
                                    b[i,j] = 0;
                                }
                            }
                        }
                        if( isunit )
                        {
                            for(i=0; i<=n-1; i++)
                            {
                                a[i,i] = 1;
                                b[i,i] = 1;
                            }
                        }
                        for(i=0; i<=n-1; i++)
                        {
                            for(j=0; j<=n-1; j++)
                            {
                                v = 0.0;
                                for(i_=0; i_<=n-1;i_++)
                                {
                                    v += a[i,i_]*b[i_,j];
                                }
                                if( j!=i )
                                {
                                    ctrerrors = ctrerrors | (double)(math.abscomplex(v))>(double)(threshold);
                                }
                                else
                                {
                                    ctrerrors = ctrerrors | (double)(math.abscomplex(v-1))>(double)(threshold);
                                }
                            }
                        }
                    }
                }
            }
        }
        Complex test
        // Tests inversion of general complex matrices.
        //
        // For every pass and every size n in [1, maxn] it checks
        //   * cmatrixinverse / cmatrixluinverse on a random well conditioned matrix,
        //   * the same two routines on five kinds of exactly singular input.
        //
        // maxn      - largest matrix size to test.
        // passcount - number of repetitions.
        // threshold - tolerance forwarded to the check helpers.
        // cerrors   - in/out flag; set to true when a check fails, never cleared here.
        private static void testcinv(int maxn,
            int passcount,
            double threshold,
            ref bool cerrors)
        {
            complex[,] a = new complex[0,0];
            complex[,] lua = new complex[0,0];
            complex[,] inva = new complex[0,0];
            complex[,] invlua = new complex[0,0];
            int[] p = new int[0];
            int i = 0;
            int j = 0;
            int k = 0;
            int n = 0;
            int pass = 0;
            int taskkind = 0;
            int info = 0;
            matinv.matinvreport rep = new matinv.matinvreport();
            int i_ = 0;

            for(pass=1; pass<=passcount; pass++)
            {
                for(n=1; n<=maxn; n++)
                {

                    //
                    // ********************************************************
                    // WELL CONDITIONED TASKS
                    // ability to find correct inverse is tested
                    // ********************************************************
                    //
                    // 1. generate random well conditioned matrix A
                    // 2. build LU decomposition of a copy of A
                    // 3. invert both the original matrix and its LU form
                    //
                    matgen.cmatrixrndcond(n, 1000, ref a);
                    cmatrixmakeacopy(a, n, n, ref lua);
                    trfac.cmatrixlu(ref lua, n, n, ref p);
                    cmatrixmakeacopy(a, n, n, ref inva);
                    cmatrixmakeacopy(lua, n, n, ref invlua);
                    info = 0;
                    matinv.cmatrixinverse(ref inva, n, ref info, rep);
                    cerrors = cerrors | !cmatrixcheckinverse(a, inva, n, threshold, info, rep);
                    info = 0;
                    matinv.cmatrixluinverse(ref invlua, p, n, ref info, rep);
                    cerrors = cerrors | !cmatrixcheckinverse(a, invlua, n, threshold, info, rep);

                    //
                    // ********************************************************
                    // EXACTLY SINGULAR MATRICES
                    // ability to detect singularity is tested
                    // ********************************************************
                    //
                    // 1. generate different types of singular matrices:
                    //    * zero
                    //    * with zero columns
                    //    * with zero rows
                    //    * with equal rows/columns
                    // 2. test different methods
                    //
                    for(taskkind=0; taskkind<=4; taskkind++)
                    {
                        cunset2d(ref a);
                        if( taskkind==0 )
                        {

                            //
                            // all zeros
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j] = 0;
                                }
                            }
                        }
                        if( taskkind==1 )
                        {

                            //
                            // there is zero column
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = math.randominteger(n);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[i_,k] = 0*a[i_,k];
                            }
                        }
                        if( taskkind==2 )
                        {

                            //
                            // there is zero row
                            //
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = math.randominteger(n);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[k,i_] = 0*a[k,i_];
                            }
                        }
                        if( taskkind==3 )
                        {

                            //
                            // equal columns
                            //
                            if( n<2 )
                            {
                                // two distinct columns are needed
                                continue;
                            }
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = 1+math.randominteger(n-1);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[i_,0] = a[i_,k];
                            }
                        }
                        if( taskkind==4 )
                        {

                            //
                            // equal rows
                            //
                            if( n<2 )
                            {
                                // two distinct rows are needed
                                continue;
                            }
                            a = new complex[n, n];
                            for(i=0; i<=n-1; i++)
                            {
                                for(j=0; j<=n-1; j++)
                                {
                                    a[i,j].x = 2*math.randomreal()-1;
                                    a[i,j].y = 2*math.randomreal()-1;
                                }
                            }
                            k = 1+math.randominteger(n-1);
                            for(i_=0; i_<=n-1;i_++)
                            {
                                a[0,i_] = a[k,i_];
                            }
                        }
                        cmatrixmakeacopy(a, n, n, ref lua);
                        trfac.cmatrixlu(ref lua, n, n, ref p);
                        info = 0;
                        matinv.cmatrixinverse(ref a, n, ref info, rep);
                        cerrors = cerrors | !cmatrixcheckinversesingular(a, n, threshold, info, rep);
                        info = 0;
                        matinv.cmatrixluinverse(ref lua, p, n, ref info, rep);
                        cerrors = cerrors | !cmatrixcheckinversesingular(lua, n, threshold, info, rep);
                    }
                }
            }
        }
Example #6
File: linalg.cs Project: Ring-r/opt
        Algorithm for reduction of the following generalized symmetric positive-
        definite eigenvalue problem:
            A*x = lambda*B*x (1) or
            A*B*x = lambda*x (2) or
            B*A*x = lambda*x (3)
        to the symmetric eigenvalues problem C*y = lambda*y (eigenvalues of this and
        the given problems are the same, and the eigenvectors of the given problem
        could be obtained by multiplying the obtained eigenvectors by the
        transformation matrix x = R*y).

        Here A is a symmetric matrix, B - symmetric positive-definite matrix.

        Input parameters:
            A           -   symmetric matrix which is given by its upper or lower
                            triangular part.
                            Array whose indexes range within [0..N-1, 0..N-1].
            N           -   size of matrices A and B.
            IsUpperA    -   storage format of matrix A.
            B           -   symmetric positive-definite matrix which is given by
                            its upper or lower triangular part.
                            Array whose indexes range within [0..N-1, 0..N-1].
            IsUpperB    -   storage format of matrix B.
            ProblemType -   if ProblemType is equal to:
                             * 1, the following problem is solved: A*x = lambda*B*x;
                             * 2, the following problem is solved: A*B*x = lambda*x;
                             * 3, the following problem is solved: B*A*x = lambda*x.

        Output parameters:
            A           -   symmetric matrix which is given by its upper or lower
                            triangle depending on IsUpperA. Contains matrix C.
                            Array whose indexes range within [0..N-1, 0..N-1].
            R           -   upper triangular or lower triangular transformation matrix
                            which is used to obtain the eigenvectors of a given problem
                            as the product of eigenvectors of C (from the right) and
                            matrix R (from the left). If the matrix is upper
                            triangular, the elements below the main diagonal
                            are equal to 0 (and vice versa). Thus, we can perform
                            the multiplication without taking into account the
                            internal structure (which is an easier though less
                            effective way).
                            Array whose indexes range within [0..N-1, 0..N-1].
            IsUpperR    -   type of matrix R (upper or lower triangular).

            True, if the problem was reduced successfully.
            False, if the error occurred during the Cholesky decomposition of
                matrix B (the matrix is not positive-definite).

          -- ALGLIB --
             Copyright 1.28.2006 by Bochkanov Sergey
        public static bool smatrixgevdreduce(ref double[,] a,
            int n,
            bool isuppera,
            double[,] b,
            bool isupperb,
            int problemtype,
            ref double[,] r,
            ref bool isupperr)
            bool result = new bool();
            double[,] t = new double[0,0];
            double[] w1 = new double[0];
            double[] w2 = new double[0];
            double[] w3 = new double[0];
            int i = 0;
            int j = 0;
            double v = 0;
            matinv.matinvreport rep = new matinv.matinvreport();
            int info = 0;
            int i_ = 0;
            int i1_ = 0;

            r = new double[0,0];
            isupperr = new bool();

            ap.assert(n>0, "SMatrixGEVDReduce: N<=0!");
            ap.assert((problemtype==1 | problemtype==2) | problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!");
            result = true;
            // Problem 1:  A*x = lambda*B*x
            // Reducing to:
            //     C*y = lambda*y
            //     C = L^(-1) * A * L^(-T)
            //     x = L^(-T) * y
            if( problemtype==1 )
                // Factorize B in T: B = LL'
                t = new double[n-1+1, n-1+1];
                if( isupperb )
                    for(i=0; i<=n-1; i++)
                        for(i_=i; i_<=n-1;i_++)
                            t[i_,i] = b[i,i_];
                    for(i=0; i<=n-1; i++)
                        for(i_=0; i_<=i;i_++)
                            t[i,i_] = b[i,i_];
                if( !trfac.spdmatrixcholesky(ref t, n, false) )
                    result = false;
                    return result;
                // Invert L in T
                matinv.rmatrixtrinverse(ref t, n, false, false, ref info, rep);
                if( info<=0 )
                    result = false;
                    return result;
                // Build L^(-1) * A * L^(-T) in R
                w1 = new double[n+1];
                w2 = new double[n+1];
                r = new double[n-1+1, n-1+1];
                for(j=1; j<=n; j++)
                    // Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T))
                    i1_ = (0) - (1);
                    for(i_=1; i_<=j;i_++)
                        w1[i_] = t[j-1,i_+i1_];
                    sblas.symmetricmatrixvectormultiply(a, isuppera, 0, j-1, w1, 1.0, ref w2);
                    if( isuppera )
                        blas.matrixvectormultiply(a, 0, j-1, j, n-1, true, w1, 1, j, 1.0, ref w2, j+1, n, 0.0);
                        blas.matrixvectormultiply(a, j, n-1, 0, j-1, false, w1, 1, j, 1.0, ref w2, j+1, n, 0.0);
                    // Form l(i)*w2 (here l(i) is i-th row of L^(-1))
                    for(i=1; i<=n; i++)
                        i1_ = (1)-(0);
                        v = 0.0;
                        for(i_=0; i_<=i-1;i_++)
                            v += t[i-1,i_]*w2[i_+i1_];
                        r[i-1,j-1] = v;
                // Copy R to A
                for(i=0; i<=n-1; i++)
                    for(i_=0; i_<=n-1;i_++)
                        a[i,i_] = r[i,i_];
                // Copy L^(-1) from T to R and transpose
                isupperr = true;
                for(i=0; i<=n-1; i++)
                    for(j=0; j<=i-1; j++)
                        r[i,j] = 0;
                for(i=0; i<=n-1; i++)
                    for(i_=i; i_<=n-1;i_++)
                        r[i,i_] = t[i_,i];
                return result;
            // Problem 2:  A*B*x = lambda*x
            // or
            // problem 3:  B*A*x = lambda*x
            // Reducing to:
            //     C*y = lambda*y
            //     C = U * A * U'
            //     B = U'* U
            if( problemtype==2 | problemtype==3 )
                // Factorize B in T: B = U'*U
                t = new double[n-1+1, n-1+1];
                if( isupperb )
                    for(i=0; i<=n-1; i++)
                        for(i_=i; i_<=n-1;i_++)
                            t[i,i_] = b[i,i_];
                    for(i=0; i<=n-1; i++)
                        for(i_=i; i_<=n-1;i_++)
                            t[i,i_] = b[i_,i];
                if( !trfac.spdmatrixcholesky(ref t, n, true) )
                    result = false;
                    return result;
                // Build U * A * U' in R
                w1 = new double[n+1];
                w2 = new double[n+1];
                w3 = new double[n+1];
                r = new double[n-1+1, n-1+1];
                for(j=1; j<=n; j++)
                    // Form w2 = A * u'(j) (here u'(j) is j-th column of U')
                    i1_ = (j-1) - (1);
                    for(i_=1; i_<=n-j+1;i_++)
                        w1[i_] = t[j-1,i_+i1_];
                    sblas.symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, w1, 1.0, ref w3);
                    i1_ = (1) - (j);
                    for(i_=j; i_<=n;i_++)
                        w2[i_] = w3[i_+i1_];
                    i1_ = (j-1) - (j);
                    for(i_=j; i_<=n;i_++)
                        w1[i_] = t[j-1,i_+i1_];
                    if( isuppera )
                        blas.matrixvectormultiply(a, 0, j-2, j-1, n-1, false, w1, j, n, 1.0, ref w2, 1, j-1, 0.0);
                        blas.matrixvectormultiply(a, j-1, n-1, 0, j-2, true, w1, j, n, 1.0, ref w2, 1, j-1, 0.0);
                    // Form u(i)*w2 (here u(i) is i-th row of U)
                    for(i=1; i<=n; i++)
                        i1_ = (i)-(i-1);
                        v = 0.0;
                        for(i_=i-1; i_<=n-1;i_++)
                            v += t[i-1,i_]*w2[i_+i1_];
                        r[i-1,j-1] = v;
                // Copy R to A
                for(i=0; i<=n-1; i++)
                    for(i_=0; i_<=n-1;i_++)
                        a[i,i_] = r[i,i_];
                if( problemtype==2 )
                    // Invert U in T
                    matinv.rmatrixtrinverse(ref t, n, true, false, ref info, rep);
                    if( info<=0 )
                        result = false;
                        return result;
                    // Copy U^-1 from T to R
                    isupperr = true;
                    for(i=0; i<=n-1; i++)
                        for(j=0; j<=i-1; j++)
                            r[i,j] = 0;
                    for(i=0; i<=n-1; i++)
                        for(i_=i; i_<=n-1;i_++)
                            r[i,i_] = t[i,i_];
                    // Copy U from T to R and transpose
                    isupperr = false;
                    for(i=0; i<=n-1; i++)
                        for(j=i+1; j<=n-1; j++)
                            r[i,j] = 0;
                    for(i=0; i<=n-1; i++)
                        for(i_=i; i_<=n-1;i_++)
                            r[i_,i] = t[i,i_];
            return result;
Example #7
File: linalg.cs Project: Ring-r/opt
 // Wraps an existing core report object. The reference passed in is stored
 // as-is in _innerobj; no copy is made.
 public matinvreport(matinv.matinvreport obj)
 {
     _innerobj = obj;
 }
Example #8
File: linalg.cs Project: Ring-r/opt
 // Creates a wrapper around a freshly constructed core report object.
 public matinvreport()
 {
     _innerobj = new matinv.matinvreport();
 }
Example #9
        This internal function estimates covariance matrix and other error-related
        information for linear/nonlinear least squares model.

        It has a bit awkward interface, but it can be used  for  both  linear  and
        nonlinear problems.

            F1  -   array[0..N-1,0..K-1]:
                    * for linear problems - matrix of function values
                    * for nonlinear problems - Jacobian matrix
            F0  -   array[0..N-1]:
                    * for linear problems - must be filled with zeros
                    * for nonlinear problems - must store values of function being
                      fitted
            Y   -   array[0..N-1]:
                    * for linear and nonlinear problems - must store target values
            W   -   weights, array[0..N-1]:
                    * for linear and nonlinear problems - weights
            X   -   array[0..K-1]:
                    * for linear and nonlinear problems - current solution
            S   -   array[0..K-1]:
                    * its components should be strictly positive
                    * squared inverse of this diagonal matrix is used as damping
                      factor for covariance matrix (linear and nonlinear problems)
                    * for nonlinear problems, when scale of the variables is usually
                      explicitly given by user, you may use scale vector for this
                      parameter
                    * for linear problems you may set this parameter to
                      S=Sqrt(1/Diag(F'*F))
                    * this parameter is automatically rescaled by this function,
                      only relative magnitudes of its components (with respect to
                      each other) matter.
            N   -   number of points, N>0.
            K   -   number of dimensions
            Rep -   structure which is used to store results
            Z   -   additional matrix which, depending on ZKind, may contain some
                    information used to accelerate calculations - or just can be
                    temporary buffer:
                    * for ZKind=0       Z contains no information, just temporary
                                        buffer which can be resized and used as needed
                    * for ZKind=1       Z contains triangular matrix from QR
                                        decomposition of W*F1. This matrix can be used
                                        to speedup calculation of covariance matrix.
                                        It should not be changed by algorithm.
            ZKind-  contents of Z


        * Rep.CovPar        covariance matrix for parameters, array[K,K].
        * Rep.ErrPar        errors in parameters, array[K],
                            errpar = sqrt(diag(CovPar))
        * Rep.ErrCurve      vector of fit errors - standard deviations of empirical
                            best-fit curve from "ideal" best-fit curve built  with
                            infinite number of samples, array[N].
                            errcurve = sqrt(diag(J*CovPar*J')),
                            where J is Jacobian matrix.
        * Rep.Noise         vector of per-point estimates of noise, array[N]
        * Rep.R2            coefficient of determination (non-weighted)

        Other fields of Rep are not changed.

        IMPORTANT:  errors  in  parameters  are  calculated  without  taking  into
                    account boundary/linear constraints! Presence  of  constraints
                    changes distribution of errors, but there is no  easy  way  to
                    account for constraints when you calculate covariance matrix.
        NOTE:       noise in the data is estimated as follows:
                    * for fitting without user-supplied  weights  all  points  are
                      assumed to have same level of noise, which is estimated from
                      the data
                    * for fitting with user-supplied weights we assume that  noise
                      level in I-th point is inversely proportional to Ith weight.
                      Coefficient of proportionality is estimated from the data.
        NOTE:       we apply small amount of regularization when we invert squared
                    Jacobian and calculate covariance matrix. It  guarantees  that
                    algorithm won't divide by zero  during  inversion,  but  skews
                    error estimates a bit (fractional error is about 10^-9).
                    However, we believe that this difference is insignificant  for
                    all practical purposes except for the situation when you  want
                    to compare ALGLIB results with "reference"  implementation  up
                    to the last significant digit.

          -- ALGLIB PROJECT --
             Copyright 10.12.2009 by Bochkanov Sergey
        // Estimates the covariance matrix and related error statistics of a
        // weighted least squares model and stores them in Rep.
        //
        // The model value at point I is computed as sum(F1[I,j]*X[j]) + F0[I].
        // Points with zero weight are ignored when R2 and the noise level are
        // estimated.
        //
        // Parameters:
        //   f1, f0  - model matrix and offset vector (see formula above)
        //   y       - target values, one per point
        //   w       - per-point weights
        //   x       - current solution, K components
        //   s       - scale vector; a private copy is made, so the caller's
        //             array is never modified
        //   n, k    - number of points / number of parameters
        //   rep     - receives R2, CovPar, ErrPar, ErrCurve and Noise
        //   z       - ZKind=0: scratch buffer, resized to at least N x K and
        //             overwritten; ZKind=1: upper triangular factor, read only
        //   zkind   - must be 0 or 1 (asserted when the noise level is positive)
        private static void estimateerrors(double[,] f1,
            double[] f0,
            double[] y,
            double[] w,
            double[] x,
            double[] s,
            int n,
            int k,
            lsfitreport rep,
            ref double[,] z,
            int zkind)
        {
            int i = 0;
            int j = 0;
            int j1 = 0;
            double v = 0;
            double noisec = 0;
            int info = 0;
            matinv.matinvreport invrep = new matinv.matinvreport();
            int nzcnt = 0;
            double avg = 0;
            double rss = 0;
            double tss = 0;
            double sz = 0;
            double ss = 0;
            int i_ = 0;

            // S is overwritten with damping coefficients below; work on a copy
            s = (double[])s.Clone();

            // Compute NZCnt - count of non-zero weights
            nzcnt = 0;
            for(i=0; i<=n-1; i++)
            {
                if( (double)(w[i])!=(double)(0) )
                {
                    nzcnt = nzcnt+1;
                }
            }

            // Compute R2
            if( nzcnt>0 )
            {
                avg = 0.0;
                for(i=0; i<=n-1; i++)
                {
                    if( (double)(w[i])!=(double)(0) )
                    {
                        avg = avg+y[i];
                    }
                }
                avg = avg/nzcnt;
                rss = 0.0;
                tss = 0.0;
                for(i=0; i<=n-1; i++)
                {
                    if( (double)(w[i])!=(double)(0) )
                    {
                        v = 0.0;
                        for(i_=0; i_<=k-1;i_++)
                        {
                            v += f1[i,i_]*x[i_];
                        }
                        v = v+f0[i];
                        rss = rss+math.sqr(v-y[i]);
                        tss = tss+math.sqr(y[i]-avg);
                    }
                }
                if( (double)(tss)!=(double)(0) )
                {
                    rep.r2 = Math.Max(1.0-rss/tss, 0.0);
                }
                else
                {
                    // all weighted targets are identical
                    rep.r2 = 1.0;
                }
            }
            else
            {
                // no point has a non-zero weight
                rep.r2 = 0;
            }

            // Compute estimate of proportionality between noise in the data and weights:
            //     NoiseC = mean(per-point-noise*per-point-weight)
            // Noise level (standard deviation) at each point is equal to NoiseC/W[I].
            if( nzcnt>k )
            {
                noisec = 0.0;
                for(i=0; i<=n-1; i++)
                {
                    if( (double)(w[i])!=(double)(0) )
                    {
                        v = 0.0;
                        for(i_=0; i_<=k-1;i_++)
                        {
                            v += f1[i,i_]*x[i_];
                        }
                        v = v+f0[i];
                        noisec = noisec+math.sqr((v-y[i])*w[i]);
                    }
                }
                noisec = Math.Sqrt(noisec/(nzcnt-k));
            }
            else
            {
                // not enough weighted points to estimate the noise level
                noisec = 0.0;
            }

            // Two branches on noise level:
            // * NoiseC>0   normal situation
            // * NoiseC=0   degenerate case CovPar is filled by zeros
            apserv.rmatrixsetlengthatleast(ref rep.covpar, k, k);
            if( (double)(noisec)>(double)(0) )
            {
                // Normal situation: non-zero noise level
                alglib.ap.assert(zkind==0 || zkind==1, "LSFit: internal error in EstimateErrors() function");
                if( zkind==0 )
                {
                    // Z contains no additional information which can be used to speed up
                    // calculations. We have to calculate covariance matrix on our own:
                    // * Compute scaled Jacobian N*J, where N[i,i]=WCur[I]/NoiseC, store in Z
                    // * Compute Z'*Z, store in CovPar
                    // * Apply moderate regularization to CovPar and compute matrix inverse.
                    //   In case inverse failed, increase regularization parameter and try
                    //   again.
                    apserv.rmatrixsetlengthatleast(ref z, n, k);
                    for(i=0; i<=n-1; i++)
                    {
                        v = w[i]/noisec;
                        for(i_=0; i_<=k-1;i_++)
                        {
                            z[i,i_] = v*f1[i,i_];
                        }
                    }

                    // Convert S to automatically scaled damped matrix:
                    // * calculate SZ - sum of diagonal elements of Z'*Z
                    // * calculate SS - sum of diagonal elements of S^(-2)
                    // * overwrite S by (SZ/SS)*S^(-2)
                    // * now S has approximately same magnitude as diagonal of Z'*Z
                    sz = 0;
                    for(i=0; i<=n-1; i++)
                    {
                        for(j=0; j<=k-1; j++)
                        {
                            sz = sz+z[i,j]*z[i,j];
                        }
                    }
                    if( (double)(sz)==(double)(0) )
                    {
                        sz = 1;
                    }
                    ss = 0;
                    for(j=0; j<=k-1; j++)
                    {
                        ss = ss+1/math.sqr(s[j]);
                    }
                    for(j=0; j<=k-1; j++)
                    {
                        s[j] = sz/ss/math.sqr(s[j]);
                    }

                    // Calculate damped inverse inv(Z'*Z+S).
                    // We increase damping factor V until Z'*Z become well-conditioned.
                    v = 1.0E3*math.machineepsilon;
                    do
                    {
                        ablas.rmatrixsyrk(k, n, 1.0, z, 0, 0, 2, 0.0, rep.covpar, 0, 0, true);
                        for(i=0; i<=k-1; i++)
                        {
                            rep.covpar[i,i] = rep.covpar[i,i]+v*s[i];
                        }
                        matinv.spdmatrixinverse(ref rep.covpar, k, true, ref info, invrep);
                        v = 10*v;
                    }
                    while( info<=0 );

                    // Mirror the upper triangle into the lower one
                    for(i=0; i<=k-1; i++)
                    {
                        for(j=i+1; j<=k-1; j++)
                        {
                            rep.covpar[j,i] = rep.covpar[i,j];
                        }
                    }
                }
                if( zkind==1 )
                {
                    // We can reuse additional information:
                    // * Z contains R matrix from QR decomposition of W*F1
                    // * After multiplication by 1/NoiseC we get Z_mod = N*F1, where diag(N)=w[i]/NoiseC
                    // * Such triangular Z_mod is a Cholesky factor from decomposition of J'*N'*N*J.
                    //   Thus, we can calculate covariance matrix as inverse of the matrix given by
                    //   its Cholesky decomposition. It allow us to avoid time-consuming calculation
                    //   of J'*N'*N*J in CovPar - complexity is reduced from O(N*K^2) to O(K^3), which
                    //   is quite good because K is usually orders of magnitude smaller than N.
                    //
                    // First, convert S to automatically scaled damped matrix:
                    // * calculate SZ - sum of magnitudes of diagonal elements of Z/NoiseC
                    // * calculate SS - sum of diagonal elements of S^(-1)
                    // * overwrite S by (SZ/SS)*S^(-1)
                    // * now S has approximately same magnitude as diagonal of Z'*Z
                    sz = 0;
                    for(j=0; j<=k-1; j++)
                    {
                        sz = sz+Math.Abs(z[j,j]/noisec);
                    }
                    if( (double)(sz)==(double)(0) )
                    {
                        sz = 1;
                    }
                    ss = 0;
                    for(j=0; j<=k-1; j++)
                    {
                        ss = ss+1/s[j];
                    }
                    for(j=0; j<=k-1; j++)
                    {
                        s[j] = sz/ss/s[j];
                    }

                    // Calculate damped inverse of inv((Z+v*S)'*(Z+v*S))
                    // We increase damping factor V until matrix become well-conditioned.
                    v = 1.0E3*math.machineepsilon;
                    do
                    {
                        for(i=0; i<=k-1; i++)
                        {
                            for(j=i; j<=k-1; j++)
                            {
                                rep.covpar[i,j] = z[i,j]/noisec;
                            }
                            rep.covpar[i,i] = rep.covpar[i,i]+v*s[i];
                        }
                        matinv.spdmatrixcholeskyinverse(ref rep.covpar, k, true, ref info, invrep);
                        v = 10*v;
                    }
                    while( info<=0 );

                    // Mirror the upper triangle into the lower one
                    for(i=0; i<=k-1; i++)
                    {
                        for(j=i+1; j<=k-1; j++)
                        {
                            rep.covpar[j,i] = rep.covpar[i,j];
                        }
                    }
                }
            }
            else
            {
                // Degenerate situation: zero noise level, covariance matrix is zero.
                for(i=0; i<=k-1; i++)
                {
                    for(j=0; j<=k-1; j++)
                    {
                        rep.covpar[j,i] = 0;
                    }
                }
            }

            // Estimate errors in parameters, curve and per-point noise
            apserv.rvectorsetlengthatleast(ref rep.errpar, k);
            apserv.rvectorsetlengthatleast(ref rep.errcurve, n);
            apserv.rvectorsetlengthatleast(ref rep.noise, n);
            for(i=0; i<=k-1; i++)
            {
                rep.errpar[i] = Math.Sqrt(rep.covpar[i,i]);
            }
            for(i=0; i<=n-1; i++)
            {
                // ErrCurve[I] is sqrt(P[i,i]) where P=J*CovPar*J'
                v = 0.0;
                for(j=0; j<=k-1; j++)
                {
                    for(j1=0; j1<=k-1; j1++)
                    {
                        v = v+f1[i,j]*rep.covpar[j,j1]*f1[i,j1];
                    }
                }
                rep.errcurve[i] = Math.Sqrt(v);

                // Noise[i] is filled using weights and current estimate of noise level
                if( (double)(w[i])!=(double)(0) )
                {
                    rep.noise[i] = noisec/w[i];
                }
                else
                {
                    rep.noise[i] = 0;
                }
            }
        }
Example #10
 // Resets every field of this object: array fields become zero-length
 // arrays and object fields are replaced by newly constructed instances.
 public override void init()
 {
     s = new double[0];
     bndl = new double[0];
     bndu = new double[0];
     taskx = new double[0,0];
     tasky = new double[0];
     taskw = new double[0];
     x = new double[0];
     c = new double[0];
     g = new double[0];
     h = new double[0,0];
     wcur = new double[0];
     tmp = new double[0];
     tmpf = new double[0];
     tmpjac = new double[0,0];
     tmpjacw = new double[0,0];
     invrep = new matinv.matinvreport();
     rep = new lsfitreport();
     optstate = new minlm.minlmstate();
     optrep = new minlm.minlmreport();
     rstate = new rcommstate();
 }
Example #11
        *  Neural network training  using  modified  Levenberg-Marquardt  with  exact
        *  Hessian calculation and regularization. Subroutine trains  neural  network
        *  with restarts from random positions. Algorithm is well  suited  for  small
        *  and medium scale problems (hundreds of weights).
        *   Network     -   neural network with initialized geometry
        *   XY          -   training set
        *   NPoints     -   training set size
        *   Decay       -   weight decay constant, >=0.001
        *                   Decay term 'Decay*||Weights||^2' is added to error
        *                   function.
        *                   If you don't know what Decay to choose, use 0.001.
        *   Restarts    -   number of restarts from random position, >0.
        *                   If you don't know what Restarts to choose, use 2.
        *   Network     -   trained neural network.
        *   Info        -   return code:
        * -9, if internal matrix inverse subroutine failed
        * -2, if there is a point with class number
        *                         outside of [0..NOut-1].
        * -1, if wrong parameters specified
        *                         (NPoints<=0, Restarts<1).
        *  2, if task has been solved.
        *   Rep         -   training report
        *  -- ALGLIB --
        *    Copyright 10.03.2009 by Bochkanov Sergey
        // Trains a neural network with a hybrid scheme: a short L-BFGS run from
        // random weights, followed by damped Newton (Levenberg-Marquardt style)
        // steps on the exact Hessian, each refined by a preconditioned L-BFGS run.
        // The whole procedure is repeated Restarts times and the weights with
        // the lowest regularized error are written back to the network.
        //
        // Info on exit:
        //   -9  triangular matrix inversion failed
        //   -2  softmax network and a class label lies outside [0..NOut-1]
        //   -1  NPoints<=0 or Restarts<1
        //    2  training completed
        //
        // Decay is raised to at least mindecay before use. Rep.NGrad, Rep.NHess
        // and Rep.NCholesky count gradient, Hessian and Cholesky evaluations.
        public static void mlptrainlm(ref mlpbase.multilayerperceptron network,
                                      ref double[,] xy,
                                      int npoints,
                                      double decay,
                                      int restarts,
                                      ref int info,
                                      ref mlpreport rep)
        {
            int    nin       = 0;
            int    nout      = 0;
            int    wcount    = 0;
            double lmftol    = 0;
            double lmsteptol = 0;
            int    i         = 0;
            int    k         = 0;
            double v         = 0;
            double e         = 0;
            double enew      = 0;
            double xnorm2    = 0;
            double stepnorm  = 0;

            double[] g = new double[0];
            double[] d = new double[0];
            double[,] h    = new double[0, 0];
            double[,] hmod = new double[0, 0];
            double[,] z    = new double[0, 0];
            bool   spd        = new bool();
            double nu         = 0;
            double lambda     = 0;
            double lambdaup   = 0;
            double lambdadown = 0;

            minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
            minlbfgs.minlbfgsstate  state       = new minlbfgs.minlbfgsstate();
            double[] x     = new double[0];
            double[] y     = new double[0];
            double[] wbase = new double[0];
            double[] wdir  = new double[0];
            double[] wt    = new double[0];
            double[] wx    = new double[0];
            int      pass  = 0;

            double[] wbest   = new double[0];
            double   ebest   = 0;
            int      invinfo = 0;

            matinv.matinvreport invrep = new matinv.matinvreport();
            int solverinfo             = 0;

            densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
            int i_ = 0;

            mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
            lambdaup   = 10;
            lambdadown = 0.3;
            lmftol     = 0.001;
            lmsteptol  = 0.001;

            // Test for inputs
            if (npoints <= 0 | restarts < 1)
            {
                info = -1;
                return;
            }
            if (mlpbase.mlpissoftmax(ref network))
            {
                for (i = 0; i <= npoints - 1; i++)
                {
                    if ((int)Math.Round(xy[i, nin]) < 0 | (int)Math.Round(xy[i, nin]) >= nout)
                    {
                        info = -2;
                        return;
                    }
                }
            }
            decay = Math.Max(decay, mindecay);
            info  = 2;

            // Initialize data
            rep.ngrad     = 0;
            rep.nhess     = 0;
            rep.ncholesky = 0;

            // General case.
            // Prepare task and network. Allocate space.
            mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
            g     = new double[wcount - 1 + 1];
            h     = new double[wcount - 1 + 1, wcount - 1 + 1];
            hmod  = new double[wcount - 1 + 1, wcount - 1 + 1];
            wbase = new double[wcount - 1 + 1];
            wdir  = new double[wcount - 1 + 1];
            wbest = new double[wcount - 1 + 1];
            wt    = new double[wcount - 1 + 1];
            wx    = new double[wcount - 1 + 1];
            ebest = AP.Math.MaxRealNumber;

            // Multiple passes
            for (pass = 1; pass <= restarts; pass++)
            {
                // Initialize weights
                mlpbase.mlprandomize(ref network);

                // First stage of the hybrid algorithm: LBFGS
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    wbase[i_] = network.weights[i_];
                }
                minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
                minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
                while (minlbfgs.minlbfgsiteration(ref state))
                {
                    // gradient
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        network.weights[i_] = state.x[i_];
                    }
                    mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

                    // weight decay
                    v = 0.0;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        v += network.weights[i_] * network.weights[i_];
                    }
                    state.f = state.f + 0.5 * decay * v;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        state.g[i_] = state.g[i_] + decay * network.weights[i_];
                    }

                    // next iteration
                    rep.ngrad = rep.ngrad + 1;
                }
                minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    network.weights[i_] = wbase[i_];
                }

                // Second stage of the hybrid algorithm: LM
                // Initialize H with identity matrix,
                // G with gradient,
                // E with regularized error.
                mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                e = e + 0.5 * decay * v;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    g[i_] = g[i_] + decay * network.weights[i_];
                }
                for (k = 0; k <= wcount - 1; k++)
                {
                    h[k, k] = h[k, k] + decay;
                }
                rep.nhess = rep.nhess + 1;
                lambda    = 0.001;
                nu        = 2;
                while (true)
                {
                    // 1. HMod = H+lambda*I
                    // 2. Try to solve (H+Lambda*I)*dx = -g.
                    //    Increase lambda if left part is not positive definite.
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        for (i_ = 0; i_ <= wcount - 1; i_++)
                        {
                            hmod[i, i_] = h[i, i_];
                        }
                        hmod[i, i] = hmod[i, i] + lambda;
                    }
                    spd           = trfac.spdmatrixcholesky(ref hmod, wcount, true);
                    rep.ncholesky = rep.ncholesky + 1;
                    if (!spd)
                    {
                        // not positive definite: raise damping and retry
                        lambda = lambda * lambdaup * nu;
                        nu     = nu * 2;
                        continue;
                    }
                    densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
                    if (solverinfo < 0)
                    {
                        // solver failed: raise damping and retry
                        lambda = lambda * lambdaup * nu;
                        nu     = nu * 2;
                        continue;
                    }
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        wdir[i_] = -1 * wdir[i_];
                    }

                    // Lambda found.
                    // 1. Save old w in WBase
                    // 1. Test some stopping criterions
                    // 2. If error(w+wdir)>error(w), increase lambda
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        network.weights[i_] = network.weights[i_] + wdir[i_];
                    }
                    xnorm2 = 0.0;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        xnorm2 += network.weights[i_] * network.weights[i_];
                    }
                    stepnorm = 0.0;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        stepnorm += wdir[i_] * wdir[i_];
                    }
                    stepnorm = Math.Sqrt(stepnorm);
                    enew     = mlpbase.mlperror(ref network, ref xy, npoints) + 0.5 * decay * xnorm2;
                    if ((double)(stepnorm) < (double)(lmsteptol * (1 + Math.Sqrt(xnorm2))))
                    {
                        // step is small relative to the weights: this pass has converged
                        break;
                    }
                    if ((double)(enew) > (double)(e))
                    {
                        // error grew: raise damping and retry
                        lambda = lambda * lambdaup * nu;
                        nu     = nu * 2;
                        continue;
                    }

                    // Optimize using inv(cholesky(H)) as preconditioner
                    matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
                    if (invinfo <= 0)
                    {
                        // if matrix can't be inverted then exit with errors
                        // TODO: make WCount steps in direction suggested by HMod
                        info = -9;
                        return;
                    }
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        wbase[i_] = network.weights[i_];
                    }
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        wt[i] = 0;
                    }
                    minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
                    minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
                    while (minlbfgs.minlbfgsiteration(ref state))
                    {
                        // gradient
                        for (i = 0; i <= wcount - 1; i++)
                        {
                            v = 0.0;
                            for (i_ = i; i_ <= wcount - 1; i_++)
                            {
                                v += state.x[i_] * hmod[i, i_];
                            }
                            network.weights[i] = wbase[i] + v;
                        }
                        mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                        for (i = 0; i <= wcount - 1; i++)
                        {
                            state.g[i] = 0;
                        }
                        for (i = 0; i <= wcount - 1; i++)
                        {
                            v = g[i];
                            for (i_ = i; i_ <= wcount - 1; i_++)
                            {
                                state.g[i_] = state.g[i_] + v * hmod[i, i_];
                            }
                        }

                        // weight decay
                        // grad(x'*x) = A'*(x0+A*t)
                        v = 0.0;
                        for (i_ = 0; i_ <= wcount - 1; i_++)
                        {
                            v += network.weights[i_] * network.weights[i_];
                        }
                        state.f = state.f + 0.5 * decay * v;
                        for (i = 0; i <= wcount - 1; i++)
                        {
                            v = decay * network.weights[i];
                            for (i_ = i; i_ <= wcount - 1; i_++)
                            {
                                state.g[i_] = state.g[i_] + v * hmod[i, i_];
                            }
                        }

                        // next iteration
                        rep.ngrad = rep.ngrad + 1;
                    }
                    minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

                    // Accept new position.
                    // Calculate Hessian
                    for (i = 0; i <= wcount - 1; i++)
                    {
                        v = 0.0;
                        for (i_ = i; i_ <= wcount - 1; i_++)
                        {
                            v += wt[i_] * hmod[i, i_];
                        }
                        network.weights[i] = wbase[i] + v;
                    }
                    mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
                    v = 0.0;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        v += network.weights[i_] * network.weights[i_];
                    }
                    e = e + 0.5 * decay * v;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        g[i_] = g[i_] + decay * network.weights[i_];
                    }
                    for (k = 0; k <= wcount - 1; k++)
                    {
                        h[k, k] = h[k, k] + decay;
                    }
                    rep.nhess = rep.nhess + 1;

                    // Update lambda
                    lambda = lambda * lambdadown;
                    nu     = 2;
                }

                // update WBest
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                e = 0.5 * decay * v + mlpbase.mlperror(ref network, ref xy, npoints);
                if ((double)(e) < (double)(ebest))
                {
                    ebest = e;
                    for (i_ = 0; i_ <= wcount - 1; i_++)
                    {
                        wbest[i_] = network.weights[i_];
                    }
                }
            }

            // copy WBest to output
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = wbest[i_];
            }
        }
Example #12
        *  Algorithm for reduction of the following generalized symmetric positive-
        *  definite eigenvalue problem:
        *   A*x = lambda*B*x (1) or
        *   A*B*x = lambda*x (2) or
        *   B*A*x = lambda*x (3)
        *  to the symmetric eigenvalue problem C*y = lambda*y (the eigenvalues of this
        *  problem and of the given problem are the same, and the eigenvectors of the
        *  given problem can be obtained by multiplying the computed eigenvectors by
        *  the transformation matrix: x = R*y).
        *  Here A is a symmetric matrix and B is a symmetric positive-definite matrix.
        *  Input parameters:
        *   A           -   symmetric matrix which is given by its upper or lower
        *                   triangular part.
        *                   Array whose indexes range within [0..N-1, 0..N-1].
        *   N           -   size of matrices A and B.
        *   IsUpperA    -   storage format of matrix A.
        *   B           -   symmetric positive-definite matrix which is given by
        *                   its upper or lower triangular part.
        *                   Array whose indexes range within [0..N-1, 0..N-1].
        *   IsUpperB    -   storage format of matrix B.
        *   ProblemType -   if ProblemType is equal to:
        *                    * 1, the following problem is solved: A*x = lambda*B*x;
        *                    * 2, the following problem is solved: A*B*x = lambda*x;
        *                    * 3, the following problem is solved: B*A*x = lambda*x.
        *  Output parameters:
        *   A           -   symmetric matrix which is given by its upper or lower
        *                   triangle depending on IsUpperA. Contains matrix C.
        *                   Array whose indexes range within [0..N-1, 0..N-1].
        *   R           -   upper or lower triangular transformation matrix
        *                   which is used to obtain the eigenvectors of a given problem
        *                   as the product of eigenvectors of C (from the right) and
        *                   matrix R (from the left). If the matrix is upper
        *                   triangular, the elements below the main diagonal
        *                   are equal to 0 (and vice versa). Thus, we can perform
        *                   the multiplication without taking into account the
        *                   internal structure (which is an easier though less
        *                   efficient way).
        *                   Array whose indexes range within [0..N-1, 0..N-1].
        *   IsUpperR    -   type of matrix R (upper or lower triangular).
        *  Result:
        *   True, if the problem was reduced successfully.
        *   False, if an error occurred during the Cholesky decomposition of
        *       matrix B (the matrix is not positive-definite) or during the
        *       inversion of the resulting triangular factor.
        *  -- ALGLIB --
        *    Copyright 1.28.2006 by Bochkanov Sergey
        public static bool smatrixgevdreduce(ref double[,] a,
                                             int n,
                                             bool isuppera,
                                             ref double[,] b,
                                             bool isupperb,
                                             int problemtype,
                                             ref double[,] r,
                                             ref bool isupperr)
            bool result = new bool();

            double[,] t = new double[0, 0];
            double[] w1 = new double[0];
            double[] w2 = new double[0];
            double[] w3 = new double[0];
            int      i  = 0;
            int      j  = 0;
            double   v  = 0;

            matinv.matinvreport rep = new matinv.matinvreport();
            int info = 0;
            int i_   = 0;
            int i1_  = 0;

            System.Diagnostics.Debug.Assert(n > 0, "SMatrixGEVDReduce: N<=0!");
            System.Diagnostics.Debug.Assert(problemtype == 1 | problemtype == 2 | problemtype == 3, "SMatrixGEVDReduce: incorrect ProblemType!");
            result = true;

            // Problem 1:  A*x = lambda*B*x
            // Reducing to:
            //     C*y = lambda*y
            //     C = L^(-1) * A * L^(-T)
            //     x = L^(-T) * y
            if (problemtype == 1)
                // Factorize B in T: B = LL'
                t = new double[n - 1 + 1, n - 1 + 1];
                if (isupperb)
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = i; i_ <= n - 1; i_++)
                            t[i_, i] = b[i, i_];
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = 0; i_ <= i; i_++)
                            t[i, i_] = b[i, i_];
                if (!trfac.spdmatrixcholesky(ref t, n, false))
                    result = false;

                // Invert L in T
                matinv.rmatrixtrinverse(ref t, n, false, false, ref info, ref rep);
                if (info <= 0)
                    result = false;

                // Build L^(-1) * A * L^(-T) in R
                w1 = new double[n + 1];
                w2 = new double[n + 1];
                r  = new double[n - 1 + 1, n - 1 + 1];
                for (j = 1; j <= n; j++)
                    // Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T))
                    i1_ = (0) - (1);
                    for (i_ = 1; i_ <= j; i_++)
                        w1[i_] = t[j - 1, i_ + i1_];
                    sblas.symmetricmatrixvectormultiply(ref a, isuppera, 0, j - 1, ref w1, 1.0, ref w2);
                    if (isuppera)
                        blas.matrixvectormultiply(ref a, 0, j - 1, j, n - 1, true, ref w1, 1, j, 1.0, ref w2, j + 1, n, 0.0);
                        blas.matrixvectormultiply(ref a, j, n - 1, 0, j - 1, false, ref w1, 1, j, 1.0, ref w2, j + 1, n, 0.0);

                    // Form l(i)*w2 (here l(i) is i-th row of L^(-1))
                    for (i = 1; i <= n; i++)
                        i1_ = (1) - (0);
                        v   = 0.0;
                        for (i_ = 0; i_ <= i - 1; i_++)
                            v += t[i - 1, i_] * w2[i_ + i1_];
                        r[i - 1, j - 1] = v;

                // Copy R to A
                for (i = 0; i <= n - 1; i++)
                    for (i_ = 0; i_ <= n - 1; i_++)
                        a[i, i_] = r[i, i_];

                // Copy L^(-1) from T to R and transpose
                isupperr = true;
                for (i = 0; i <= n - 1; i++)
                    for (j = 0; j <= i - 1; j++)
                        r[i, j] = 0;
                for (i = 0; i <= n - 1; i++)
                    for (i_ = i; i_ <= n - 1; i_++)
                        r[i, i_] = t[i_, i];

            // Problem 2:  A*B*x = lambda*x
            // or
            // problem 3:  B*A*x = lambda*x
            // Reducing to:
            //     C*y = lambda*y
            //     C = U * A * U'
            //     B = U'* U
            if (problemtype == 2 | problemtype == 3)
                // Factorize B in T: B = U'*U
                t = new double[n - 1 + 1, n - 1 + 1];
                if (isupperb)
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = i; i_ <= n - 1; i_++)
                            t[i, i_] = b[i, i_];
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = i; i_ <= n - 1; i_++)
                            t[i, i_] = b[i_, i];
                if (!trfac.spdmatrixcholesky(ref t, n, true))
                    result = false;

                // Build U * A * U' in R
                w1 = new double[n + 1];
                w2 = new double[n + 1];
                w3 = new double[n + 1];
                r  = new double[n - 1 + 1, n - 1 + 1];
                for (j = 1; j <= n; j++)
                    // Form w2 = A * u'(j) (here u'(j) is j-th column of U')
                    i1_ = (j - 1) - (1);
                    for (i_ = 1; i_ <= n - j + 1; i_++)
                        w1[i_] = t[j - 1, i_ + i1_];
                    sblas.symmetricmatrixvectormultiply(ref a, isuppera, j - 1, n - 1, ref w1, 1.0, ref w3);
                    i1_ = (1) - (j);
                    for (i_ = j; i_ <= n; i_++)
                        w2[i_] = w3[i_ + i1_];
                    i1_ = (j - 1) - (j);
                    for (i_ = j; i_ <= n; i_++)
                        w1[i_] = t[j - 1, i_ + i1_];
                    if (isuppera)
                        blas.matrixvectormultiply(ref a, 0, j - 2, j - 1, n - 1, false, ref w1, j, n, 1.0, ref w2, 1, j - 1, 0.0);
                        blas.matrixvectormultiply(ref a, j - 1, n - 1, 0, j - 2, true, ref w1, j, n, 1.0, ref w2, 1, j - 1, 0.0);

                    // Form u(i)*w2 (here u(i) is i-th row of U)
                    for (i = 1; i <= n; i++)
                        i1_ = (i) - (i - 1);
                        v   = 0.0;
                        for (i_ = i - 1; i_ <= n - 1; i_++)
                            v += t[i - 1, i_] * w2[i_ + i1_];
                        r[i - 1, j - 1] = v;

                // Copy R to A
                for (i = 0; i <= n - 1; i++)
                    for (i_ = 0; i_ <= n - 1; i_++)
                        a[i, i_] = r[i, i_];
                if (problemtype == 2)
                    // Invert U in T
                    matinv.rmatrixtrinverse(ref t, n, true, false, ref info, ref rep);
                    if (info <= 0)
                        result = false;

                    // Copy U^-1 from T to R
                    isupperr = true;
                    for (i = 0; i <= n - 1; i++)
                        for (j = 0; j <= i - 1; j++)
                            r[i, j] = 0;
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = i; i_ <= n - 1; i_++)
                            r[i, i_] = t[i, i_];
                    // Copy U from T to R and transpose
                    isupperr = false;
                    for (i = 0; i <= n - 1; i++)
                        for (j = i + 1; j <= n - 1; j++)
                            r[i, j] = 0;
                    for (i = 0; i <= n - 1; i++)
                        for (i_ = i; i_ <= n - 1; i_++)
                            r[i_, i] = t[i, i_];