/*************************************************************************
Neural network training using modified Levenberg-Marquardt with exact
Hessian calculation and regularization. Subroutine trains neural network
with restarts from random positions. Algorithm is well suited for small
and medium scale problems (hundreds of weights).

Every restart is a hybrid of two stages:
* a short L-BFGS run started from randomized weights;
* Levenberg-Marquardt iterations, where each accepted step is followed
  by a few L-BFGS iterations preconditioned with the inverse Cholesky
  factor of the damped Hessian.

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size, >0
    Decay       -   weight decay constant, >=0.001
                    (values below the MinDecay constant of this class are
                    replaced by MinDecay).
                    Decay term '0.5*Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network (weights of the restart with
                    the smallest regularized error). When Info=-9 the
                    weights are left at the point where training stopped.
    Info        -   return code:
                    * -9, if internal matrix inverse subroutine failed
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<=0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report; NGrad, NHess and NCholesky are reset
                    to zero and then counted by this function.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mlptrainlm(mlpbase.multilayerperceptron network,
    double[,] xy,
    int npoints,
    double decay,
    int restarts,
    ref int info,
    mlpreport rep)
{
    //
    // Tuning constants of the Levenberg-Marquardt stage:
    // * LambdaUp/LambdaDown - growth/shrink factors of the damping parameter
    // * LMStepTol           - relative step length used as stopping criterion
    //
    const double lambdaup = 10;
    const double lambdadown = 0.3;
    const double lmsteptol = 0.001;

    int nin = 0;
    int nout = 0;
    int wcount = 0;
    int invinfo = 0;
    int solverinfo = 0;
    double e = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    matinv.matinvreport invrep = new matinv.matinvreport();
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();

    info = 0;
    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);

    //
    // Test for inputs
    //
    if( npoints<=0 || restarts<1 )
    {
        info = -1;
        return;
    }
    if( mlpbase.mlpissoftmax(network) )
    {
        //
        // For softmax networks column NIn of XY must round to
        // a class number from [0..NOut-1]
        //
        for(int i=0; i<npoints; i++)
        {
            int classidx = (int)Math.Round(xy[i,nin]);
            if( classidx<0 || classidx>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Initialize report counters
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    //
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpinitpreprocessor(network, xy, npoints);
    double[] g = new double[wcount];
    double[,] h = new double[wcount, wcount];
    double[,] hmod = new double[wcount, wcount];
    double[] wbase = new double[wcount];
    double[] wdir = new double[wcount];
    double[] wbest = new double[wcount];
    double[] wt = new double[wcount];
    double ebest = math.maxrealnumber;

    //
    // Multiple passes
    //
    for(int pass=1; pass<=restarts; pass++)
    {
        //
        // Initialize weights
        //
        mlpbase.mlprandomize(network);

        //
        // First stage of the hybrid algorithm: LBFGS
        //
        for(int i=0; i<wcount; i++)
        {
            wbase[i] = network.weights[i];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), wbase, state);
        minlbfgs.minlbfgssetcond(state, 0, 0, 0, Math.Max(25, wcount));
        while( minlbfgs.minlbfgsiteration(state) )
        {
            //
            // Error and gradient at the point requested by the optimizer
            //
            for(int i=0; i<wcount; i++)
            {
                network.weights[i] = state.x[i];
            }
            mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref state.g);

            //
            // Weight decay
            //
            state.f = state.f+0.5*decay*mlptrainlmsumsq(network.weights, wcount);
            for(int i=0; i<wcount; i++)
            {
                state.g[i] += decay*network.weights[i];
            }

            //
            // Next iteration
            //
            rep.ngrad++;
        }
        minlbfgs.minlbfgsresults(state, ref wbase, internalrep);
        for(int i=0; i<wcount; i++)
        {
            network.weights[i] = wbase[i];
        }

        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with Hessian plus Decay*I,
        //            G with regularized gradient,
        //            E with regularized error.
        //
        mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h);
        e = e+0.5*decay*mlptrainlmsumsq(network.weights, wcount);
        for(int i=0; i<wcount; i++)
        {
            g[i] += decay*network.weights[i];
            h[i,i] += decay;
        }
        rep.nhess++;
        double lambdav = 0.001;
        double nu = 2;
        while( true )
        {
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if left part is not positive definite.
            //
            for(int i=0; i<wcount; i++)
            {
                for(int j=0; j<wcount; j++)
                {
                    hmod[i,j] = h[i,j];
                }
                hmod[i,i] += lambdav;
            }
            bool spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
            rep.ncholesky++;
            if( !spd )
            {
                lambdav = lambdav*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            densesolver.spdmatrixcholeskysolve(hmod, wcount, true, g, ref solverinfo, solverrep, ref wdir);
            if( solverinfo<0 )
            {
                lambdav = lambdav*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            for(int i=0; i<wcount; i++)
            {
                wdir[i] = -1*wdir[i];
            }

            //
            // Lambda found.
            // 1. Make trial step
            // 2. Test stopping criterion (short step)
            // 3. If error(w+wdir)>error(w), increase lambda
            //
            // NOTE(review): the trial step is added to Network.Weights in
            // place and is not undone when the step is rejected below, so
            // the next trial step starts from the already moved weights
            // while E, G and H still describe the previous point. This is
            // the behavior of the original code and is kept unchanged -
            // confirm whether it is intended.
            //
            for(int i=0; i<wcount; i++)
            {
                network.weights[i] += wdir[i];
            }
            double xnorm2 = mlptrainlmsumsq(network.weights, wcount);
            double stepnorm = Math.Sqrt(mlptrainlmsumsq(wdir, wcount));
            double enew = mlpbase.mlperror(network, xy, npoints)+0.5*decay*xnorm2;
            if( stepnorm<lmsteptol*(1+Math.Sqrt(xnorm2)) )
            {
                break;
            }
            if( enew>e )
            {
                lambdav = lambdav*lambdaup*nu;
                nu = nu*2;
                continue;
            }

            //
            // Optimize using inv(cholesky(H)) as preconditioner:
            // weights are parameterized as WBase+HMod*t, where HMod is
            // the inverted upper triangular Cholesky factor.
            //
            matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, invrep);
            if( invinfo<=0 )
            {
                //
                // if matrix can't be inverted then exit with errors
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }
            for(int i=0; i<wcount; i++)
            {
                wbase[i] = network.weights[i];
                wt[i] = 0;
            }
            minlbfgs.minlbfgscreatex(wcount, wcount, wt, 1, 0.0, state);
            minlbfgs.minlbfgssetcond(state, 0, 0, 0, 5);
            while( minlbfgs.minlbfgsiteration(state) )
            {
                //
                // Weights = WBase+HMod*t (upper triangle of HMod only)
                //
                for(int i=0; i<wcount; i++)
                {
                    double dot = 0.0;
                    for(int j=i; j<wcount; j++)
                    {
                        dot += state.x[j]*hmod[i,j];
                    }
                    network.weights[i] = wbase[i]+dot;
                }

                //
                // Gradient with respect to t is HMod'*G
                //
                mlpbase.mlpgradbatch(network, xy, npoints, ref state.f, ref g);
                for(int i=0; i<wcount; i++)
                {
                    state.g[i] = 0;
                }
                for(int i=0; i<wcount; i++)
                {
                    double gi = g[i];
                    for(int j=i; j<wcount; j++)
                    {
                        state.g[j] += gi*hmod[i,j];
                    }
                }

                //
                // Weight decay
                // grad(x'*x) = A'*(x0+A*t)
                //
                state.f = state.f+0.5*decay*mlptrainlmsumsq(network.weights, wcount);
                for(int i=0; i<wcount; i++)
                {
                    double dw = decay*network.weights[i];
                    for(int j=i; j<wcount; j++)
                    {
                        state.g[j] += dw*hmod[i,j];
                    }
                }

                //
                // Next iteration
                //
                rep.ngrad++;
            }
            minlbfgs.minlbfgsresults(state, ref wt, internalrep);

            //
            // Accept new position.
            // Calculate Hessian
            //
            for(int i=0; i<wcount; i++)
            {
                double dot = 0.0;
                for(int j=i; j<wcount; j++)
                {
                    dot += wt[j]*hmod[i,j];
                }
                network.weights[i] = wbase[i]+dot;
            }
            mlpbase.mlphessianbatch(network, xy, npoints, ref e, ref g, ref h);
            e = e+0.5*decay*mlptrainlmsumsq(network.weights, wcount);
            for(int i=0; i<wcount; i++)
            {
                g[i] += decay*network.weights[i];
                h[i,i] += decay;
            }
            rep.nhess++;

            //
            // Update lambda
            //
            lambdav = lambdav*lambdadown;
            nu = 2;
        }

        //
        // Update WBest
        //
        double wnorm2 = mlptrainlmsumsq(network.weights, wcount);
        e = 0.5*decay*wnorm2+mlpbase.mlperror(network, xy, npoints);
        if( e<ebest )
        {
            ebest = e;
            for(int i=0; i<wcount; i++)
            {
                wbest[i] = network.weights[i];
            }
        }
    }

    //
    // Copy WBest to output
    //
    for(int i=0; i<wcount; i++)
    {
        network.weights[i] = wbest[i];
    }
}


/*************************************************************************
Internal helper of MLPTrainLM: returns sum of squares of the first N
elements of A, accumulated in increasing index order.
*************************************************************************/
private static double mlptrainlmsumsq(double[] a, int n)
{
    double sum = 0.0;
    for(int i=0; i<n; i++)
    {
        sum += a[i]*a[i];
    }
    return sum;
}
/*************************************************************************
HPD test.

For every pass in 1..PassCount and every size N in 1..MaxN:
* a matrix produced by HPDMatrixRndCond(N,1000) and CMatrixDropHalf is
  inverted with HPDMatrixInverse and (after HPDMatrixCholesky) with
  HPDMatrixCholeskyInverse; both results are verified by
  HPDMatrixCheckInverse. If Cholesky factorization fails, the rest of
  this (pass,N) combination is skipped;
* exactly singular matrices (all zeros; random with row K and column K
  multiplied by zero) are passed to HPDMatrixCholeskyInverse, which must
  return Info=-3 or Info=1 with Rep.R1 and Rep.RInf inside
  [0,1000*MachineEpsilon].

HPDErrors is set to True when any check fails and is never reset here.
*************************************************************************/
private static void testhpdinv(int maxn,
    int passcount,
    double threshold,
    ref bool hpderrors)
{
    complex[,] a = new complex[0,0];
    complex[,] cha = new complex[0,0];
    complex[,] inva = new complex[0,0];
    complex[,] invcha = new complex[0,0];
    matinv.matinvreport rep = new matinv.matinvreport();
    int info = 0;
    double singulartol = 1000*math.machineepsilon;

    for(int pass=1; pass<=passcount; pass++)
    {
        for(int n=1; n<=maxn; n++)
        {
            bool isupper = math.randomreal()>0.5;

            //
            // Well conditioned task: both inversion routines must
            // reproduce the inverse of A
            //
            matgen.hpdmatrixrndcond(n, 1000, ref a);
            cmatrixdrophalf(ref a, n, isupper);
            cmatrixmakeacopy(a, n, n, ref cha);
            if( !trfac.hpdmatrixcholesky(ref cha, n, isupper) )
            {
                continue;
            }
            cmatrixmakeacopy(a, n, n, ref inva);
            cmatrixmakeacopy(cha, n, n, ref invcha);

            info = 0;
            unsetrep(rep);
            matinv.hpdmatrixinverse(ref inva, n, isupper, ref info, rep);
            if( !hpdmatrixcheckinverse(a, inva, isupper, n, threshold, info, rep) )
            {
                hpderrors = true;
            }

            info = 0;
            unsetrep(rep);
            matinv.hpdmatrixcholeskyinverse(ref invcha, n, isupper, ref info, rep);
            if( !hpdmatrixcheckinverse(a, invcha, isupper, n, threshold, info, rep) )
            {
                hpderrors = true;
            }

            //
            // Exactly singular tasks: ability to detect singularity
            // is tested.
            // * TaskKind=0    - zero matrix
            // * TaskKind=1,2  - random matrix whose row K and column K
            //                   are multiplied by zero (both kinds build
            //                   the same kind of matrix)
            //
            for(int taskkind=0; taskkind<=2; taskkind++)
            {
                cunset2d(ref a);
                a = new complex[n, n];
                if( taskkind==0 )
                {
                    for(int row=0; row<n; row++)
                    {
                        for(int col=0; col<n; col++)
                        {
                            a[row,col] = 0;
                        }
                    }
                }
                else
                {
                    for(int row=0; row<n; row++)
                    {
                        for(int col=0; col<n; col++)
                        {
                            a[row,col].x = 2*math.randomreal()-1;
                            a[row,col].y = 2*math.randomreal()-1;
                        }
                    }
                    int k = math.randominteger(n);
                    for(int m=0; m<n; m++)
                    {
                        a[m,k] = 0*a[m,k];
                    }
                    for(int m=0; m<n; m++)
                    {
                        a[k,m] = 0*a[k,m];
                    }
                }

                info = 0;
                unsetrep(rep);
                matinv.hpdmatrixcholeskyinverse(ref a, n, isupper, ref info, rep);
                if( info!=-3 && info!=1 )
                {
                    hpderrors = true;
                }
                else
                {
                    if( rep.r1<0 || rep.r1>singulartol )
                    {
                        hpderrors = true;
                    }
                    if( rep.rinf<0 || rep.rinf>singulartol )
                    {
                        hpderrors = true;
                    }
                }
            }
        }
    }
}
/*************************************************************************
Default constructor: every array field becomes an empty array and every
nested state/report field becomes a freshly constructed object.
*************************************************************************/
public minlmstate()
{
    //
    // Vectors
    //
    x = new double[0];
    fi = new double[0];
    g = new double[0];
    xprec = new double[0];
    xbase = new double[0];
    xdir = new double[0];
    gbase = new double[0];
    xprev = new double[0];
    work = new double[0];

    //
    // Matrices
    //
    j = new double[0,0];
    h = new double[0,0];
    rawmodel = new double[0,0];
    model = new double[0,0];

    //
    // Nested optimizer state, reverse communication state and reports
    //
    internalstate = new minlbfgs.minlbfgsstate();
    internalrep = new minlbfgs.minlbfgsreport();
    rstate = new rcommstate();
    solverrep = new densesolver.densesolverreport();
    invrep = new matinv.matinvreport();
}
/*************************************************************************
Complex TR inverse.

For every size N in 1..MaxN, every combination of IsUpper/IsUnit (four
tasks) and PassCount random matrices per task:
* a random complex matrix with dominant diagonal is generated and its
  copy is inverted in place by CMatrixTRInverse;
* structural test: the triangle which is not referenced (and the diagonal,
  when IsUnit is set) must be left unchanged by the inversion;
* inverse test: product of the triangular matrix and its computed inverse
  must match the identity matrix up to Threshold.

CTRErrors is set to True when a check fails. If CMatrixTRInverse reports
Info<=0, CTRErrors is set and the function returns immediately.
*************************************************************************/
private static void testctrinv(int maxn,
    int passcount,
    double threshold,
    ref bool ctrerrors)
{
    matinv.matinvreport rep = new matinv.matinvreport();
    int info = 0;

    for(int n=1; n<=maxn; n++)
    {
        complex[,] a = new complex[n, n];
        complex[,] b = new complex[n, n];
        for(int task=0; task<=3; task++)
        {
            //
            // Determine task
            //
            bool isupper = task%2==0;
            bool isunit = task/2%2==0;

            for(int pass=1; pass<=passcount; pass++)
            {
                //
                // Generate matrix A and its copy B
                //
                for(int i=0; i<n; i++)
                {
                    for(int j=0; j<n; j++)
                    {
                        if( i==j )
                        {
                            a[i,i].x = 1+math.randomreal();
                            a[i,i].y = 1+math.randomreal();
                        }
                        else
                        {
                            a[i,j].x = 0.2*math.randomreal()-0.1;
                            a[i,j].y = 0.2*math.randomreal()-0.1;
                        }
                        b[i,j] = a[i,j];
                    }
                }

                //
                // Inverse
                //
                matinv.cmatrixtrinverse(ref b, n, isupper, isunit, ref info, rep);
                if( info<=0 )
                {
                    ctrerrors = true;
                    return;
                }

                //
                // Structural test: unit diagonal and the unused triangle
                // (columns UnusedFirst..UnusedLast of each row) must be
                // left untouched
                //
                if( isunit )
                {
                    for(int i=0; i<n; i++)
                    {
                        if( a[i,i]!=b[i,i] )
                        {
                            ctrerrors = true;
                        }
                    }
                }
                for(int i=0; i<n; i++)
                {
                    int unusedfirst = isupper ? 0 : i+1;
                    int unusedlast = isupper ? i-1 : n-1;
                    for(int j=unusedfirst; j<=unusedlast; j++)
                    {
                        if( a[i,j]!=b[i,j] )
                        {
                            ctrerrors = true;
                        }
                    }
                }

                //
                // Inverse test: clear the unused triangle, make the
                // diagonal explicit for unit matrices, then compare A*B
                // with the identity matrix
                //
                for(int i=0; i<n; i++)
                {
                    int unusedfirst = isupper ? 0 : i+1;
                    int unusedlast = isupper ? i-1 : n-1;
                    for(int j=unusedfirst; j<=unusedlast; j++)
                    {
                        a[i,j] = 0;
                        b[i,j] = 0;
                    }
                    if( isunit )
                    {
                        a[i,i] = 1;
                        b[i,i] = 1;
                    }
                }
                for(int i=0; i<n; i++)
                {
                    for(int j=0; j<n; j++)
                    {
                        complex v = 0.0;
                        for(int m=0; m<n; m++)
                        {
                            v += a[i,m]*b[m,j];
                        }
                        if( j!=i )
                        {
                            if( math.abscomplex(v)>threshold )
                            {
                                ctrerrors = true;
                            }
                        }
                        else
                        {
                            if( math.abscomplex(v-1)>threshold )
                            {
                                ctrerrors = true;
                            }
                        }
                    }
                }
            }
        }
    }
}
/*************************************************************************
Complex test.

For every pass in 1..PassCount and every size N in 1..MaxN:
* a matrix produced by CMatrixRndCond(N,1000) is inverted with
  CMatrixInverse and (after CMatrixLU) with CMatrixLUInverse; both
  results are verified by CMatrixCheckInverse;
* exactly singular matrices are built (all zeros, zero column, zero row,
  two equal columns, two equal rows; the last two kinds are skipped for
  N<2) and passed to the same two routines; the outcome is verified by
  CMatrixCheckInverseSingular.

CErrors is set to True when any check fails and is never reset here.
*************************************************************************/
private static void testcinv(int maxn,
    int passcount,
    double threshold,
    ref bool cerrors)
{
    complex[,] a = new complex[0,0];
    complex[,] lua = new complex[0,0];
    complex[,] inva = new complex[0,0];
    complex[,] invlua = new complex[0,0];
    int[] p = new int[0];
    matinv.matinvreport rep = new matinv.matinvreport();
    int info = 0;

    for(int pass=1; pass<=passcount; pass++)
    {
        for(int n=1; n<=maxn; n++)
        {
            //
            // Well conditioned task: both inversion routines must
            // reproduce the inverse of A
            //
            matgen.cmatrixrndcond(n, 1000, ref a);
            cmatrixmakeacopy(a, n, n, ref lua);
            trfac.cmatrixlu(ref lua, n, n, ref p);
            cmatrixmakeacopy(a, n, n, ref inva);
            cmatrixmakeacopy(lua, n, n, ref invlua);

            info = 0;
            unsetrep(rep);
            matinv.cmatrixinverse(ref inva, n, ref info, rep);
            if( !cmatrixcheckinverse(a, inva, n, threshold, info, rep) )
            {
                cerrors = true;
            }

            info = 0;
            unsetrep(rep);
            matinv.cmatrixluinverse(ref invlua, p, n, ref info, rep);
            if( !cmatrixcheckinverse(a, invlua, n, threshold, info, rep) )
            {
                cerrors = true;
            }

            //
            // Exactly singular tasks: ability to detect singularity
            // is tested.
            // * TaskKind=0 - zero matrix
            // * TaskKind=1 - random matrix with zero column
            // * TaskKind=2 - random matrix with zero row
            // * TaskKind=3 - random matrix with two equal columns
            // * TaskKind=4 - random matrix with two equal rows
            //
            for(int taskkind=0; taskkind<=4; taskkind++)
            {
                cunset2d(ref a);
                if( taskkind>=3 && n<2 )
                {
                    //
                    // Equal rows/columns need at least two of them
                    //
                    continue;
                }

                a = new complex[n, n];
                if( taskkind==0 )
                {
                    for(int row=0; row<n; row++)
                    {
                        for(int col=0; col<n; col++)
                        {
                            a[row,col] = 0;
                        }
                    }
                }
                else
                {
                    for(int row=0; row<n; row++)
                    {
                        for(int col=0; col<n; col++)
                        {
                            a[row,col].x = 2*math.randomreal()-1;
                            a[row,col].y = 2*math.randomreal()-1;
                        }
                    }
                }

                int k;
                switch( taskkind )
                {
                    case 1:
                    {
                        k = math.randominteger(n);
                        for(int m=0; m<n; m++)
                        {
                            a[m,k] = 0*a[m,k];
                        }
                        break;
                    }
                    case 2:
                    {
                        k = math.randominteger(n);
                        for(int m=0; m<n; m++)
                        {
                            a[k,m] = 0*a[k,m];
                        }
                        break;
                    }
                    case 3:
                    {
                        k = 1+math.randominteger(n-1);
                        for(int m=0; m<n; m++)
                        {
                            a[m,0] = a[m,k];
                        }
                        break;
                    }
                    case 4:
                    {
                        k = 1+math.randominteger(n-1);
                        for(int m=0; m<n; m++)
                        {
                            a[0,m] = a[k,m];
                        }
                        break;
                    }
                }

                cmatrixmakeacopy(a, n, n, ref lua);
                trfac.cmatrixlu(ref lua, n, n, ref p);

                info = 0;
                unsetrep(rep);
                matinv.cmatrixinverse(ref a, n, ref info, rep);
                if( !cmatrixcheckinversesingular(a, n, threshold, info, rep) )
                {
                    cerrors = true;
                }

                info = 0;
                unsetrep(rep);
                matinv.cmatrixluinverse(ref lua, p, n, ref info, rep);
                if( !cmatrixcheckinversesingular(lua, n, threshold, info, rep) )
                {
                    cerrors = true;
                }
            }
        }
    }
}
/*************************************************************************
Reduction of a generalized symmetric positive-definite eigenvalue problem

    A*x = lambda*B*x        (1)   or
    A*B*x = lambda*x        (2)   or
    B*A*x = lambda*x        (3)

to the standard symmetric problem C*y = lambda*y. Eigenvalues of both
problems coincide; eigenvectors of the original problem are recovered as
x = R*y, where R is the triangular transformation matrix returned by this
function.

Here A is a symmetric matrix, B is a symmetric positive-definite matrix.

Input parameters:
    A           -   symmetric matrix given by its upper or lower
                    triangular part, array[0..N-1, 0..N-1].
    N           -   size of matrices A and B, N>0.
    IsUpperA    -   storage format of matrix A.
    B           -   symmetric positive-definite matrix given by its upper
                    or lower triangular part, array[0..N-1, 0..N-1].
    IsUpperB    -   storage format of matrix B.
    ProblemType -   1, 2 or 3 - selects problem (1), (2) or (3) above.

Output parameters:
    A           -   matrix C, array[0..N-1, 0..N-1]. All N*N elements
                    are overwritten.
    R           -   triangular transformation matrix, array[0..N-1,
                    0..N-1]. Elements on the other side of the main
                    diagonal are set to zero, so R can be multiplied as
                    a general matrix.
    IsUpperR    -   True if R is upper triangular, False if lower.

Result:
    True, if the problem was reduced successfully.
    False, if Cholesky decomposition of B failed (B is not
    positive-definite) or inversion of the triangular factor failed.

  -- ALGLIB --
     Copyright 1.28.2006 by Bochkanov Sergey
*************************************************************************/
public static bool smatrixgevdreduce(ref double[,] a,
    int n,
    bool isuppera,
    double[,] b,
    bool isupperb,
    int problemtype,
    ref double[,] r,
    ref bool isupperr)
{
    matinv.matinvreport rep = new matinv.matinvreport();
    int info = 0;
    double[,] t;
    double[] w1;
    double[] w2;
    double[] w3;

    r = new double[0,0];
    isupperr = false;
    ap.assert(n>0, "SMatrixGEVDReduce: N<=0!");
    ap.assert(problemtype==1 || problemtype==2 || problemtype==3, "SMatrixGEVDReduce: incorrect ProblemType!");

    //
    // Problem 1:  A*x = lambda*B*x
    //
    // Reducing to:
    //     C*y = lambda*y
    //     C = L^(-1) * A * L^(-T)
    //     x = L^(-T) * y
    //
    if( problemtype==1 )
    {
        //
        // Factorize B in T: B = LL'
        // (lower triangle of B is gathered in T first)
        //
        t = new double[n, n];
        for(int i=0; i<n; i++)
        {
            if( isupperb )
            {
                for(int c=i; c<n; c++)
                {
                    t[c,i] = b[i,c];
                }
            }
            else
            {
                for(int c=0; c<=i; c++)
                {
                    t[i,c] = b[i,c];
                }
            }
        }
        if( !trfac.spdmatrixcholesky(ref t, n, false) )
        {
            return false;
        }

        //
        // Invert L in T
        //
        matinv.rmatrixtrinverse(ref t, n, false, false, ref info, rep);
        if( info<=0 )
        {
            return false;
        }

        //
        // Build L^(-1) * A * L^(-T) in R, column by column.
        // Work vectors W1/W2 use 1-based indexing.
        //
        w1 = new double[n+1];
        w2 = new double[n+1];
        r = new double[n, n];
        for(int j=1; j<=n; j++)
        {
            //
            // Form w2 = A * l'(j) (here l'(j) is j-th column of L^(-T),
            // i.e. j-th row of L^(-1); only its first j elements are used)
            //
            for(int c=1; c<=j; c++)
            {
                w1[c] = t[j-1,c-1];
            }
            sblas.symmetricmatrixvectormultiply(a, isuppera, 0, j-1, w1, 1.0, ref w2);
            if( isuppera )
            {
                blas.matrixvectormultiply(a, 0, j-1, j, n-1, true, w1, 1, j, 1.0, ref w2, j+1, n, 0.0);
            }
            else
            {
                blas.matrixvectormultiply(a, j, n-1, 0, j-1, false, w1, 1, j, 1.0, ref w2, j+1, n, 0.0);
            }

            //
            // Form l(i)*w2 (here l(i) is i-th row of L^(-1))
            //
            for(int row=0; row<n; row++)
            {
                double dot = 0.0;
                for(int c=0; c<=row; c++)
                {
                    dot += t[row,c]*w2[c+1];
                }
                r[row,j-1] = dot;
            }
        }

        //
        // Copy R to A
        //
        for(int i=0; i<n; i++)
        {
            for(int c=0; c<n; c++)
            {
                a[i,c] = r[i,c];
            }
        }

        //
        // Copy L^(-1) from T to R and transpose
        //
        isupperr = true;
        for(int i=0; i<n; i++)
        {
            for(int c=0; c<i; c++)
            {
                r[i,c] = 0;
            }
            for(int c=i; c<n; c++)
            {
                r[i,c] = t[c,i];
            }
        }
        return true;
    }

    //
    // Problem 2:  A*B*x = lambda*x
    // or
    // problem 3:  B*A*x = lambda*x
    //
    // Reducing to:
    //     C*y = lambda*y
    //     C = U * A * U'
    //     B = U'* U
    //
    if( problemtype==2 || problemtype==3 )
    {
        //
        // Factorize B in T: B = U'*U
        // (upper triangle of B is gathered in T first)
        //
        t = new double[n, n];
        for(int i=0; i<n; i++)
        {
            for(int c=i; c<n; c++)
            {
                t[i,c] = isupperb ? b[i,c] : b[c,i];
            }
        }
        if( !trfac.spdmatrixcholesky(ref t, n, true) )
        {
            return false;
        }

        //
        // Build U * A * U' in R, column by column.
        // Work vectors W1/W2/W3 use 1-based indexing.
        //
        w1 = new double[n+1];
        w2 = new double[n+1];
        w3 = new double[n+1];
        r = new double[n, n];
        for(int j=1; j<=n; j++)
        {
            //
            // Form w2 = A * u'(j) (here u'(j) is j-th column of U',
            // i.e. j-th row of U; its nonzero part has length n-j+1)
            //
            for(int c=1; c<=n-j+1; c++)
            {
                w1[c] = t[j-1,c+j-2];
            }
            sblas.symmetricmatrixvectormultiply(a, isuppera, j-1, n-1, w1, 1.0, ref w3);
            for(int c=j; c<=n; c++)
            {
                w2[c] = w3[c-j+1];
                w1[c] = t[j-1,c-1];
            }
            if( isuppera )
            {
                blas.matrixvectormultiply(a, 0, j-2, j-1, n-1, false, w1, j, n, 1.0, ref w2, 1, j-1, 0.0);
            }
            else
            {
                blas.matrixvectormultiply(a, j-1, n-1, 0, j-2, true, w1, j, n, 1.0, ref w2, 1, j-1, 0.0);
            }

            //
            // Form u(i)*w2 (here u(i) is i-th row of U)
            //
            for(int row=0; row<n; row++)
            {
                double dot = 0.0;
                for(int c=row; c<n; c++)
                {
                    dot += t[row,c]*w2[c+1];
                }
                r[row,j-1] = dot;
            }
        }

        //
        // Copy R to A
        //
        for(int i=0; i<n; i++)
        {
            for(int c=0; c<n; c++)
            {
                a[i,c] = r[i,c];
            }
        }

        if( problemtype==2 )
        {
            //
            // Invert U in T
            //
            matinv.rmatrixtrinverse(ref t, n, true, false, ref info, rep);
            if( info<=0 )
            {
                return false;
            }

            //
            // Copy U^-1 from T to R
            //
            isupperr = true;
            for(int i=0; i<n; i++)
            {
                for(int c=0; c<i; c++)
                {
                    r[i,c] = 0;
                }
                for(int c=i; c<n; c++)
                {
                    r[i,c] = t[i,c];
                }
            }
        }
        else
        {
            //
            // Copy U from T to R and transpose
            //
            isupperr = false;
            for(int i=0; i<n; i++)
            {
                for(int c=i+1; c<n; c++)
                {
                    r[i,c] = 0;
                }
                for(int c=i; c<n; c++)
                {
                    r[c,i] = t[i,c];
                }
            }
        }
    }
    return true;
}
/*************************************************************************
Wraps an existing internal report object; the object is stored by
reference, no copy is made.
*************************************************************************/
public matinvreport(matinv.matinvreport obj)
{
    this._innerobj = obj;
}
/*************************************************************************
Default constructor: wraps a freshly created internal report object.
*************************************************************************/
public matinvreport() : this(new matinv.matinvreport())
{
}
/*************************************************************************
This internal function estimates covariance matrix and other error-related
information for linear/nonlinear least squares model.

Its interface is somewhat awkward because the same code serves both linear
and nonlinear problems.

INPUT PARAMETERS:
    F1  -   array[0..N-1,0..K-1]:
            * for linear problems - matrix of function values
            * for nonlinear problems - Jacobian matrix
    F0  -   array[0..N-1]:
            * for linear problems - must be filled with zeros
            * for nonlinear problems - must store values of function being
              fitted
    Y   -   array[0..N-1], target values (linear and nonlinear problems)
    W   -   array[0..N-1], weights (linear and nonlinear problems)
    X   -   array[0..K-1], current solution (linear and nonlinear problems)
    S   -   array[0..K-1]:
            * its components should be strictly positive
            * squared inverse of this diagonal matrix is used as damping
              factor for covariance matrix (linear and nonlinear problems)
            * for nonlinear problems, when scale of the variables is
              usually explicitly given by user, you may use scale vector
              for this parameter
            * for linear problems you may set this parameter to
              S=sqrt(1/diag(F'*F))
            * this parameter is automatically rescaled by this function,
              only relative magnitudes of its components (with respect to
              each other) matter. The function works with its own copy,
              caller's array is not modified.
    N   -   number of points, N>0.
    K   -   number of dimensions
    Rep -   structure which is used to store results
    Z   -   additional matrix whose meaning depends on ZKind:
            * for ZKind=0 Z contains no information, it is a temporary
              buffer which can be resized and used as needed
            * for ZKind=1 Z contains triangular matrix from QR
              decomposition of W*F1. This matrix is used to speed up
              calculation of covariance matrix and is not changed.
    ZKind-  contents of Z

OUTPUT PARAMETERS:

* Rep.CovPar        covariance matrix for parameters, array[K,K].
* Rep.ErrPar        errors in parameters, array[K],
                    errpar = sqrt(diag(CovPar))
* Rep.ErrCurve      vector of fit errors - standard deviations of empirical
                    best-fit curve from "ideal" best-fit curve built with
                    infinite number of samples, array[N].
                    errcurve = sqrt(diag(J*CovPar*J')),
                    where J is Jacobian matrix.
* Rep.Noise         vector of per-point estimates of noise, array[N]
* Rep.R2            coefficient of determination (non-weighted)

Other fields of Rep are not changed.

IMPORTANT:  errors in parameters are calculated without taking into
            account boundary/linear constraints! Presence of constraints
            changes distribution of errors, but there is no easy way to
            account for constraints when covariance matrix is calculated.

NOTE:       noise in the data is estimated as follows:
            * for fitting without user-supplied weights all points are
              assumed to have same level of noise, which is estimated from
              the data
            * for fitting with user-supplied weights we assume that noise
              level in I-th point is inversely proportional to I-th weight.
              Coefficient of proportionality is estimated from the data.

NOTE:       small amount of regularization is applied when squared
            Jacobian is inverted and covariance matrix is calculated. It
            guarantees that algorithm won't divide by zero during
            inversion, but skews error estimates a bit (fractional error
            is about 10^-9). This difference is believed to be
            insignificant for all practical purposes, except when ALGLIB
            results are compared with a "reference" implementation up to
            the last significant digit.

  -- ALGLIB PROJECT --
     Copyright 10.12.2009 by Bochkanov Sergey
*************************************************************************/
private static void estimateerrors(double[,] f1,
    double[] f0,
    double[] y,
    double[] w,
    double[] x,
    double[] s,
    int n,
    int k,
    lsfitreport rep,
    ref double[,] z,
    int zkind)
{
    matinv.matinvreport invrep = new matinv.matinvreport();
    int info = 0;

    //
    // S is rescaled below - work with a private copy
    //
    s = (double[])s.Clone();

    //
    // Compute NZCnt - count of non-zero weights
    //
    int nzcnt = 0;
    for(int i=0; i<n; i++)
    {
        if( w[i]!=0 )
        {
            nzcnt++;
        }
    }

    //
    // Compute R2 over points with non-zero weights
    //
    if( nzcnt>0 )
    {
        double avg = 0.0;
        for(int i=0; i<n; i++)
        {
            if( w[i]!=0 )
            {
                avg = avg+y[i];
            }
        }
        avg = avg/nzcnt;

        double rss = 0.0;
        double tss = 0.0;
        for(int i=0; i<n; i++)
        {
            if( w[i]==0 )
            {
                continue;
            }
            double fit = 0.0;
            for(int j=0; j<k; j++)
            {
                fit += f1[i,j]*x[j];
            }
            fit = fit+f0[i];
            rss = rss+math.sqr(fit-y[i]);
            tss = tss+math.sqr(y[i]-avg);
        }
        if( tss!=0 )
        {
            rep.r2 = Math.Max(1.0-rss/tss, 0.0);
        }
        else
        {
            rep.r2 = 1.0;
        }
    }
    else
    {
        rep.r2 = 0;
    }

    //
    // Compute estimate of proportionality between noise in the data and weights:
    //     NoiseC = mean(per-point-noise*per-point-weight)
    // Noise level (standard deviation) at each point is equal to NoiseC/W[I].
    //
    double noisec = 0.0;
    if( nzcnt>k )
    {
        for(int i=0; i<n; i++)
        {
            if( w[i]==0 )
            {
                continue;
            }
            double fit = 0.0;
            for(int j=0; j<k; j++)
            {
                fit += f1[i,j]*x[j];
            }
            fit = fit+f0[i];
            noisec = noisec+math.sqr((fit-y[i])*w[i]);
        }
        noisec = Math.Sqrt(noisec/(nzcnt-k));
    }

    //
    // Two branches on noise level:
    // * NoiseC>0   normal situation
    // * NoiseC=0   degenerate case, CovPar is filled by zeros
    //
    apserv.rmatrixsetlengthatleast(ref rep.covpar, k, k);
    if( noisec>0 )
    {
        //
        // Normal situation: non-zero noise level
        //
        alglib.ap.assert(zkind==0 || zkind==1, "LSFit: internal error in EstimateErrors() function");
        if( zkind==0 )
        {
            //
            // Z contains no additional information which can be used to speed up
            // calculations. We have to calculate covariance matrix on our own:
            // * Compute scaled Jacobian N*J, where N[i,i]=WCur[I]/NoiseC, store in Z
            // * Compute Z'*Z, store in CovPar
            // * Apply moderate regularization to CovPar and compute matrix inverse.
            //   In case inverse failed, increase regularization parameter and try
            //   again.
            //
            apserv.rmatrixsetlengthatleast(ref z, n, k);
            for(int i=0; i<n; i++)
            {
                double rowscale = w[i]/noisec;
                for(int j=0; j<k; j++)
                {
                    z[i,j] = rowscale*f1[i,j];
                }
            }

            //
            // Convert S to automatically scaled damped matrix:
            // * calculate SZ - sum of diagonal elements of Z'*Z
            // * calculate SS - sum of diagonal elements of S^(-2)
            // * overwrite S by (SZ/SS)*S^(-2)
            // * now S has approximately same magnitude as diagonal of Z'*Z
            //
            double sz = 0;
            for(int i=0; i<n; i++)
            {
                for(int j=0; j<k; j++)
                {
                    sz = sz+z[i,j]*z[i,j];
                }
            }
            if( sz==0 )
            {
                sz = 1;
            }
            double ss = 0;
            for(int j=0; j<k; j++)
            {
                ss = ss+1/math.sqr(s[j]);
            }
            for(int j=0; j<k; j++)
            {
                s[j] = sz/ss/math.sqr(s[j]);
            }

            //
            // Calculate damped inverse inv(Z'*Z+S).
            // Damping factor is increased tenfold until inversion succeeds.
            //
            double damping = 1.0E3*math.machineepsilon;
            do
            {
                ablas.rmatrixsyrk(k, n, 1.0, z, 0, 0, 2, 0.0, rep.covpar, 0, 0, true);
                for(int i=0; i<k; i++)
                {
                    rep.covpar[i,i] += damping*s[i];
                }
                matinv.spdmatrixinverse(ref rep.covpar, k, true, ref info, invrep);
                damping = 10*damping;
            }
            while( info<=0 );

            //
            // Mirror upper triangle to the lower one
            //
            for(int i=0; i<k; i++)
            {
                for(int j=i+1; j<k; j++)
                {
                    rep.covpar[j,i] = rep.covpar[i,j];
                }
            }
        }
        if( zkind==1 )
        {
            //
            // We can reuse additional information:
            // * Z contains R matrix from QR decomposition of W*F1
            // * After multiplication by 1/NoiseC we get Z_mod = N*F1, where diag(N)=w[i]/NoiseC
            // * Such triangular Z_mod is a Cholesky factor from decomposition of J'*N'*N*J.
            //   Thus, we can calculate covariance matrix as inverse of the matrix given by
            //   its Cholesky decomposition. It allows us to avoid time-consuming calculation
            //   of J'*N'*N*J in CovPar - complexity is reduced from O(N*K^2) to O(K^3), which
            //   is quite good because K is usually orders of magnitude smaller than N.
            //
            // First, convert S to automatically scaled damped matrix:
            // * calculate SZ - sum of magnitudes of diagonal elements of Z/NoiseC
            // * calculate SS - sum of diagonal elements of S^(-1)
            // * overwrite S by (SZ/SS)*S^(-1)
            // * now S has approximately same magnitude as diagonal of Z/NoiseC
            //
            double sz = 0;
            for(int j=0; j<k; j++)
            {
                sz = sz+Math.Abs(z[j,j]/noisec);
            }
            if( sz==0 )
            {
                sz = 1;
            }
            double ss = 0;
            for(int j=0; j<k; j++)
            {
                ss = ss+1/s[j];
            }
            for(int j=0; j<k; j++)
            {
                s[j] = sz/ss/s[j];
            }

            //
            // Calculate damped inverse of inv((Z+v*S)'*(Z+v*S)).
            // Damping factor is increased tenfold until inversion succeeds.
            //
            double damping = 1.0E3*math.machineepsilon;
            do
            {
                for(int i=0; i<k; i++)
                {
                    for(int j=i; j<k; j++)
                    {
                        rep.covpar[i,j] = z[i,j]/noisec;
                    }
                    rep.covpar[i,i] += damping*s[i];
                }
                matinv.spdmatrixcholeskyinverse(ref rep.covpar, k, true, ref info, invrep);
                damping = 10*damping;
            }
            while( info<=0 );

            //
            // Mirror upper triangle to the lower one
            //
            for(int i=0; i<k; i++)
            {
                for(int j=i+1; j<k; j++)
                {
                    rep.covpar[j,i] = rep.covpar[i,j];
                }
            }
        }
    }
    else
    {
        //
        // Degenerate situation: zero noise level, covariance matrix is zero.
        //
        for(int i=0; i<k; i++)
        {
            for(int j=0; j<k; j++)
            {
                rep.covpar[j,i] = 0;
            }
        }
    }

    //
    // Estimate errors in parameters, curve and per-point noise
    //
    apserv.rvectorsetlengthatleast(ref rep.errpar, k);
    apserv.rvectorsetlengthatleast(ref rep.errcurve, n);
    apserv.rvectorsetlengthatleast(ref rep.noise, n);
    for(int i=0; i<k; i++)
    {
        rep.errpar[i] = Math.Sqrt(rep.covpar[i,i]);
    }
    for(int i=0; i<n; i++)
    {
        //
        // ErrCurve[I] is sqrt(P[i,i]) where P=J*CovPar*J'
        //
        double quad = 0.0;
        for(int j=0; j<k; j++)
        {
            for(int j1=0; j1<k; j1++)
            {
                quad = quad+f1[i,j]*rep.covpar[j,j1]*f1[i,j1];
            }
        }
        rep.errcurve[i] = Math.Sqrt(quad);

        //
        // Noise[i] is filled using weights and current estimate of noise level
        //
        if( w[i]!=0 )
        {
            rep.noise[i] = noisec/w[i];
        }
        else
        {
            rep.noise[i] = 0;
        }
    }
}
/// <summary>
/// Assigns an empty array to every array field and a new default-constructed
/// instance to every helper-object field of this object.
/// </summary>
public override void init()
{
    // One-dimensional arrays: all start with zero length.
    s = new double[0];
    bndl = new double[0];
    bndu = new double[0];
    tasky = new double[0];
    taskw = new double[0];
    x = new double[0];
    c = new double[0];
    g = new double[0];
    wcur = new double[0];
    tmp = new double[0];
    tmpf = new double[0];

    // Two-dimensional arrays: all start as 0x0.
    taskx = new double[0,0];
    h = new double[0,0];
    tmpjac = new double[0,0];
    tmpjacw = new double[0,0];

    // Helper objects: fresh instances.
    invrep = new matinv.matinvreport();
    rep = new lsfitreport();
    optstate = new minlm.minlmstate();
    optrep = new minlm.minlmreport();
    rstate = new rcommstate();
}
/*************************************************************************
 * Neural network training using a modified Levenberg-Marquardt method with
 * Hessian calculation and regularization. The network is trained several
 * times, each time starting from a random position, and the best weights
 * found over all restarts are kept. The algorithm is well suited for small
 * and medium scale problems (hundreds of weights).
 *
 * Every restart runs in two stages:
 *   1. a short L-BFGS run from the randomized weights;
 *   2. a Levenberg-Marquardt loop, where every accepted step is refined by
 *      a few L-BFGS iterations preconditioned with the inverse Cholesky
 *      factor of the damped Hessian.
 *
 * INPUT PARAMETERS:
 *     Network     -   neural network with initialized geometry
 *     XY          -   training set
 *     NPoints     -   training set size, >0
 *     Decay       -   weight decay constant. Values below the class-level
 *                     'mindecay' constant are raised to it. The term
 *                     '0.5*Decay*||Weights||^2' is added to the error
 *                     function. If you don't know what Decay to choose,
 *                     use 0.001.
 *     Restarts    -   number of restarts from random position, >0.
 *                     If you don't know what Restarts to choose, use 2.
 *
 * OUTPUT PARAMETERS:
 *     Network     -   trained neural network.
 *     Info        -   return code:
 *                     * -9, if internal matrix inverse subroutine failed
 *                           (the network is left at the current iterate)
 *                     * -2, if the network is a softmax classifier and
 *                           there is a point with class number outside
 *                           of [0..NOut-1].
 *                     * -1, if wrong parameters specified
 *                           (NPoints<=0, Restarts<1).
 *                     *  2, if task has been solved.
 *     Rep         -   training report; the NGrad, NHess and NCholesky
 *                     counters are reset and then updated here.
 *
 *   -- ALGLIB --
 *      Copyright 10.03.2009 by Bochkanov Sergey
 *************************************************************************/
public static void mlptrainlm(ref mlpbase.multilayerperceptron network, ref double[,] xy, int npoints, double decay, int restarts, ref int info, ref mlpreport rep)
{
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double lmsteptol = 0;
    int i = 0;
    int k = 0;
    double v = 0;
    double e = 0;
    double enew = 0;
    double xnorm2 = 0;
    double stepnorm = 0;
    double[] g = new double[0];
    double[,] h = new double[0, 0];
    double[,] hmod = new double[0, 0];
    bool spd = false;
    double nu = 0;
    double lambda = 0;
    double lambdaup = 0;
    double lambdadown = 0;
    minlbfgs.minlbfgsreport internalrep = new minlbfgs.minlbfgsreport();
    minlbfgs.minlbfgsstate state = new minlbfgs.minlbfgsstate();
    double[] wbase = new double[0];
    double[] wdir = new double[0];
    double[] wt = new double[0];
    int pass = 0;
    double[] wbest = new double[0];
    double ebest = 0;
    int invinfo = 0;
    matinv.matinvreport invrep = new matinv.matinvreport();
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;

    mlpbase.mlpproperties(ref network, ref nin, ref nout, ref wcount);
    lambdaup = 10;
    lambdadown = 0.3;
    lmsteptol = 0.001;

    //
    // Test for inputs
    //
    if (npoints <= 0 || restarts < 1)
    {
        info = -1;
        return;
    }
    if (mlpbase.mlpissoftmax(ref network))
    {
        // For softmax networks column NIn of XY holds the class number.
        for (i = 0; i <= npoints - 1; i++)
        {
            if ((int)Math.Round(xy[i, nin]) < 0 || (int)Math.Round(xy[i, nin]) >= nout)
            {
                info = -2;
                return;
            }
        }
    }
    decay = Math.Max(decay, mindecay);
    info = 2;

    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;

    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpinitpreprocessor(ref network, ref xy, npoints);
    g = new double[wcount];
    h = new double[wcount, wcount];
    hmod = new double[wcount, wcount];
    wbase = new double[wcount];
    wdir = new double[wcount];
    wbest = new double[wcount];
    wt = new double[wcount];
    ebest = AP.Math.MaxRealNumber;

    //
    // Multiple passes
    //
    for (pass = 1; pass <= restarts; pass++)
    {
        //
        // Initialize weights
        //
        mlpbase.mlprandomize(ref network);

        //
        // First stage of the hybrid algorithm: LBFGS
        //
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            wbase[i_] = network.weights[i_];
        }
        minlbfgs.minlbfgscreate(wcount, Math.Min(wcount, 5), ref wbase, ref state);
        minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, Math.Max(25, wcount));
        while (minlbfgs.minlbfgsiteration(ref state))
        {
            //
            // gradient
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = state.x[i_];
            }
            mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref state.g);

            //
            // weight decay
            //
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            state.f = state.f + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                state.g[i_] = state.g[i_] + decay * network.weights[i_];
            }

            //
            // next iteration
            //
            rep.ngrad = rep.ngrad + 1;
        }
        minlbfgs.minlbfgsresults(ref state, ref wbase, ref internalrep);
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            network.weights[i_] = wbase[i_];
        }

        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with the Hessian returned by mlphessianbatch
        //               (plus Decay on the diagonal),
        //            G with the regularized gradient,
        //            E with the regularized error.
        //
        mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            v += network.weights[i_] * network.weights[i_];
        }
        e = e + 0.5 * decay * v;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            g[i_] = g[i_] + decay * network.weights[i_];
        }
        for (k = 0; k <= wcount - 1; k++)
        {
            h[k, k] = h[k, k] + decay;
        }
        rep.nhess = rep.nhess + 1;
        lambda = 0.001;
        nu = 2;
        while (true)
        {
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if left part is not positive definite.
            //
            for (i = 0; i <= wcount - 1; i++)
            {
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    hmod[i, i_] = h[i, i_];
                }
                hmod[i, i] = hmod[i, i] + lambda;
            }
            spd = trfac.spdmatrixcholesky(ref hmod, wcount, true);
            rep.ncholesky = rep.ncholesky + 1;
            if (!spd)
            {
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }
            densesolver.spdmatrixcholeskysolve(ref hmod, wcount, true, ref g, ref solverinfo, ref solverrep, ref wdir);
            if (solverinfo < 0)
            {
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wdir[i_] = -1 * wdir[i_];
            }

            //
            // Lambda found.
            // 1. Move the network to w+wdir
            // 2. Test the step-size stopping criterion
            // 3. If error(w+wdir)>error(w), increase lambda and retry
            //
            // NOTE(review): when the step is rejected in (3) the weights are
            // not rolled back, so the next (more damped) step is added on top
            // of the rejected one. Behaviour is kept exactly as it was;
            // confirm whether this is intended.
            //
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                network.weights[i_] = network.weights[i_] + wdir[i_];
            }
            xnorm2 = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                xnorm2 += network.weights[i_] * network.weights[i_];
            }
            stepnorm = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                stepnorm += wdir[i_] * wdir[i_];
            }
            stepnorm = Math.Sqrt(stepnorm);
            enew = mlpbase.mlperror(ref network, ref xy, npoints) + 0.5 * decay * xnorm2;
            // The explicit (double) casts are kept from the original code so
            // that the comparisons are evaluated exactly as before.
            if ((double)(stepnorm) < (double)(lmsteptol * (1 + Math.Sqrt(xnorm2))))
            {
                break;
            }
            if ((double)(enew) > (double)(e))
            {
                lambda = lambda * lambdaup * nu;
                nu = nu * 2;
                continue;
            }

            //
            // Optimize using inv(cholesky(H)) as preconditioner
            //
            matinv.rmatrixtrinverse(ref hmod, wcount, true, false, ref invinfo, ref invrep);
            if (invinfo <= 0)
            {
                //
                // if matrix can't be inverted then exit with errors
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbase[i_] = network.weights[i_];
            }
            for (i = 0; i <= wcount - 1; i++)
            {
                wt[i] = 0;
            }
            minlbfgs.minlbfgscreatex(wcount, wcount, ref wt, 1, ref state);
            minlbfgs.minlbfgssetcond(ref state, 0, 0, 0, 5);
            while (minlbfgs.minlbfgsiteration(ref state))
            {
                //
                // gradient: weights = WBase + A*t, where A is the upper
                // triangular matrix stored in HMod and t = State.X
                //
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = 0.0;
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        v += state.x[i_] * hmod[i, i_];
                    }
                    network.weights[i] = wbase[i] + v;
                }
                mlpbase.mlpgradbatch(ref network, ref xy, npoints, ref state.f, ref g);
                for (i = 0; i <= wcount - 1; i++)
                {
                    state.g[i] = 0;
                }
                // State.G = A'*G (chain rule for the substitution above)
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = g[i];
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v * hmod[i, i_];
                    }
                }

                //
                // weight decay
                // grad(x'*x) = A'*(x0+A*t)
                //
                v = 0.0;
                for (i_ = 0; i_ <= wcount - 1; i_++)
                {
                    v += network.weights[i_] * network.weights[i_];
                }
                state.f = state.f + 0.5 * decay * v;
                for (i = 0; i <= wcount - 1; i++)
                {
                    v = decay * network.weights[i];
                    for (i_ = i; i_ <= wcount - 1; i_++)
                    {
                        state.g[i_] = state.g[i_] + v * hmod[i, i_];
                    }
                }

                //
                // next iteration
                //
                rep.ngrad = rep.ngrad + 1;
            }
            minlbfgs.minlbfgsresults(ref state, ref wt, ref internalrep);

            //
            // Accept new position.
            // Calculate Hessian
            //
            for (i = 0; i <= wcount - 1; i++)
            {
                v = 0.0;
                for (i_ = i; i_ <= wcount - 1; i_++)
                {
                    v += wt[i_] * hmod[i, i_];
                }
                network.weights[i] = wbase[i] + v;
            }
            mlpbase.mlphessianbatch(ref network, ref xy, npoints, ref e, ref g, ref h);
            v = 0.0;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                v += network.weights[i_] * network.weights[i_];
            }
            e = e + 0.5 * decay * v;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                g[i_] = g[i_] + decay * network.weights[i_];
            }
            for (k = 0; k <= wcount - 1; k++)
            {
                h[k, k] = h[k, k] + decay;
            }
            rep.nhess = rep.nhess + 1;

            //
            // Update lambda
            //
            lambda = lambda * lambdadown;
            nu = 2;
        }

        //
        // update WBest
        //
        v = 0.0;
        for (i_ = 0; i_ <= wcount - 1; i_++)
        {
            v += network.weights[i_] * network.weights[i_];
        }
        e = 0.5 * decay * v + mlpbase.mlperror(ref network, ref xy, npoints);
        if ((double)(e) < (double)(ebest))
        {
            ebest = e;
            for (i_ = 0; i_ <= wcount - 1; i_++)
            {
                wbest[i_] = network.weights[i_];
            }
        }
    }

    //
    // copy WBest to output
    //
    for (i_ = 0; i_ <= wcount - 1; i_++)
    {
        network.weights[i_] = wbest[i_];
    }
}
/*************************************************************************
 * Reduces a generalized symmetric positive-definite eigenvalue problem
 *     A*x = lambda*B*x                   (1)   or
 *     A*B*x = lambda*x                   (2)   or
 *     B*A*x = lambda*x                   (3)
 * to the standard symmetric problem C*y = lambda*y. Both problems share the
 * same eigenvalues; eigenvectors of the original problem are recovered from
 * those of C through the transformation x = R*y.
 *
 * A is a symmetric matrix, B is a symmetric positive-definite matrix.
 *
 * Input parameters:
 *     A           -   symmetric matrix given by its upper or lower triangle,
 *                     array [0..N-1, 0..N-1].
 *     N           -   size of matrices A and B.
 *     IsUpperA    -   True if the upper triangle of A is stored.
 *     B           -   symmetric positive-definite matrix given by its upper
 *                     or lower triangle, array [0..N-1, 0..N-1].
 *     IsUpperB    -   True if the upper triangle of B is stored.
 *     ProblemType -   1 for A*x = lambda*B*x,
 *                     2 for A*B*x = lambda*x,
 *                     3 for B*A*x = lambda*x.
 *
 * Output parameters:
 *     A           -   overwritten with the full N x N matrix C.
 *     R           -   triangular transformation matrix, array
 *                     [0..N-1, 0..N-1]; elements on the other side of the
 *                     diagonal are set to 0, so R may be used as a general
 *                     dense matrix.
 *     IsUpperR    -   True if R is upper triangular, False if lower.
 *
 * Result:
 *     True  - the problem was reduced successfully.
 *     False - the Cholesky decomposition of B failed (B is not positive
 *             definite) or the triangular inverse failed.
 *
 *   -- ALGLIB --
 *      Copyright 1.28.2006 by Bochkanov Sergey
 *************************************************************************/
public static bool smatrixgevdreduce(ref double[,] a, int n, bool isuppera, ref double[,] b, bool isupperb, int problemtype, ref double[,] r, ref bool isupperr)
{
    matinv.matinvreport invReport = new matinv.matinvreport();
    int invInfo = 0;
    double[,] factor;
    double[] vecIn;
    double[] vecOut;
    double[] vecTmp;
    double dot;

    System.Diagnostics.Debug.Assert(n > 0, "SMatrixGEVDReduce: N<=0!");
    System.Diagnostics.Debug.Assert(problemtype >= 1 && problemtype <= 3, "SMatrixGEVDReduce: incorrect ProblemType!");

    if (problemtype == 1)
    {
        //
        // Problem 1: A*x = lambda*B*x
        //
        // Reduced to C*y = lambda*y with
        //     C = L^(-1) * A * L^(-T),   x = L^(-T) * y,   B = L*L'
        //

        // Gather the lower triangle of B into 'factor' and factorize it.
        factor = new double[n, n];
        for (int row = 0; row < n; row++)
        {
            if (isupperb)
            {
                for (int col = row; col < n; col++)
                {
                    factor[col, row] = b[row, col];
                }
            }
            else
            {
                for (int col = 0; col <= row; col++)
                {
                    factor[row, col] = b[row, col];
                }
            }
        }
        if (!trfac.spdmatrixcholesky(ref factor, n, false))
        {
            return false;
        }

        // Replace L by L^(-1) in place.
        matinv.rmatrixtrinverse(ref factor, n, false, false, ref invInfo, ref invReport);
        if (invInfo <= 0)
        {
            return false;
        }

        // Build L^(-1) * A * L^(-T) in R, one column at a time.
        // Work vectors are 1-based, hence the N+1 length.
        vecIn = new double[n + 1];
        vecOut = new double[n + 1];
        r = new double[n, n];
        for (int j = 1; j <= n; j++)
        {
            // vecOut = A * l'(j), where l'(j) is the j-th column of L^(-T),
            // i.e. the j-th row of L^(-1); only its first j entries are non-zero.
            for (int p = 1; p <= j; p++)
            {
                vecIn[p] = factor[j - 1, p - 1];
            }
            sblas.symmetricmatrixvectormultiply(ref a, isuppera, 0, j - 1, ref vecIn, 1.0, ref vecOut);
            if (isuppera)
            {
                blas.matrixvectormultiply(ref a, 0, j - 1, j, n - 1, true, ref vecIn, 1, j, 1.0, ref vecOut, j + 1, n, 0.0);
            }
            else
            {
                blas.matrixvectormultiply(ref a, j, n - 1, 0, j - 1, false, ref vecIn, 1, j, 1.0, ref vecOut, j + 1, n, 0.0);
            }

            // R[i-1, j-1] = l(i) * vecOut, where l(i) is the i-th row of L^(-1).
            for (int i = 1; i <= n; i++)
            {
                dot = 0.0;
                for (int p = 0; p <= i - 1; p++)
                {
                    dot += factor[i - 1, p] * vecOut[p + 1];
                }
                r[i - 1, j - 1] = dot;
            }
        }

        // Copy C from R to A.
        for (int row = 0; row < n; row++)
        {
            for (int col = 0; col < n; col++)
            {
                a[row, col] = r[row, col];
            }
        }

        // R = (L^(-1))' : zero below the diagonal, transposed factor on and above it.
        isupperr = true;
        for (int row = 0; row < n; row++)
        {
            for (int col = 0; col < row; col++)
            {
                r[row, col] = 0;
            }
            for (int col = row; col < n; col++)
            {
                r[row, col] = factor[col, row];
            }
        }
        return true;
    }

    if (problemtype == 2 || problemtype == 3)
    {
        //
        // Problem 2: A*B*x = lambda*x
        // or
        // problem 3: B*A*x = lambda*x
        //
        // Reduced to C*y = lambda*y with
        //     C = U * A * U',   B = U'*U
        //

        // Gather the upper triangle of B into 'factor' and factorize it.
        factor = new double[n, n];
        for (int row = 0; row < n; row++)
        {
            for (int col = row; col < n; col++)
            {
                factor[row, col] = isupperb ? b[row, col] : b[col, row];
            }
        }
        if (!trfac.spdmatrixcholesky(ref factor, n, true))
        {
            return false;
        }

        // Build U * A * U' in R, one column at a time.
        // Work vectors are 1-based, hence the N+1 length.
        vecIn = new double[n + 1];
        vecOut = new double[n + 1];
        vecTmp = new double[n + 1];
        r = new double[n, n];
        for (int j = 1; j <= n; j++)
        {
            // vecOut = A * u'(j), where u'(j) is the j-th column of U',
            // i.e. the j-th row of U; its non-zero part starts at the diagonal.
            for (int p = 1; p <= n - j + 1; p++)
            {
                vecIn[p] = factor[j - 1, p + j - 2];
            }
            sblas.symmetricmatrixvectormultiply(ref a, isuppera, j - 1, n - 1, ref vecIn, 1.0, ref vecTmp);
            for (int p = j; p <= n; p++)
            {
                vecOut[p] = vecTmp[p + 1 - j];
            }
            for (int p = j; p <= n; p++)
            {
                vecIn[p] = factor[j - 1, p - 1];
            }
            if (isuppera)
            {
                blas.matrixvectormultiply(ref a, 0, j - 2, j - 1, n - 1, false, ref vecIn, j, n, 1.0, ref vecOut, 1, j - 1, 0.0);
            }
            else
            {
                blas.matrixvectormultiply(ref a, j - 1, n - 1, 0, j - 2, true, ref vecIn, j, n, 1.0, ref vecOut, 1, j - 1, 0.0);
            }

            // R[i-1, j-1] = u(i) * vecOut, where u(i) is the i-th row of U.
            for (int i = 1; i <= n; i++)
            {
                dot = 0.0;
                for (int p = i - 1; p <= n - 1; p++)
                {
                    dot += factor[i - 1, p] * vecOut[p + 1];
                }
                r[i - 1, j - 1] = dot;
            }
        }

        // Copy C from R to A.
        for (int row = 0; row < n; row++)
        {
            for (int col = 0; col < n; col++)
            {
                a[row, col] = r[row, col];
            }
        }

        if (problemtype == 2)
        {
            // Replace U by U^(-1) in place.
            matinv.rmatrixtrinverse(ref factor, n, true, false, ref invInfo, ref invReport);
            if (invInfo <= 0)
            {
                return false;
            }

            // R = U^(-1): zero below the diagonal, factor on and above it.
            isupperr = true;
            for (int row = 0; row < n; row++)
            {
                for (int col = 0; col < row; col++)
                {
                    r[row, col] = 0;
                }
                for (int col = row; col < n; col++)
                {
                    r[row, col] = factor[row, col];
                }
            }
        }
        else
        {
            // R = U': zero above the diagonal, transposed factor on and below it.
            isupperr = false;
            for (int row = 0; row < n; row++)
            {
                for (int col = row + 1; col < n; col++)
                {
                    r[row, col] = 0;
                }
                for (int col = row; col < n; col++)
                {
                    r[col, row] = factor[row, col];
                }
            }
        }
    }
    return true;
}