/*************************************************************************
Relative classification error on the test set.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    number of misclassified cases (as reported by MNLClsError) divided
    by NPoints.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlrelclserror(logitmodel lm, double[,] xy, int npoints)
{
    // Count the misclassified rows first, then normalize in floating point.
    int misclassified = mnlclserror(lm, xy, npoints);
    double total = (double)npoints;
    return (double)misclassified/total;
}
/*************************************************************************
Average relative error on the test set.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    SSize   -   test set size (number of rows of XY to process)

RESULT:
    average relative error (error when estimating posterior
    probabilities), as computed by MNLAllErrors.

NOTE: an assertion failure is raised when the version marker stored in
      LM.W[1] does not match the expected model version.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlavgrelerror(logitmodel lm, double[,] xy, int ssize)
{
    double relcls = 0;
    double avgce = 0;
    double rms = 0;
    double avg = 0;
    double avgrel = 0;

    // The message now names this routine (it previously said "MNLRMSError").
    alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNLAvgRelError: Incorrect MNL version!");

    // All error metrics are computed in one pass; only AvgRel is returned.
    mnlallerrors(lm, xy, ssize, ref relcls, ref avgce, ref rms, ref avg, ref avgrel);
    return avgrel;
}
/*************************************************************************
Copying of LogitModel structure.

INPUT PARAMETERS:
    LM1 -   original

OUTPUT PARAMETERS:
    LM2 -   copy; its W array is reallocated and filled from LM1.W

The number of elements copied is taken from LM1.W[0].

  -- ALGLIB --
     Copyright 15.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mnlcopy(logitmodel lm1, logitmodel lm2)
{
    // The model stores its own length in the first element.
    int len = (int)Math.Round(lm1.w[0]);

    lm2.w = new double[len];
    for(int idx=0; idx<len; idx++)
    {
        lm2.w[idx] = lm1.w[idx];
    }
}
/*************************************************************************
Average cross-entropy (in bits per element) on the test set.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set; columns 0..NVars-1 hold the inputs, column
                NVars holds the class number (rounded to nearest integer)
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*ln(2)).

NOTES:
    * an assertion failure is raised on an unexpected model version or
      when a class number falls outside [0, NClasses-1]
    * when the predicted probability of the true class is not positive,
      ln(MinRealNumber) is used in place of ln(probability)

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlavgce(logitmodel lm, double[,] xy, int npoints)
{
    // The message now names this routine (it previously said "MNLClsError").
    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLAvgCE: unexpected model version");

    int nvars = (int)Math.Round(lm.w[2]);
    int nclasses = (int)Math.Round(lm.w[3]);
    double[] workx = new double[nvars];
    double[] worky = new double[nclasses];
    double result = 0;

    for(int i=0; i<=npoints-1; i++)
    {
        // Round the class label once per row instead of at every use.
        int cls = (int)Math.Round(xy[i,nvars]);
        alglib.ap.assert(cls>=0 && cls<nclasses, "MNLAvgCE: incorrect class number!");

        //
        // Process
        //
        for(int j=0; j<=nvars-1; j++)
        {
            workx[j] = xy[i,j];
        }
        mnlprocess(lm, workx, ref worky);

        double p = worky[cls];
        if( (double)(p)>(double)(0) )
        {
            result = result-Math.Log(p);
        }
        else
        {
            // Avoid Log(0): substitute the smallest representable value.
            result = result-Math.Log(math.minrealnumber);
        }
    }

    // Convert from nats (summed) to bits per element.
    result = result/(npoints*Math.Log(2));
    return result;
}
/*************************************************************************
Unpacks coefficients of logit model. Logit model has the form:

    P(class=i) = S(i) / (S(0) + S(1) + ... +S(M-1))
          S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
        S(M-1) = 1

INPUT PARAMETERS:
    LM          -   logit model in ALGLIB format

OUTPUT PARAMETERS:
    A           -   coefficients, array[0..NClasses-2,0..NVars]
    NVars       -   number of independent variables
    NClasses    -   number of classes

The outputs are reset (empty matrix, zeros) before the model version is
checked.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlunpack(logitmodel lm, ref double[,] a, ref int nvars, ref int nclasses)
{
    // Reset outputs before validating the model.
    a = new double[0,0];
    nvars = 0;
    nclasses = 0;

    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLUnpack: unexpected model version");

    // Header fields: W[2]=NVars, W[3]=NClasses, W[4]=offset of coefficients.
    nvars = (int)Math.Round(lm.w[2]);
    nclasses = (int)Math.Round(lm.w[3]);
    int coefstart = (int)Math.Round(lm.w[4]);

    int rowlen = nvars+1;
    int rowcount = nclasses-1;
    a = new double[rowcount, rowlen];
    for(int row=0; row<rowcount; row++)
    {
        // Each class occupies a contiguous run of NVars+1 values in W.
        int src = coefstart+row*rowlen;
        for(int col=0; col<rowlen; col++)
        {
            a[row,col] = lm.w[src+col];
        }
    }
}
/*************************************************************************
"Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
reversed).

INPUT PARAMETERS:
    A           -   model (see MNLUnpack), array[0..NClasses-2,0..NVars]
    NVars       -   number of independent variables
    NClasses    -   number of classes

OUTPUT PARAMETERS:
    LM          -   logit model; LM.W is reallocated and filled with a
                    5-element header followed by the coefficients. The
                    trailing NClasses elements are left at their default
                    (zero) values.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlpack(double[,] a, int nvars, int nclasses, logitmodel lm)
{
    int offs = 5;
    int rowlen = nvars+1;
    int wdim = rowlen*(nclasses-1);
    int ssize = 5+wdim+nclasses;

    // Header: total size, version marker, NVars, NClasses, coefficient offset.
    lm.w = new double[ssize];
    lm.w[0] = ssize;
    lm.w[1] = logitvnum;
    lm.w[2] = nvars;
    lm.w[3] = nclasses;
    lm.w[4] = offs;

    // Coefficient rows are stored back to back starting at Offs.
    for(int row=0; row<=nclasses-2; row++)
    {
        int dst = offs+row*rowlen;
        for(int col=0; col<=nvars; col++)
        {
            lm.w[dst+col] = a[row,col];
        }
    }
}
/*************************************************************************
Processing

INPUT PARAMETERS:
    LM  -   logit model, passed by non-constant reference
            (the tail of LM.W is used as temporary storage when
            calculating model output).
    X   -   input vector,  array[0..NVars-1].
    Y   -   (possibly) preallocated buffer; if size of Y is less than
            NClasses, it will be reallocated. If it is large enough, it
            is NOT reallocated, so we can save some time on reallocation.

OUTPUT PARAMETERS:
    Y   -   result, array[0..NClasses-1]
            Vector of posterior probabilities for classification task.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlprocess(logitmodel lm, double[] x, ref double[] y)
{
    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLProcess: unexpected model version");

    int nvars = (int)Math.Round(lm.w[2]);
    int nclasses = (int)Math.Round(lm.w[3]);
    int offs = (int)Math.Round(lm.w[4]);

    // Helper updates LM.W in place; the per-class values are then read
    // from the NClasses elements that follow the coefficient block.
    mnliexp(ref lm.w, x);
    int expbase = offs+(nvars+1)*(nclasses-1);

    // Normalizing constant: sum over all classes.
    double total = 0;
    for(int k=0; k<nclasses; k++)
    {
        total = total+lm.w[expbase+k];
    }

    // Reuse the caller's buffer when it is large enough.
    if( alglib.ap.len(y)<nclasses )
    {
        y = new double[nclasses];
    }
    for(int k=0; k<nclasses; k++)
    {
        y[k] = lm.w[expbase+k]/total;
    }
}
/*************************************************************************
Serialization of LogitModel structure.

INPUT PARAMETERS:
    LM      -   original

OUTPUT PARAMETERS:
    RA      -   array of real numbers which stores model,
                array[0..RLen-1]. RA[0] holds the version marker and
                RA[1..RLen-1] holds a copy of LM.W.
    RLen    -   RA length (LM.W[0] rounded, plus one)

  -- ALGLIB --
     Copyright 15.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mnlserialize(ref logitmodel lm, ref double[] ra, ref int rlen)
{
    // One extra slot in front of the model data for the version marker.
    rlen = (int)Math.Round(lm.w[0])+1;
    ra = new double[rlen];
    ra[0] = logitvnum;

    // Model data is shifted by one position relative to LM.W.
    for(int dst=1; dst<rlen; dst++)
    {
        ra[dst] = lm.w[dst-1];
    }
}
/*************************************************************************
Creates a new logitmodel whose W array is a clone of this instance's W
array, so the copy does not share the array with the original.
*************************************************************************/
public override alglib.apobject make_copy()
{
    logitmodel duplicate = new logitmodel();

    // Clone() yields a distinct array holding the same values.
    duplicate.w = (double[])w.Clone();
    return duplicate;
}
/*************************************************************************
This subroutine trains logit model.

The model is fitted in two stages: a fixed number of line searches along
the (regularized) negative gradient, followed by iterations that use the
Hessian to choose a Newton direction, falling back to the negative
gradient when the Cholesky-based solve does not succeed. The fitted
network weights are then converted into the packed LM.W layout.

INPUT PARAMETERS:
    XY          -   training set, array[0..NPoints-1,0..NVars]
                    First NVars columns store values of independent
                    variables, next column stores number of class (from 0
                    to NClasses-1) which dataset element belongs to.
                    Fractional values are rounded to nearest integer.
    NPoints     -   training set size, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NClasses-1].
                    * -1, if incorrect parameters were passed
                          (NPoints<NVars+2, NVars<1, NClasses<2).
                    *  1, if task has been solved
    LM          -   model built
    Rep         -   training report; NGrad and NHess count the gradient
                    and Hessian evaluations performed here

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnltrainh(double[,] xy,
    int npoints,
    int nvars,
    int nclasses,
    ref int info,
    logitmodel lm,
    mnlreport rep)
{
    int i = 0;
    int j = 0;
    int k = 0;
    int ssize = 0;
    bool allsame = new bool();
    int offs = 0;
    double threshold = 0;
    double wminstep = 0;
    double decay = 0;
    int wdim = 0;
    int expoffs = 0;
    double v = 0;
    double s = 0;
    mlpbase.multilayerperceptron network = new mlpbase.multilayerperceptron();
    int nin = 0;
    int nout = 0;
    int wcount = 0;
    double e = 0;
    double[] g = new double[0];
    double[,] h = new double[0,0];
    bool spd = new bool();
    double[] x = new double[0];
    double[] y = new double[0];
    double[] wbase = new double[0];
    double wstep = 0;
    double[] wdir = new double[0];
    double[] work = new double[0];
    int mcstage = 0;
    logitmcstate mcstate = new logitmcstate();
    int mcinfo = 0;
    int mcnfev = 0;
    int solverinfo = 0;
    densesolver.densesolverreport solverrep = new densesolver.densesolverreport();
    int i_ = 0;
    int i1_ = 0;

    info = 0;

    // NOTE(review): threshold and wminstep are assigned here but never
    // read again in this routine; only decay is used below.
    threshold = 1000*math.machineepsilon;
    wminstep = 0.001;
    decay = 0.001;
    
    //
    // Test for inputs
    //
    if( (npoints<nvars+2 || nvars<1) || nclasses<2 )
    {
        info = -1;
        return;
    }
    // Every class label (column NVars, rounded) must lie in [0, NClasses-1].
    for(i=0; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])<0 || (int)Math.Round(xy[i,nvars])>=nclasses )
        {
            info = -2;
            return;
        }
    }
    info = 1;
    
    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    
    //
    // Allocate array
    //
    // LM.W layout: 5 header values (size, version, NVars, NClasses,
    // coefficient offset), then (NVars+1)*(NClasses-1) coefficients,
    // then NClasses additional elements.
    //
    wdim = (nvars+1)*(nclasses-1);
    offs = 5;
    expoffs = offs+wdim;
    ssize = 5+(nvars+1)*(nclasses-1)+nclasses;
    lm.w = new double[ssize-1+1];
    lm.w[0] = ssize;
    lm.w[1] = logitvnum;
    lm.w[2] = nvars;
    lm.w[3] = nclasses;
    lm.w[4] = offs;
    
    //
    // Degenerate case: all outputs are equal
    //
    allsame = true;
    for(i=1; i<=npoints-1; i++)
    {
        if( (int)Math.Round(xy[i,nvars])!=(int)Math.Round(xy[i-1,nvars]) )
        {
            allsame = false;
        }
    }
    if( allsame )
    {
        // All coefficients are zeroed; only the constant terms (index
        // NVars within each row) are set below, using a large magnitude
        // V = -2*ln(MinRealNumber).
        for(i=0; i<=(nvars+1)*(nclasses-1)-1; i++)
        {
            lm.w[offs+i] = 0;
        }
        v = -(2*Math.Log(math.minrealnumber));
        k = (int)Math.Round(xy[0,nvars]);
        if( k==nclasses-1 )
        {
            // The single observed class is the last one, which has no
            // coefficient row of its own: set every constant term to -V.
            for(i=0; i<=nclasses-2; i++)
            {
                lm.w[offs+i*(nvars+1)+nvars] = -v;
            }
        }
        else
        {
            // Constant term is +V for the observed class, 0 for the others.
            for(i=0; i<=nclasses-2; i++)
            {
                if( i==k )
                {
                    lm.w[offs+i*(nvars+1)+nvars] = v;
                }
                else
                {
                    lm.w[offs+i*(nvars+1)+nvars] = 0;
                }
            }
        }
        return;
    }
    
    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpbase.mlpcreatec0(nvars, nclasses, network);
    mlpbase.mlpinitpreprocessor(network, xy, npoints);
    mlpbase.mlpproperties(network, ref nin, ref nout, ref wcount);
    // Random initial weights, scaled by 1/NVars.
    for(i=0; i<=wcount-1; i++)
    {
        network.weights[i] = (2*math.randomreal()-1)/nvars;
    }
    g = new double[wcount-1+1];
    h = new double[wcount-1+1, wcount-1+1];
    wbase = new double[wcount-1+1];
    wdir = new double[wcount-1+1];
    work = new double[wcount-1+1];
    
    //
    // First stage: optimize in gradient direction.
    //
    for(k=0; k<=wcount/3+10; k++)
    {
        
        //
        // Calculate gradient in starting point
        //
        // The weight-decay penalty 0.5*Decay*|W|^2 is added to E and its
        // gradient Decay*W is added to G.
        //
        mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        rep.ngrad = rep.ngrad+1;
        
        //
        // Setup optimization scheme
        //
        // Direction is the negative gradient, normalized to unit length;
        // the initial step WStep is the gradient norm.
        // NOTE(review): a zero gradient would make 1/Sqrt(V) non-finite
        // here - confirm whether that can occur in practice.
        //
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = -g[i_];
        }
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        // mnlmcsrch is re-invoked with freshly evaluated (regularized)
        // E and G until it reports MCStage=0.
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
    }
    
    //
    // Second stage: use Hessian when we are close to the minimum
    //
    while( true )
    {
        
        //
        // Calculate and update E/G/H
        //
        // Same weight-decay correction as above, plus Decay added to the
        // Hessian diagonal.
        //
        mlpbase.mlphessiannbatch(network, xy, npoints, ref e, ref g, ref h);
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += network.weights[i_]*network.weights[i_];
        }
        e = e+0.5*decay*v;
        for(i_=0; i_<=wcount-1;i_++)
        {
            g[i_] = g[i_] + decay*network.weights[i_];
        }
        for(k=0; k<=wcount-1; k++)
        {
            h[k,k] = h[k,k]+decay;
        }
        rep.nhess = rep.nhess+1;
        
        //
        // Select step direction
        // NOTE: it is important to use lower-triangle Cholesky
        // factorization since it is much faster than higher-triangle version.
        //
        // The value returned by spdmatrixcholesky is overwritten right
        // away: SPD is decided by the solver's info code (SolverInfo>0).
        //
        spd = trfac.spdmatrixcholesky(ref h, wcount, false);
        densesolver.spdmatrixcholeskysolve(h, wcount, false, g, ref solverinfo, solverrep, ref wdir);
        spd = solverinfo>0;
        if( spd )
        {
            
            //
            // H is positive definite.
            // Step in Newton direction.
            //
            for(i_=0; i_<=wcount-1;i_++)
            {
                wdir[i_] = -1*wdir[i_];
            }
            spd = true;
        }
        else
        {
            
            //
            // H is indefinite.
            // Step in gradient direction.
            //
            for(i_=0; i_<=wcount-1;i_++)
            {
                wdir[i_] = -g[i_];
            }
            spd = false;
        }
        
        //
        // Optimize in WDir direction
        //
        // Same normalization and line-search loop as in the first stage.
        //
        v = 0.0;
        for(i_=0; i_<=wcount-1;i_++)
        {
            v += wdir[i_]*wdir[i_];
        }
        wstep = Math.Sqrt(v);
        v = 1/Math.Sqrt(v);
        for(i_=0; i_<=wcount-1;i_++)
        {
            wdir[i_] = v*wdir[i_];
        }
        mcstage = 0;
        mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        while( mcstage!=0 )
        {
            mlpbase.mlpgradnbatch(network, xy, npoints, ref e, ref g);
            v = 0.0;
            for(i_=0; i_<=wcount-1;i_++)
            {
                v += network.weights[i_]*network.weights[i_];
            }
            e = e+0.5*decay*v;
            for(i_=0; i_<=wcount-1;i_++)
            {
                g[i_] = g[i_] + decay*network.weights[i_];
            }
            rep.ngrad = rep.ngrad+1;
            mnlmcsrch(wcount, ref network.weights, ref e, ref g, wdir, ref wstep, ref mcinfo, ref mcnfev, ref work, mcstate, ref mcstage);
        }
        // Stop only after a Newton step whose line search finished with
        // code 2, 4 or 6.
        // NOTE(review): the meaning of these MCInfo codes is defined by
        // mnlmcsrch, which is not visible here - confirm against it.
        if( spd && ((mcinfo==2 || mcinfo==4) || mcinfo==6) )
        {
            break;
        }
    }
    
    //
    // Convert from NN format to MNL format
    //
    // 1. copy the network weights into LM.W starting at Offs
    // 2. divide each coefficient by its column sigma (a zero sigma is
    //    treated as 1) and accumulate coefficient*mean/sigma into the
    //    constant term of the same row
    // 3. negate the constant term of every row
    //
    i1_ = (0) - (offs);
    for(i_=offs; i_<=offs+wcount-1;i_++)
    {
        lm.w[i_] = network.weights[i_+i1_];
    }
    for(k=0; k<=nvars-1; k++)
    {
        for(i=0; i<=nclasses-2; i++)
        {
            s = network.columnsigmas[k];
            if( (double)(s)==(double)(0) )
            {
                s = 1;
            }
            j = offs+(nvars+1)*i;
            v = lm.w[j+k];
            lm.w[j+k] = v/s;
            lm.w[j+nvars] = lm.w[j+nvars]+v*network.columnmeans[k]/s;
        }
    }
    for(k=0; k<=nclasses-2; k++)
    {
        lm.w[offs+(nvars+1)*k+nvars] = -lm.w[offs+(nvars+1)*k+nvars];
    }
}
/*************************************************************************
Average relative error on the test set.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    SSize   -   test set size (number of rows of XY to process)

RESULT:
    average relative error (error when estimating posterior
    probabilities), as computed by MNLAllErrors.

NOTE: the model version stored in LM.W[1] is checked with a debug
      assertion.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlavgrelerror(ref logitmodel lm, ref double[,] xy, int ssize)
{
    double relcls = 0;
    double avgce = 0;
    double rms = 0;
    double avg = 0;
    double avgrel = 0;

    // The message now names this routine (it previously said "MNLRMSError").
    System.Diagnostics.Debug.Assert((int)Math.Round(lm.w[1])==logitvnum, "MNLAvgRelError: Incorrect MNL version!");

    // All error metrics are computed in one pass; only AvgRel is returned.
    mnlallerrors(ref lm, ref xy, ssize, ref relcls, ref avgce, ref rms, ref avg, ref avgrel);
    return avgrel;
}
/*************************************************************************
Relative classification error on the test set.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set
    NPoints -   test set size

RESULT:
    number of misclassified cases (as reported by MNLClsError) divided
    by NPoints.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static double mnlrelclserror(ref logitmodel lm, ref double[,] xy, int npoints)
{
    // Count the misclassified rows first, then normalize in floating point.
    double misclassified = (double)(mnlclserror(ref lm, ref xy, npoints));
    double total = (double)(npoints);
    return misclassified/total;
}
/*************************************************************************
Unserialization of LogitModel structure.

INPUT PARAMETERS:
    RA      -   real array which stores model; RA[0] must hold the
                version marker and RA[1] the length of the model data

OUTPUT PARAMETERS:
    LM      -   restored model; LM.W is reallocated and filled from
                RA[1..]

  -- ALGLIB --
     Copyright 15.03.2009 by Bochkanov Sergey
*************************************************************************/
public static void mnlunserialize(ref double[] ra, ref logitmodel lm)
{
    System.Diagnostics.Debug.Assert((int)Math.Round(ra[0])==logitvnum, "MNLUnserialize: incorrect array!");

    lm.w = new double[(int)Math.Round(ra[1])];

    // Model data starts one position after the version marker; the
    // element count is read from RA[1] on every pass.
    for(int dst=0; dst<(int)Math.Round(ra[1]); dst++)
    {
        lm.w[dst] = ra[dst+1];
    }
}
/*************************************************************************
Classification error on test set = MNLRelClsError*NPoints

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set; columns 0..NVars-1 hold the inputs, column
                NVars holds the class number (rounded to nearest integer)
    NPoints -   test set size

RESULT:
    number of rows whose most probable class (first maximum of the
    model output) differs from the class stored in XY.

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static int mnlclserror(logitmodel lm, double[,] xy, int npoints)
{
    alglib.ap.assert((double)(lm.w[1])==(double)(logitvnum), "MNLClsError: unexpected model version");

    int nvars = (int)Math.Round(lm.w[2]);
    int nclasses = (int)Math.Round(lm.w[3]);
    double[] inputs = new double[nvars];
    double[] probs = new double[nclasses];
    int errors = 0;

    for(int row=0; row<npoints; row++)
    {
        // Evaluate the model on the current row.
        for(int col=0; col<nvars; col++)
        {
            inputs[col] = xy[row,col];
        }
        mnlprocess(lm, inputs, ref probs);

        // Pick the class with the largest output; ties keep the lowest index.
        int best = 0;
        for(int c=0; c<nclasses; c++)
        {
            if( (double)(probs[c])>(double)(probs[best]) )
            {
                best = c;
            }
        }

        // Compare against the label stored in the last column.
        if( best!=(int)Math.Round(xy[row,nvars]) )
        {
            errors = errors+1;
        }
    }
    return errors;
}
/*************************************************************************
'Interactive' variant of MNLProcess for languages like Python which
support constructs like "Y = MNLProcess(LM,X)" and an interactive
interpreter mode.

Y is replaced by an empty array before MNLProcess is called, so the
caller's previous buffer is never reused. The original author notes this
makes it slower than the 'non-interactive' counterpart, but more
convenient when called from the command line.

INPUT PARAMETERS:
    LM  -   logit model
    X   -   input vector

OUTPUT PARAMETERS:
    Y   -   result produced by MNLProcess

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlprocessi(logitmodel lm, double[] x, ref double[] y)
{
    // Discard whatever buffer the caller passed in.
    y = new double[0];

    mnlprocess(lm, x, ref y);
}
/*************************************************************************
Calculation of all types of errors.

INPUT PARAMETERS:
    LM      -   logit model
    XY      -   test set; columns 0..NVars-1 hold the inputs, column
                NVars holds the class number
    NPoints -   test set size

OUTPUT PARAMETERS:
    RelCls, AvgCE, RMS, Avg, AvgRel
            -   elements 0..4 of the buffer filled by the BDSS error
                accumulation routines. All five are reset to zero before
                the model version is checked.

  -- ALGLIB --
     Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
private static void mnlallerrors(logitmodel lm, double[,] xy, int npoints, ref double relcls, ref double avgce, ref double rms, ref double avg, ref double avgrel)
{
    double[] buf = new double[0];

    // Reset outputs up front.
    relcls = 0;
    avgce = 0;
    rms = 0;
    avg = 0;
    avgrel = 0;

    alglib.ap.assert((int)Math.Round(lm.w[1])==logitvnum, "MNL unit: Incorrect MNL version!");

    int nvars = (int)Math.Round(lm.w[2]);
    int nclasses = (int)Math.Round(lm.w[3]);
    double[] inputs = new double[nvars];
    double[] y = new double[nclasses];
    double[] desired = new double[1];

    bdss.dserrallocate(nclasses, ref buf);
    for(int row=0; row<npoints; row++)
    {
        for(int col=0; col<nvars; col++)
        {
            inputs[col] = xy[row,col];
        }
        mnlprocess(lm, inputs, ref y);

        // The desired answer is the class number from the last column.
        desired[0] = xy[row,nvars];
        bdss.dserraccumulate(ref buf, y, desired);
    }
    bdss.dserrfinish(ref buf);

    // Hand the accumulated metrics back to the caller.
    relcls = buf[0];
    avgce = buf[1];
    rms = buf[2];
    avg = buf[3];
    avgrel = buf[4];
}
/*************************************************************************
Processing

INPUT PARAMETERS:
    LM  -   logit model, passed by non-constant reference
            (the tail of LM.W is used as temporary storage when
            calculating model output).
    X   -   input vector,  array[0..NVars-1].

OUTPUT PARAMETERS:
    Y   -   result, array[0..NClasses-1]
            Vector of posterior probabilities for classification task.
            Subroutine does not allocate memory for this vector, it is
            responsibility of a caller to allocate it. Array must be
            at least [0..NClasses-1].

  -- ALGLIB --
     Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
public static void mnlprocess(ref logitmodel lm, ref double[] x, ref double[] y)
{
    System.Diagnostics.Debug.Assert((double)(lm.w[1])==(double)(logitvnum), "MNLProcess: unexpected model version");

    int nvars = (int)Math.Round(lm.w[2]);
    int nclasses = (int)Math.Round(lm.w[3]);
    int offs = (int)Math.Round(lm.w[4]);

    // Helper updates LM.W in place; the per-class values are then read
    // from the NClasses elements that follow the coefficient block.
    mnliexp(ref lm.w, ref x);
    int expbase = offs+(nvars+1)*(nclasses-1);

    // Normalizing constant: sum over all classes.
    double total = 0;
    for(int k=0; k<nclasses; k++)
    {
        total = total+lm.w[expbase+k];
    }

    // Y is written as-is; the caller must have sized it already.
    for(int k=0; k<nclasses; k++)
    {
        y[k] = lm.w[expbase+k]/total;
    }
}