// Wrapping constructor: stores the supplied low-level hqrnd.hqrndstate in
// _innerobj so this public wrapper object exposes the existing generator
// state rather than creating a new one.
public hqrndstate(hqrnd.hqrndstate obj) { _innerobj = obj; }
/*************************************************************************
Builds one decision tree (internal recursive subroutine).

The routine processes the training points whose positions in Bufs.IdxBuf
lie in [Idx1..Idx2], emits either a leaf node or an inner node into
Bufs.TreeBuf starting at offset NumProcessed, and advances NumProcessed
past everything it wrote. For inner nodes it partitions IdxBuf in place
and recurses into both halves.

Parameters:
    XY          -   training set; column NVars holds the target
                    (class index for classification, value for regression)
    NPoints     -   number of points in the training set
    NVars       -   number of independent variables
    NClasses    -   >1 for classification, 1 for regression
    NFeatures   -   number of random features tried at each node
    NVarsInPool -   number of variables still usable for splits
                    (shrinks locally when EVS excludes constant variables)
    Flags       -   bit field; DFUseEVS bit enables expected-value splits
    NumProcessed -  in/out: next free cell of Bufs.TreeBuf
    Idx1, Idx2  -   inclusive range of IdxBuf entries handled by this call
    Bufs        -   scratch buffers, preallocated by caller:
                    TreeBuf  - large enough array, at least TreeSize
                    IdxBuf   - at least NPoints elements
                    TmpBufR  - at least NPoints
                    TmpBufR2 - at least NPoints
                    TmpBufI  - at least NPoints
                    TmpBufI2 - at least NPoints+1
    RS          -   random generator state used for feature sampling
*************************************************************************/
private static void dfbuildtreerec(double[,] xy,
    int npoints,
    int nvars,
    int nclasses,
    int nfeatures,
    int nvarsinpool,
    int flags,
    ref int numprocessed,
    int idx1,
    int idx2,
    dfinternalbuffers bufs,
    hqrnd.hqrndstate rs)
{
    int i = 0;
    int j = 0;
    int k = 0;
    bool bflag = new bool();
    int i1 = 0;
    int i2 = 0;
    int info = 0;
    double sl = 0;          // size of left partition
    double sr = 0;          // size of right partition
    double w = 0;
    int idxbest = 0;        // best split variable; -1 means "no usable split found"
    double ebest = 0;       // best (lowest) RMS error seen so far
    double tbest = 0;       // threshold of the best split
    int varcur = 0;         // variable currently being evaluated
    double s = 0;
    double v = 0;
    double v1 = 0;
    double v2 = 0;
    double threshold = 0;
    int oldnp = 0;
    double currms = 0;
    bool useevs = new bool();

    //
    // these initializers are not really necessary,
    // but without them compiler complains about uninitialized locals
    //
    tbest = 0;

    //
    // Prepare
    //
    alglib.ap.assert(npoints>0);
    alglib.ap.assert(idx2>=idx1);
    useevs = flags/dfuseevs%2!=0;

    //
    // Leaf node: a single point. Node layout is [-1, target value];
    // -1 in the first cell marks a leaf.
    //
    if( idx2==idx1 )
    {
        bufs.treebuf[numprocessed] = -1;
        bufs.treebuf[numprocessed+1] = xy[bufs.idxbuf[idx1],nvars];
        numprocessed = numprocessed+leafnodewidth;
        return;
    }

    //
    // Non-leaf node.
    // Select random variable, prepare split:
    // 1. prepare default solution - no splitting, class at random
    // 2. investigate possible splits, compare with default/best
    //
    idxbest = -1;
    if( nclasses>1 )
    {
        //
        // default solution for classification:
        // EBest = RMS error of predicting class frequencies with no split
        //
        for(i=0; i<=nclasses-1; i++)
        {
            bufs.classibuf[i] = 0;
        }
        s = idx2-idx1+1;
        for(i=idx1; i<=idx2; i++)
        {
            j = (int)Math.Round(xy[bufs.idxbuf[i],nvars]);
            bufs.classibuf[j] = bufs.classibuf[j]+1;
        }
        ebest = 0;
        for(i=0; i<=nclasses-1; i++)
        {
            ebest = ebest+bufs.classibuf[i]*math.sqr(1-bufs.classibuf[i]/s)+(s-bufs.classibuf[i])*math.sqr(bufs.classibuf[i]/s);
        }
        ebest = Math.Sqrt(ebest/(nclasses*(idx2-idx1+1)));
    }
    else
    {
        //
        // default solution for regression:
        // EBest = RMS deviation from the subset mean with no split
        //
        v = 0;
        for(i=idx1; i<=idx2; i++)
        {
            v = v+xy[bufs.idxbuf[i],nvars];
        }
        v = v/(idx2-idx1+1);
        ebest = 0;
        for(i=idx1; i<=idx2; i++)
        {
            ebest = ebest+math.sqr(xy[bufs.idxbuf[i],nvars]-v);
        }
        ebest = Math.Sqrt(ebest/(idx2-idx1+1));
    }
    i = 0;
    while( i<=Math.Min(nfeatures, nvarsinpool)-1 )
    {
        //
        // select variables from pool:
        // swap a randomly chosen remaining pool entry into position I
        // (partial Fisher-Yates shuffle of VarPool)
        //
        j = i+hqrnd.hqrnduniformi(rs, nvarsinpool-i);
        k = bufs.varpool[i];
        bufs.varpool[i] = bufs.varpool[j];
        bufs.varpool[j] = k;
        varcur = bufs.varpool[i];

        //
        // load variable values to working array
        //
        // apply EVS preprocessing: if all variable values are same,
        // variable is excluded from pool.
        //
        // This is necessary for binary pre-splits (see later) to work.
        //
        for(j=idx1; j<=idx2; j++)
        {
            bufs.tmpbufr[j-idx1] = xy[bufs.idxbuf[j],varcur];
        }
        if( useevs )
        {
            bflag = false;
            v = bufs.tmpbufr[0];
            for(j=0; j<=idx2-idx1; j++)
            {
                if( (double)(bufs.tmpbufr[j])!=(double)(v) )
                {
                    bflag = true;
                    break;
                }
            }
            if( !bflag )
            {
                //
                // exclude variable from pool,
                // go to the next iteration.
                // I is not increased.
                //
                k = bufs.varpool[i];
                bufs.varpool[i] = bufs.varpool[nvarsinpool-1];
                bufs.varpool[nvarsinpool-1] = k;
                nvarsinpool = nvarsinpool-1;
                continue;
            }
        }

        //
        // load labels to working array
        //
        if( nclasses>1 )
        {
            for(j=idx1; j<=idx2; j++)
            {
                bufs.tmpbufi[j-idx1] = (int)Math.Round(xy[bufs.idxbuf[j],nvars]);
            }
        }
        else
        {
            for(j=idx1; j<=idx2; j++)
            {
                bufs.tmpbufr2[j-idx1] = xy[bufs.idxbuf[j],nvars];
            }
        }

        //
        // calculate split
        //
        if( useevs && bufs.evsbin[varcur] )
        {
            //
            // Pre-calculated splits for binary variables.
            // Threshold is already known, just calculate RMS error
            //
            threshold = bufs.evssplits[varcur];
            if( nclasses>1 )
            {
                //
                // classification-specific code:
                // ClassIBuf[0..NClasses-1] counts the left partition,
                // ClassIBuf[NClasses..2*NClasses-1] counts the right one
                //
                for(j=0; j<=2*nclasses-1; j++)
                {
                    bufs.classibuf[j] = 0;
                }
                sl = 0;
                sr = 0;
                for(j=0; j<=idx2-idx1; j++)
                {
                    k = bufs.tmpbufi[j];
                    if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                    {
                        bufs.classibuf[k] = bufs.classibuf[k]+1;
                        sl = sl+1;
                    }
                    else
                    {
                        bufs.classibuf[k+nclasses] = bufs.classibuf[k+nclasses]+1;
                        sr = sr+1;
                    }
                }
                // both sides must be non-empty (EVS already removed constant variables)
                alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!");
                currms = 0;
                for(j=0; j<=nclasses-1; j++)
                {
                    w = bufs.classibuf[j];
                    currms = currms+w*math.sqr(w/sl-1);
                    currms = currms+(sl-w)*math.sqr(w/sl);
                    w = bufs.classibuf[nclasses+j];
                    currms = currms+w*math.sqr(w/sr-1);
                    currms = currms+(sr-w)*math.sqr(w/sr);
                }
                currms = Math.Sqrt(currms/(nclasses*(idx2-idx1+1)));
            }
            else
            {
                //
                // regression-specific code:
                // V1/V2 are means of the left/right partitions
                //
                sl = 0;
                sr = 0;
                v1 = 0;
                v2 = 0;
                for(j=0; j<=idx2-idx1; j++)
                {
                    if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                    {
                        v1 = v1+bufs.tmpbufr2[j];
                        sl = sl+1;
                    }
                    else
                    {
                        v2 = v2+bufs.tmpbufr2[j];
                        sr = sr+1;
                    }
                }
                alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!");
                v1 = v1/sl;
                v2 = v2/sr;
                currms = 0;
                for(j=0; j<=idx2-idx1; j++)
                {
                    if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                    {
                        currms = currms+math.sqr(v1-bufs.tmpbufr2[j]);
                    }
                    else
                    {
                        currms = currms+math.sqr(v2-bufs.tmpbufr2[j]);
                    }
                }
                currms = Math.Sqrt(currms/(idx2-idx1+1));
            }
            info = 1;
        }
        else
        {
            //
            // Generic splits: delegate threshold selection to
            // DFSplitC/DFSplitR, which return Info, Threshold and CurRMS
            //
            if( nclasses>1 )
            {
                dfsplitc(ref bufs.tmpbufr, ref bufs.tmpbufi, ref bufs.classibuf, idx2-idx1+1, nclasses, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortibuf);
            }
            else
            {
                dfsplitr(ref bufs.tmpbufr, ref bufs.tmpbufr2, idx2-idx1+1, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortrbuf2);
            }
        }
        // keep this split if it is at least as good as the best so far
        // (<= also beats the "no split" default error computed above)
        if( info>0 )
        {
            if( (double)(currms)<=(double)(ebest) )
            {
                ebest = currms;
                idxbest = varcur;
                tbest = threshold;
            }
        }

        //
        // Next iteration
        //
        i = i+1;
    }

    //
    // to split or not to split
    //
    if( idxbest<0 )
    {
        //
        // All values are same, cannot split.
        // Emit a leaf node instead.
        //
        bufs.treebuf[numprocessed] = -1;
        if( nclasses>1 )
        {
            //
            // Select random class label (randomness allows us to
            // approximate distribution of the classes)
            //
            bufs.treebuf[numprocessed+1] = (int)Math.Round(xy[bufs.idxbuf[idx1+hqrnd.hqrnduniformi(rs, idx2-idx1+1)],nvars]);
        }
        else
        {
            //
            // Select average (for regression task).
            //
            v = 0;
            for(i=idx1; i<=idx2; i++)
            {
                v = v+xy[bufs.idxbuf[i],nvars]/(idx2-idx1+1);
            }
            bufs.treebuf[numprocessed+1] = v;
        }
        numprocessed = numprocessed+leafnodewidth;
    }
    else
    {
        //
        // we can split:
        // inner node layout is [variable index, threshold, offset of right subtree]
        //
        bufs.treebuf[numprocessed] = idxbest;
        bufs.treebuf[numprocessed+1] = tbest;
        i1 = idx1;
        i2 = idx2;
        while( i1<=i2 )
        {
            //
            // Reorder indices so that left partition is in [Idx1..I1-1],
            // and right partition is in [I2+1..Idx2]
            // (in-place two-pointer partition of IdxBuf)
            //
            if( (double)(xy[bufs.idxbuf[i1],idxbest])<(double)(tbest) )
            {
                i1 = i1+1;
                continue;
            }
            if( (double)(xy[bufs.idxbuf[i2],idxbest])>=(double)(tbest) )
            {
                i2 = i2-1;
                continue;
            }
            j = bufs.idxbuf[i1];
            bufs.idxbuf[i1] = bufs.idxbuf[i2];
            bufs.idxbuf[i2] = j;
            i1 = i1+1;
            i2 = i2-1;
        }
        // recurse: left subtree immediately follows this node; cell OldNP+2
        // is patched afterwards with the offset where the right subtree starts
        oldnp = numprocessed;
        numprocessed = numprocessed+innernodewidth;
        dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, idx1, i1-1, bufs, rs);
        bufs.treebuf[oldnp+2] = numprocessed;
        dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, i2+1, idx2, bufs, rs);
    }
}
/*************************************************************************
Unsets HQRNDState structure

Zeroes every field of the generator state so that it no longer holds an
initialized-looking configuration.
*************************************************************************/
private static void unsetstate(hqrnd.hqrndstate state)
{
    // order of the assignments is irrelevant - the fields are independent
    state.magicv = 0;
    state.v = 0;
    state.s2 = 0;
    state.s1 = 0;
}
/*************************************************************************
Builds one decision tree. Just a wrapper for the DFBuildTreeRec.
*************************************************************************/
private static void dfbuildtree(double[,] xy,
    int npoints,
    int nvars,
    int nclasses,
    int nfeatures,
    int nvarsinpool,
    int flags,
    dfinternalbuffers bufs,
    hqrnd.hqrndstate rs)
{
    int numprocessed = 0;
    int row = 0;

    alglib.ap.assert(npoints>0);

    //
    // Prepare IdxBuf: fill it with the identity permutation 0..NPoints-1.
    // When the training set is split, IdxBuf is reordered in place so we
    // always know which elements belong to which branch of the tree.
    //
    for(row=0; row<npoints; row++)
    {
        bufs.idxbuf[row] = row;
    }

    //
    // Recursive procedure: cell 0 of TreeBuf is reserved for the total
    // tree size, so node emission starts at offset 1. After the recursion
    // NumProcessed points one past the last cell written, which is
    // exactly the size stored into TreeBuf[0].
    //
    numprocessed = 1;
    dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, 0, npoints-1, bufs, rs);
    bufs.treebuf[0] = numprocessed;
}