public hqrndstate(hqrnd.hqrndstate obj)
 {
     _innerobj = obj;
 }
Ejemplo n.º 2
0
        /*************************************************************************
        Builds one decision tree (internal recursive subroutine)

        Parameters:
            TreeBuf     -   large enough array, at least TreeSize
            IdxBuf      -   at least NPoints elements
            TmpBufR     -   at least NPoints
            TmpBufR2    -   at least NPoints
            TmpBufI     -   at least NPoints
            TmpBufI2    -   at least NPoints+1
        *************************************************************************/
        private static void dfbuildtreerec(double[,] xy,
            int npoints,
            int nvars,
            int nclasses,
            int nfeatures,
            int nvarsinpool,
            int flags,
            ref int numprocessed,
            int idx1,
            int idx2,
            dfinternalbuffers bufs,
            hqrnd.hqrndstate rs)
        {
            int i = 0;
            int j = 0;
            int k = 0;
            bool bflag = new bool();
            int i1 = 0;
            int i2 = 0;
            int info = 0;
            double sl = 0;
            double sr = 0;
            double w = 0;
            int idxbest = 0;
            double ebest = 0;
            double tbest = 0;
            int varcur = 0;
            double s = 0;
            double v = 0;
            double v1 = 0;
            double v2 = 0;
            double threshold = 0;
            int oldnp = 0;
            double currms = 0;
            bool useevs = new bool();

            
            //
            // these initializers are not really necessary,
            // but without them compiler complains about uninitialized locals
            //
            tbest = 0;
            
            //
            // Prepare
            //
            alglib.ap.assert(npoints>0);
            alglib.ap.assert(idx2>=idx1);
            useevs = flags/dfuseevs%2!=0;
            
            //
            // Leaf node
            //
            if( idx2==idx1 )
            {
                bufs.treebuf[numprocessed] = -1;
                bufs.treebuf[numprocessed+1] = xy[bufs.idxbuf[idx1],nvars];
                numprocessed = numprocessed+leafnodewidth;
                return;
            }
            
            //
            // Non-leaf node.
            // Select random variable, prepare split:
            // 1. prepare default solution - no splitting, class at random
            // 2. investigate possible splits, compare with default/best
            //
            idxbest = -1;
            if( nclasses>1 )
            {
                
                //
                // default solution for classification
                //
                for(i=0; i<=nclasses-1; i++)
                {
                    bufs.classibuf[i] = 0;
                }
                s = idx2-idx1+1;
                for(i=idx1; i<=idx2; i++)
                {
                    j = (int)Math.Round(xy[bufs.idxbuf[i],nvars]);
                    bufs.classibuf[j] = bufs.classibuf[j]+1;
                }
                ebest = 0;
                for(i=0; i<=nclasses-1; i++)
                {
                    ebest = ebest+bufs.classibuf[i]*math.sqr(1-bufs.classibuf[i]/s)+(s-bufs.classibuf[i])*math.sqr(bufs.classibuf[i]/s);
                }
                ebest = Math.Sqrt(ebest/(nclasses*(idx2-idx1+1)));
            }
            else
            {
                
                //
                // default solution for regression
                //
                v = 0;
                for(i=idx1; i<=idx2; i++)
                {
                    v = v+xy[bufs.idxbuf[i],nvars];
                }
                v = v/(idx2-idx1+1);
                ebest = 0;
                for(i=idx1; i<=idx2; i++)
                {
                    ebest = ebest+math.sqr(xy[bufs.idxbuf[i],nvars]-v);
                }
                ebest = Math.Sqrt(ebest/(idx2-idx1+1));
            }
            i = 0;
            while( i<=Math.Min(nfeatures, nvarsinpool)-1 )
            {
                
                //
                // select variables from pool
                //
                j = i+hqrnd.hqrnduniformi(rs, nvarsinpool-i);
                k = bufs.varpool[i];
                bufs.varpool[i] = bufs.varpool[j];
                bufs.varpool[j] = k;
                varcur = bufs.varpool[i];
                
                //
                // load variable values to working array
                //
                // apply EVS preprocessing: if all variable values are same,
                // variable is excluded from pool.
                //
                // This is necessary for binary pre-splits (see later) to work.
                //
                for(j=idx1; j<=idx2; j++)
                {
                    bufs.tmpbufr[j-idx1] = xy[bufs.idxbuf[j],varcur];
                }
                if( useevs )
                {
                    bflag = false;
                    v = bufs.tmpbufr[0];
                    for(j=0; j<=idx2-idx1; j++)
                    {
                        if( (double)(bufs.tmpbufr[j])!=(double)(v) )
                        {
                            bflag = true;
                            break;
                        }
                    }
                    if( !bflag )
                    {
                        
                        //
                        // exclude variable from pool,
                        // go to the next iteration.
                        // I is not increased.
                        //
                        k = bufs.varpool[i];
                        bufs.varpool[i] = bufs.varpool[nvarsinpool-1];
                        bufs.varpool[nvarsinpool-1] = k;
                        nvarsinpool = nvarsinpool-1;
                        continue;
                    }
                }
                
                //
                // load labels to working array
                //
                if( nclasses>1 )
                {
                    for(j=idx1; j<=idx2; j++)
                    {
                        bufs.tmpbufi[j-idx1] = (int)Math.Round(xy[bufs.idxbuf[j],nvars]);
                    }
                }
                else
                {
                    for(j=idx1; j<=idx2; j++)
                    {
                        bufs.tmpbufr2[j-idx1] = xy[bufs.idxbuf[j],nvars];
                    }
                }
                
                //
                // calculate split
                //
                if( useevs && bufs.evsbin[varcur] )
                {
                    
                    //
                    // Pre-calculated splits for binary variables.
                    // Threshold is already known, just calculate RMS error
                    //
                    threshold = bufs.evssplits[varcur];
                    if( nclasses>1 )
                    {
                        
                        //
                        // classification-specific code
                        //
                        for(j=0; j<=2*nclasses-1; j++)
                        {
                            bufs.classibuf[j] = 0;
                        }
                        sl = 0;
                        sr = 0;
                        for(j=0; j<=idx2-idx1; j++)
                        {
                            k = bufs.tmpbufi[j];
                            if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                            {
                                bufs.classibuf[k] = bufs.classibuf[k]+1;
                                sl = sl+1;
                            }
                            else
                            {
                                bufs.classibuf[k+nclasses] = bufs.classibuf[k+nclasses]+1;
                                sr = sr+1;
                            }
                        }
                        alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!");
                        currms = 0;
                        for(j=0; j<=nclasses-1; j++)
                        {
                            w = bufs.classibuf[j];
                            currms = currms+w*math.sqr(w/sl-1);
                            currms = currms+(sl-w)*math.sqr(w/sl);
                            w = bufs.classibuf[nclasses+j];
                            currms = currms+w*math.sqr(w/sr-1);
                            currms = currms+(sr-w)*math.sqr(w/sr);
                        }
                        currms = Math.Sqrt(currms/(nclasses*(idx2-idx1+1)));
                    }
                    else
                    {
                        
                        //
                        // regression-specific code
                        //
                        sl = 0;
                        sr = 0;
                        v1 = 0;
                        v2 = 0;
                        for(j=0; j<=idx2-idx1; j++)
                        {
                            if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                            {
                                v1 = v1+bufs.tmpbufr2[j];
                                sl = sl+1;
                            }
                            else
                            {
                                v2 = v2+bufs.tmpbufr2[j];
                                sr = sr+1;
                            }
                        }
                        alglib.ap.assert((double)(sl)!=(double)(0) && (double)(sr)!=(double)(0), "DFBuildTreeRec: something strange!");
                        v1 = v1/sl;
                        v2 = v2/sr;
                        currms = 0;
                        for(j=0; j<=idx2-idx1; j++)
                        {
                            if( (double)(bufs.tmpbufr[j])<(double)(threshold) )
                            {
                                currms = currms+math.sqr(v1-bufs.tmpbufr2[j]);
                            }
                            else
                            {
                                currms = currms+math.sqr(v2-bufs.tmpbufr2[j]);
                            }
                        }
                        currms = Math.Sqrt(currms/(idx2-idx1+1));
                    }
                    info = 1;
                }
                else
                {
                    
                    //
                    // Generic splits
                    //
                    if( nclasses>1 )
                    {
                        dfsplitc(ref bufs.tmpbufr, ref bufs.tmpbufi, ref bufs.classibuf, idx2-idx1+1, nclasses, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortibuf);
                    }
                    else
                    {
                        dfsplitr(ref bufs.tmpbufr, ref bufs.tmpbufr2, idx2-idx1+1, dfusestrongsplits, ref info, ref threshold, ref currms, ref bufs.sortrbuf, ref bufs.sortrbuf2);
                    }
                }
                if( info>0 )
                {
                    if( (double)(currms)<=(double)(ebest) )
                    {
                        ebest = currms;
                        idxbest = varcur;
                        tbest = threshold;
                    }
                }
                
                //
                // Next iteration
                //
                i = i+1;
            }
            
            //
            // to split or not to split
            //
            if( idxbest<0 )
            {
                
                //
                // All values are same, cannot split.
                //
                bufs.treebuf[numprocessed] = -1;
                if( nclasses>1 )
                {
                    
                    //
                    // Select random class label (randomness allows us to
                    // approximate distribution of the classes)
                    //
                    bufs.treebuf[numprocessed+1] = (int)Math.Round(xy[bufs.idxbuf[idx1+hqrnd.hqrnduniformi(rs, idx2-idx1+1)],nvars]);
                }
                else
                {
                    
                    //
                    // Select average (for regression task).
                    //
                    v = 0;
                    for(i=idx1; i<=idx2; i++)
                    {
                        v = v+xy[bufs.idxbuf[i],nvars]/(idx2-idx1+1);
                    }
                    bufs.treebuf[numprocessed+1] = v;
                }
                numprocessed = numprocessed+leafnodewidth;
            }
            else
            {
                
                //
                // we can split
                //
                bufs.treebuf[numprocessed] = idxbest;
                bufs.treebuf[numprocessed+1] = tbest;
                i1 = idx1;
                i2 = idx2;
                while( i1<=i2 )
                {
                    
                    //
                    // Reorder indices so that left partition is in [Idx1..I1-1],
                    // and right partition is in [I2+1..Idx2]
                    //
                    if( (double)(xy[bufs.idxbuf[i1],idxbest])<(double)(tbest) )
                    {
                        i1 = i1+1;
                        continue;
                    }
                    if( (double)(xy[bufs.idxbuf[i2],idxbest])>=(double)(tbest) )
                    {
                        i2 = i2-1;
                        continue;
                    }
                    j = bufs.idxbuf[i1];
                    bufs.idxbuf[i1] = bufs.idxbuf[i2];
                    bufs.idxbuf[i2] = j;
                    i1 = i1+1;
                    i2 = i2-1;
                }
                oldnp = numprocessed;
                numprocessed = numprocessed+innernodewidth;
                dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, idx1, i1-1, bufs, rs);
                bufs.treebuf[oldnp+2] = numprocessed;
                dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, i2+1, idx2, bufs, rs);
            }
        }
Ejemplo n.º 3
0
 /*************************************************************************
 Unsets HQRNDState structure
 *************************************************************************/
 private static void unsetstate(hqrnd.hqrndstate state)
 {
     state.s1 = 0;
     state.s2 = 0;
     state.v = 0;
     state.magicv = 0;
 }
Ejemplo n.º 4
0
        /*************************************************************************
        Builds one decision tree. Just a wrapper for the DFBuildTreeRec.
        *************************************************************************/
        private static void dfbuildtree(double[,] xy,
            int npoints,
            int nvars,
            int nclasses,
            int nfeatures,
            int nvarsinpool,
            int flags,
            dfinternalbuffers bufs,
            hqrnd.hqrndstate rs)
        {
            int numprocessed = 0;
            int i = 0;

            alglib.ap.assert(npoints>0);
            
            //
            // Prepare IdxBuf. It stores indices of the training set elements.
            // When training set is being split, contents of IdxBuf is
            // correspondingly reordered so we can know which elements belong
            // to which branch of decision tree.
            //
            for(i=0; i<=npoints-1; i++)
            {
                bufs.idxbuf[i] = i;
            }
            
            //
            // Recursive procedure
            //
            numprocessed = 1;
            dfbuildtreerec(xy, npoints, nvars, nclasses, nfeatures, nvarsinpool, flags, ref numprocessed, 0, npoints-1, bufs, rs);
            bufs.treebuf[0] = numprocessed;
        }