Esempio n. 1
0
 public override void init()
 {
     // Matrices start out zero-sized.
     this.xy = new double[0,0];
     this.d = new double[0,0];
     this.tmpd = new double[0,0];

     // Helper buffer objects are created fresh.
     this.distbuf = new apserv.apbuffers();
     this.kmeanstmp = new kmeansbuffers();
 }
Esempio n. 2
0
        /*************************************************************************
        K-means++ clusterization

        INPUT PARAMETERS:
            XY          -   dataset, array [0..NPoints-1,0..NVars-1].
            NPoints     -   dataset size, NPoints>=K
            NVars       -   number of variables, NVars>=1
            K           -   desired number of clusters, K>=1
            InitAlgo    -   initialization algorithm:
                            * 0 - automatic selection of best algorithm
                            * 1 - random selection of centers
                            * 2 - k-means++
                            * 3 - fast-greedy init
                            *-1 - first K rows of dataset are used
                                  (special debug algorithm)
            MaxIts      -   iterations limit or zero for no limit
            Restarts    -   number of restarts, Restarts>=1
            KMeansDbgNoIts- debug flag; if set, Lloyd's iteration is not performed,
                            only initialization phase.
            Buf         -   special reusable structure which stores previously allocated
                            memory, intended to avoid memory fragmentation when solving
                            multiple subsequent problems:
                            * MUST BE INITIALIZED WITH KMeansInitBuf() CALL BEFORE
                              FIRST PASS TO THIS FUNCTION!
                            * subsequent passes must be made without re-initialization

        OUTPUT PARAMETERS:
            Info        -   return code:
                            * -3, if task is degenerate (number of distinct points is
                                  less than K)
                            * -1, if incorrect NPoints/NVars/K/Restarts was passed
                            *  1, if subroutine finished successfully
            IterationsCount- actual number of iterations performed by clusterizer
            CCol        -   array[0..NVars-1,0..K-1], matrix whose columns store
                            cluster's centers
            NeedCCol    -   True in case caller requires to store result in CCol
            CRow        -   array[0..K-1,0..NVars-1], same as CCol, but centers are
                            stored in rows
            NeedCRow    -   True in case caller requires to store result in CRow
            XYC         -   array[NPoints], which contains cluster indexes
            Energy      -   merit function of clusterization

          -- ALGLIB --
             Copyright 21.03.2009 by Bochkanov Sergey
        *************************************************************************/
        public static void kmeansgenerateinternal(double[,] xy,
            int npoints,
            int nvars,
            int k,
            int initalgo,
            int maxits,
            int restarts,
            bool kmeansdbgnoits,
            ref int info,
            ref int iterationscount,
            ref double[,] ccol,
            bool needccol,
            ref double[,] crow,
            bool needcrow,
            ref int[] xyc,
            ref double energy,
            kmeansbuffers buf)
        {
            hqrnd.hqrndstate rs = new hqrnd.hqrndstate();

            // Put every output into a well-defined "empty" state before doing anything else.
            info = 0;
            iterationscount = 0;
            ccol = new double[0,0];
            crow = new double[0,0];
            xyc = new int[0];
            energy = 0;

            // Argument validation: -1 is reported for inconsistent sizes.
            if( npoints<k || nvars<1 || k<1 || restarts<1 )
            {
                info = -1;
                iterationscount = 0;
                return;
            }

            // From here on the default outcome is "success".
            // (Special cases K=1 and K=NPoints are not treated separately.)
            info = 1;
            iterationscount = 0;

            // Allocate the assignment vector and grow the reusable buffers as needed.
            xyc = new int[npoints];
            apserv.rmatrixsetlengthatleast(ref buf.ct, k, nvars);
            apserv.rmatrixsetlengthatleast(ref buf.ctbest, k, nvars);
            apserv.ivectorsetlengthatleast(ref buf.xycprev, npoints);
            apserv.ivectorsetlengthatleast(ref buf.xycbest, npoints);
            apserv.rvectorsetlengthatleast(ref buf.d2, npoints);
            apserv.ivectorsetlengthatleast(ref buf.csizes, k);
            energy = math.maxrealnumber;
            hqrnd.hqrndrandomize(rs);

            // One clustering attempt per restart; the attempt with the lowest energy wins.
            for(int attempt=1; attempt<=restarts; attempt++)
            {
                double passenergy;

                // Initial centers go into the ROWS of buf.ct (transposed only at the very end).
                // The returned set may be degenerate (empty or coinciding centers); the
                // iteration below copes with that via fixcenters().
                selectinitialcenters(xy, npoints, nvars, initalgo, k, ref buf.ct, buf.initbuf, buf.updatepool);

                if( kmeansdbgnoits )
                {
                    // Debug mode: skip Lloyd's iteration and only evaluate the energy
                    // of the initial centers as the sum of the distances stored in buf.d2.
                    kmeansupdatedistances(xy, 0, npoints, nvars, buf.ct, 0, k, xyc, buf.d2, buf.updatepool);
                    passenergy = 0;
                    for(int p=0; p<npoints; p++)
                    {
                        passenergy = passenergy+buf.d2[p];
                    }
                }
                else
                {
                    // Normal mode: Lloyd's iteration.
                    for(int p=0; p<npoints; p++)
                    {
                        xyc[p] = -1;
                    }
                    double prevenergy = math.maxrealnumber;
                    passenergy = math.maxrealnumber;
                    int localits = 0;
                    while( maxits==0 || localits<maxits )
                    {
                        // Count this iteration both locally and in the caller-visible total.
                        localits = localits+1;
                        apserv.inc(ref iterationscount);

                        // Remember the previous assignment, recompute assignments (xyc) and
                        // distances (buf.d2), then detect whether any point moved.
                        for(int p=0; p<npoints; p++)
                        {
                            buf.xycprev[p] = xyc[p];
                        }
                        kmeansupdatedistances(xy, 0, npoints, nvars, buf.ct, 0, k, xyc, buf.d2, buf.updatepool);
                        bool assignmentchanged = false;
                        for(int p=0; p<npoints; p++)
                        {
                            if( xyc[p]!=buf.xycprev[p] )
                            {
                                assignmentchanged = true;
                            }
                        }

                        // Recompute centers: clear counters and sums...
                        for(int cluster=0; cluster<k; cluster++)
                        {
                            buf.csizes[cluster] = 0;
                            for(int col=0; col<nvars; col++)
                            {
                                buf.ct[cluster,col] = 0;
                            }
                        }

                        // ...accumulate per-cluster coordinate sums and sizes...
                        for(int p=0; p<npoints; p++)
                        {
                            int owner = xyc[p];
                            buf.csizes[owner] = buf.csizes[owner]+1;
                            for(int col=0; col<nvars; col++)
                            {
                                buf.ct[owner,col] = buf.ct[owner,col] + xy[p,col];
                            }
                        }

                        // ...and turn sums into means, noting any cluster that ended up empty.
                        bool hasemptycluster = false;
                        for(int cluster=0; cluster<k; cluster++)
                        {
                            if( buf.csizes[cluster]==0 )
                            {
                                hasemptycluster = true;
                                continue;
                            }
                            double scale = (double)1/(double)buf.csizes[cluster];
                            for(int col=0; col<nvars; col++)
                            {
                                buf.ct[cluster,col] = scale*buf.ct[cluster,col];
                            }
                        }

                        if( hasemptycluster )
                        {
                            // Empty clusters are rare but possible: ask fixcenters() for
                            // replacement centers and go straight to the next iteration.
                            // A failure there is reported as a degenerate task (-3).
                            if( !fixcenters(xy, npoints, nvars, buf.ct, k, buf.initbuf, buf.updatepool) )
                            {
                                info = -3;
                                return;
                            }
                            continue;
                        }

                        // Energy of the current partition with respect to the updated centers.
                        passenergy = 0;
                        for(int p=0; p<npoints; p++)
                        {
                            double dist2 = 0.0;
                            int owner = xyc[p];
                            for(int col=0; col<nvars; col++)
                            {
                                double diff = xy[p,col]-buf.ct[owner,col];
                                dist2 = dist2+diff*diff;
                            }
                            passenergy = passenergy+dist2;
                        }

                        // Stop when nothing moved or when the energy failed to decrease.
                        // The explicit casts are kept exactly as in the original comparison.
                        if( !assignmentchanged || (double)(passenergy)>=(double)(prevenergy) )
                        {
                            break;
                        }
                        prevenergy = passenergy;
                    }
                }

                // Keep this attempt if it is strictly better than the best one so far.
                if( (double)(passenergy)<(double)(energy) )
                {
                    energy = passenergy;
                    blas.copymatrix(buf.ct, 0, k-1, 0, nvars-1, ref buf.ctbest, 0, k-1, 0, nvars-1);
                    for(int p=0; p<npoints; p++)
                    {
                        buf.xycbest[p] = xyc[p];
                    }
                }
            }

            // Export the best centers in the requested layouts and the best assignment.
            if( needccol )
            {
                ccol = new double[nvars, k];
                blas.copyandtranspose(buf.ctbest, 0, k-1, 0, nvars-1, ref ccol, 0, nvars-1, 0, k-1);
            }
            if( needcrow )
            {
                crow = new double[k, nvars];
                ablas.rmatrixcopy(k, nvars, buf.ctbest, 0, 0, ref crow, 0, 0);
            }
            for(int p=0; p<npoints; p++)
            {
                xyc[p] = buf.xycbest[p];
            }
        }
Esempio n. 3
0
 public override alglib.apobject make_copy()
 {
     // Build an independent duplicate: arrays are cloned, nested objects are
     // duplicated through their own make_copy().
     kmeansbuffers copy = new kmeansbuffers();

     // Center matrices (current and best).
     copy.ct = (double[,])this.ct.Clone();
     copy.ctbest = (double[,])this.ctbest.Clone();

     // Per-point and per-cluster work vectors.
     copy.xycbest = (int[])this.xycbest.Clone();
     copy.xycprev = (int[])this.xycprev.Clone();
     copy.d2 = (double[])this.d2.Clone();
     copy.csizes = (int[])this.csizes.Clone();

     // Nested helper objects.
     copy.initbuf = (apserv.apbuffers)this.initbuf.make_copy();
     copy.updatepool = (alglib.smp.shared_pool)this.updatepool.make_copy();

     return copy;
 }
Esempio n. 4
0
        /*************************************************************************
        K-means++ initialization

        INPUT PARAMETERS:
            Buf         -   special reusable structure which stores previously allocated
                            memory, intended to avoid memory fragmentation when solving
                            multiple subsequent problems. Must be initialized prior to
                            usage.

        OUTPUT PARAMETERS:
            Buf         -   initialized structure

          -- ALGLIB --
             Copyright 24.07.2015 by Bochkanov Sergey
        *************************************************************************/
        public static void kmeansinitbuf(kmeansbuffers buf)
        {
            // A newly constructed apbuffers instance is handed to the shared pool
            // of buf as its seed object.
            apserv.apbuffers poolseed = new apserv.apbuffers();
            alglib.smp.ae_shared_pool_set_seed(buf.updatepool, poolseed);
        }