Example #1
0
 /*************************************************************************
 Single-threaded stub. HPC ALGLIB replaces it by multithreaded code.
 *************************************************************************/
 public static void _pexec_clusterizerrunahc(clusterizerstate s,
     ahcreport rep)
 {
     // Pure forwarding call: the arguments are passed through unchanged to the
     // serial clusterizerrunahc(), and nothing else is done here.
     clusterizerrunahc(s,rep);
 }
Example #2
0
        /*************************************************************************
        This function takes as input clusterization report Rep,  desired  clusters
        count K, and builds top K clusters from hierarchical clusterization  tree.
        It returns assignment of points to clusters (array of cluster indexes).

        INPUT PARAMETERS:
            Rep     -   report from ClusterizerRunAHC() performed on XY
            K       -   desired number of clusters, 1<=K<=NPoints.
                        K can be zero only when NPoints=0.

        OUTPUT PARAMETERS:
            CIdx    -   array[NPoints], I-th element contains cluster index  (from
                        0 to K-1) for I-th point of the dataset.
            CZ      -   array[K]. This array allows  to  convert  cluster  indexes
                        returned by this function to indexes used by  Rep.Z.  J-th
                        cluster returned by this function corresponds to  CZ[J]-th
                        cluster stored in Rep.Z/PZ/PM.
                        It is guaranteed that CZ[I]<CZ[I+1].

        NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
              Although  they  were  obtained  by  manipulation with top K nodes of
              dendrogram  (i.e.  hierarchical  decomposition  of  dataset),   this
              function does not return information about hierarchy.  Each  of  the
              clusters stands on its own.
              
        NOTE: Cluster indexes returned by this function  do   not  correspond  to
              indexes returned in Rep.Z/PZ/PM. Either you work  with  hierarchical
              representation of the dataset (dendrogram), or you work with  "flat"
              representation returned by this function.  Each  of  representations
              has its own cluster indexing system (former uses  [0, 2*NPoints-2],
              while latter uses [0..K-1]), although  it  is  possible  to  perform
              conversion from one system to another by means of CZ array, returned
              by this function, which allows you to convert indexes stored in CIdx
              to the numeration system used by Rep.Z.
              
        NOTE: this subroutine is optimized for moderate values of K. Say, for  K=5
              it will perform many times faster than  for  K=100.  Its  worst-case
              performance is O(N*K), although in average case it  performs  better
              (up to O(N*log(K))).

          -- ALGLIB --
             Copyright 10.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void clusterizergetkclusters(ahcreport rep,
            int k,
            ref int[] cidx,
            ref int[] cz)
        {
            // Outputs are reset first, so they are empty arrays on the
            // "no points" exit path below.
            cidx = new int[0];
            cz = new int[0];

            int npoints = rep.npoints;
            alglib.ap.assert(npoints>=0, "ClusterizerGetKClusters: internal error in Rep integrity");
            alglib.ap.assert(k>=0, "ClusterizerGetKClusters: K<=0");
            alglib.ap.assert(k<=npoints, "ClusterizerGetKClusters: K>NPoints");
            alglib.ap.assert(k>0 || npoints==0, "ClusterizerGetKClusters: K<=0");
            alglib.ap.assert(npoints==rep.npoints, "ClusterizerGetKClusters: NPoints<>Rep.NPoints");

            // Trivial datasets: nothing to do for an empty one; a single point
            // forms the only cluster (index 0, dendrogram node 0).
            if( npoints==0 )
            {
                return;
            }
            if( npoints==1 )
            {
                // Freshly allocated int arrays are zero-filled, which is exactly
                // the required content (cz[0]=0, cidx[0]=0).
                cz = new int[1];
                cidx = new int[1];
                return;
            }

            // Dendrogram nodes are numbered 0..2*NPoints-2; the last one is the
            // root, which spans the whole dataset.
            int rootnode = 2*npoints-2;

            // nodepresent[c] tells whether dendrogram node c is one of the
            // clusters of the current cut (a new bool[] starts as all-false).
            // nodeofpos[j] is the node which currently owns position j of the
            // permuted dataset.
            bool[] nodepresent = new bool[rootnode+1];
            int[] nodeofpos = new int[npoints];
            nodepresent[rootnode] = true;
            for(int pos=0; pos<npoints; pos++)
            {
                nodeofpos[pos] = rootnode;
            }

            // Undo the last K-1 merges, from the top of the tree downwards.
            // Every undone merge replaces one present node by its two children.
            for(int merge=npoints-2; merge>=npoints-k; merge--)
            {
                int firstchild = rep.z[merge,0];
                int secondchild = rep.z[merge,1];

                nodepresent[npoints+merge] = false;
                nodepresent[firstchild] = true;
                nodepresent[secondchild] = true;

                // Rep.PM[merge,0..1] is the position range of the first child,
                // Rep.PM[merge,2..3] is the position range of the second one.
                // Node numbers stored here are still from [0..2*NPoints-2];
                // they are converted to [0..K-1] at the very end.
                int firstbegin = rep.pm[merge,0];
                int firstend = rep.pm[merge,1];
                for(int pos=firstbegin; pos<=firstend; pos++)
                {
                    nodeofpos[pos] = firstchild;
                }
                int secondbegin = rep.pm[merge,2];
                int secondend = rep.pm[merge,3];
                for(int pos=secondbegin; pos<=secondend; pos++)
                {
                    nodeofpos[pos] = secondchild;
                }
            }

            // Enumerate present nodes in increasing order: CZ maps flat index to
            // node number, flatofnode is the inverse mapping.
            cz = new int[k];
            int[] flatofnode = new int[rootnode+1];
            int found = 0;
            for(int node=0; node<=rootnode; node++)
            {
                if( !nodepresent[node] )
                {
                    continue;
                }
                cz[found] = node;
                flatofnode[node] = found;
                found = found+1;
            }
            alglib.ap.assert(found==k, "ClusterizerGetKClusters: internal error");

            // Point -> position (Rep.P) -> owning node -> flat cluster index.
            cidx = new int[npoints];
            for(int point=0; point<npoints; point++)
            {
                cidx[point] = flatofnode[nodeofpos[rep.p[point]]];
            }
        }
Example #3
0
        /*************************************************************************
        This function performs agglomerative hierarchical clustering

        COMMERCIAL EDITION OF ALGLIB:

          ! Commercial version of ALGLIB includes two  important  improvements  of
          ! this function, which can be used from C++ and C#:
          ! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
          ! * multicore support
          !
          ! Agglomerative  hierarchical  clustering  algorithm  has  two   phases:
          ! distance matrix calculation  and  clustering  itself. Only first phase
          ! (distance matrix calculation) is accelerated by Intel MKL  and  multi-
          ! threading. Thus, acceleration is significant only for  medium or high-
          ! dimensional problems.
          !
          ! We recommend you to read 'Working with commercial version' section  of
          ! ALGLIB Reference Manual in order to find out how to  use  performance-
          ! related features provided by commercial edition of ALGLIB.

        INPUT PARAMETERS:
            S       -   clusterizer state, initialized by ClusterizerCreate()

        OUTPUT PARAMETERS:
            Rep     -   clustering results; see description of AHCReport
                        structure for more information.

        NOTE 1: hierarchical clustering algorithms require large amounts of memory.
                In particular, this implementation needs  sizeof(double)*NPoints^2
                bytes, which are used to store distance matrix. In  case  we  work
                with user-supplied matrix, this amount is multiplied by 2 (we have
                to store original matrix and to work with its copy).
                
                For example, problem with 10000 points  would require 800M of RAM,
                even when working in a 1-dimensional space.

          -- ALGLIB --
             Copyright 10.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void clusterizerrunahc(clusterizerstate s,
            ahcreport rep)
        {
            int npoints = s.npoints;
            int nfeatures = s.nfeatures;

            // The report always carries the dataset size, even for degenerate inputs.
            rep.npoints = npoints;

            // Zero or one point: there are no merges, so every merge-related
            // array is empty. For a single point Rep.P is {0} (a new int[1] is
            // zero-filled).
            if( npoints==0 || npoints==1 )
            {
                rep.p = new int[npoints];
                rep.z = new int[0, 0];
                rep.pz = new int[0, 0];
                rep.pm = new int[0, 0];
                rep.mergedist = new double[0];
                rep.terminationtype = 1;
                return;
            }

            // DistType=-1: cluster directly on the matrix stored in S.D.
            if( s.disttype==-1 )
            {
                clusterizerrunahcinternal(s, ref s.d, rep);
                return;
            }

            // AHCAlgo=4 is accepted only together with DistType=2; any other
            // pairing is reported through TerminationType=-5 and nothing is run.
            if( s.ahcalgo==4 && s.disttype!=2 )
            {
                rep.terminationtype = -5;
                return;
            }

            // Build the distance matrix into S.TmpD, then cluster on it.
            clusterizergetdistancesbuf(s.distbuf, s.xy, npoints, nfeatures, s.disttype, ref s.tmpd);
            clusterizerrunahcinternal(s, ref s.tmpd, rep);
        }
Example #4
0
 public override alglib.apobject make_copy()
 {
     // Build an independent duplicate of this report: scalar fields are copied
     // by value and every array is cloned, so the duplicate shares no array
     // storage with this instance.
     ahcreport copy = new ahcreport();

     // Scalars
     copy.npoints = npoints;
     copy.terminationtype = terminationtype;

     // One-dimensional arrays
     copy.p = (int[])p.Clone();
     copy.mergedist = (double[])mergedist.Clone();

     // Two-dimensional arrays
     copy.z = (int[,])z.Clone();
     copy.pz = (int[,])pz.Clone();
     copy.pm = (int[,])pm.Clone();

     return copy;
 }
Example #5
0
        /*************************************************************************
        This  function  performs  agglomerative  hierarchical  clustering    using
        precomputed  distance  matrix.  Internal  function,  should  not be called
        directly.

        INPUT PARAMETERS:
            S       -   clusterizer state, initialized by ClusterizerCreate()
            D       -   distance matrix, array[S.NPoints,S.NPoints]
                        Contents of the matrix is destroyed during
                        algorithm operation.

        OUTPUT PARAMETERS:
            Rep     -   clustering results; see description of AHCReport
                        structure for more information.

          -- ALGLIB --
             Copyright 10.07.2012 by Bochkanov Sergey
        *************************************************************************/
        private static void clusterizerrunahcinternal(clusterizerstate s,
            ref double[,] d,
            ahcreport rep)
        {
            // Scratch variables: I/J/K are loop/search indexes, V is the best
            // distance found so far during nearest-neighbor searches.
            int i = 0;
            int j = 0;
            int k = 0;
            double v = 0;
            int mergeidx = 0;
            // C0/C1 are rows of D during the merge phase and cluster indexes
            // during the layout phase; S0/S1 are their sizes, [AR,BR] is the
            // position range of the merge result.
            int c0 = 0;
            int c1 = 0;
            int s0 = 0;
            int s1 = 0;
            int ar = 0;
            int br = 0;
            int npoints = 0;
            int[] cidx = new int[0];
            int[] csizes = new int[0];
            int[] nnidx = new int[0];
            int[,] cinfo = new int[0,0];
            // Cluster sizes used by the AHCAlgo=4 update, and the distance
            // between the pair of clusters being merged.
            int n0 = 0;
            int n1 = 0;
            int ni = 0;
            double d01 = 0;

            npoints = s.npoints;
            
            //
            // Fill Rep.NPoints, quick exit when NPoints<=1
            //
            rep.npoints = npoints;
            if( npoints==0 )
            {
                rep.p = new int[0];
                rep.z = new int[0, 0];
                rep.pz = new int[0, 0];
                rep.pm = new int[0, 0];
                rep.mergedist = new double[0];
                rep.terminationtype = 1;
                return;
            }
            if( npoints==1 )
            {
                rep.p = new int[1];
                rep.z = new int[0, 0];
                rep.pz = new int[0, 0];
                rep.pm = new int[0, 0];
                rep.mergedist = new double[0];
                rep.p[0] = 0;
                rep.terminationtype = 1;
                return;
            }
            // General case: NPoints-1 merges will be recorded, one row of Rep.Z
            // and one element of Rep.MergeDist per merge.
            rep.z = new int[npoints-1, 2];
            rep.mergedist = new double[npoints-1];
            rep.terminationtype = 1;
            
            //
            // Build list of nearest neighbors: NNIdx[I] is the column J<>I with
            // the smallest D[I,J]. It is maintained incrementally during merges,
            // so selection of the closest pair needs one pass over rows instead
            // of a scan of the whole matrix.
            //
            nnidx = new int[npoints];
            for(i=0; i<=npoints-1; i++)
            {
                
                //
                // Calculate index of the nearest neighbor
                //
                k = -1;
                v = math.maxrealnumber;
                for(j=0; j<=npoints-1; j++)
                {
                    if( j!=i && (double)(d[i,j])<(double)(v) )
                    {
                        k = j;
                        v = d[i,j];
                    }
                }
                alglib.ap.assert((double)(v)<(double)(math.maxrealnumber), "ClusterizerRunAHC: internal error");
                nnidx[i] = k;
            }
            
            //
            // For AHCAlgo=4 (Ward's method) replace distances by their squares times 0.5
            //
            // NOTE: this is done after the nearest neighbors were computed on the
            //       original values of D.
            //
            if( s.ahcalgo==4 )
            {
                for(i=0; i<=npoints-1; i++)
                {
                    for(j=0; j<=npoints-1; j++)
                    {
                        d[i,j] = 0.5*d[i,j]*d[i,j];
                    }
                }
            }
            
            //
            // Distance matrix is built, perform merges.
            //
            // NOTE 1: CIdx is array[NPoints] which maps rows/columns of the
            //         distance matrix D to indexes of clusters. Values of CIdx
            //         from [0,NPoints) denote single-point clusters, and values
            //         from [NPoints,2*NPoints-1) denote ones obtained by merging
            //         smaller clusters. Negative values correspond to absent clusters.
            //
            //         Initially it contains [0...NPoints-1], after each merge
            //         one element of CIdx (one with index C0) is replaced by
            //         NPoints+MergeIdx, and another one with index C1 is
            //         rewritten by -1.
            // 
            // NOTE 2: CSizes is array[NPoints] which stores sizes of clusters.
            //         After a merge the size of row C0 becomes the sum of both
            //         sizes, and the size of row C1 becomes zero.
            //
            cidx = new int[npoints];
            csizes = new int[npoints];
            for(i=0; i<=npoints-1; i++)
            {
                cidx[i] = i;
                csizes[i] = 1;
            }
            for(mergeidx=0; mergeidx<=npoints-2; mergeidx++)
            {
                
                //
                // Select pair of clusters (C0,C1) with CIdx[C0]<CIdx[C1] to merge.
                //
                // Only rows of clusters which are still present (CIdx>=0) are
                // examined, and for each of them only the distance to its cached
                // nearest neighbor is tested.
                //
                c0 = -1;
                c1 = -1;
                d01 = math.maxrealnumber;
                for(i=0; i<=npoints-1; i++)
                {
                    if( cidx[i]>=0 )
                    {
                        if( (double)(d[i,nnidx[i]])<(double)(d01) )
                        {
                            c0 = i;
                            c1 = nnidx[i];
                            d01 = d[i,nnidx[i]];
                        }
                    }
                }
                alglib.ap.assert((double)(d01)<(double)(math.maxrealnumber), "ClusterizerRunAHC: internal error");
                // Swap so that C0 refers to the cluster with the smaller index.
                if( cidx[c0]>cidx[c1] )
                {
                    i = c1;
                    c1 = c0;
                    c0 = i;
                }
                
                //
                // Fill one row of Rep.Z and one element of Rep.MergeDist
                //
                rep.z[mergeidx,0] = cidx[c0];
                rep.z[mergeidx,1] = cidx[c1];
                rep.mergedist[mergeidx] = d01;
                
                //
                // Update distance matrix:
                // * row/column C0 are updated by distances to the new cluster
                // * row/column C1 are considered empty (we can fill them by zeros,
                //   but do not want to spend time - we just ignore them)
                //
                // NOTE: it is important to update distance matrix BEFORE CIdx/CSizes
                //       are updated.
                //
                // Update rules, by AHCAlgo:
                // * 0 - maximum of distances to C0 and C1
                // * 1 - minimum of distances to C0 and C1 (single linkage)
                // * 2 - average of both distances weighted by cluster sizes
                // * 3 - plain (unweighted) average of both distances
                // * 4 - Ward's method, combines both distances and D01 using
                //       sizes N0, N1, NI
                //
                alglib.ap.assert((((s.ahcalgo==0 || s.ahcalgo==1) || s.ahcalgo==2) || s.ahcalgo==3) || s.ahcalgo==4, "ClusterizerRunAHC: internal error");
                for(i=0; i<=npoints-1; i++)
                {
                    if( i!=c0 && i!=c1 )
                    {
                        n0 = csizes[c0];
                        n1 = csizes[c1];
                        ni = csizes[i];
                        if( s.ahcalgo==0 )
                        {
                            d[i,c0] = Math.Max(d[i,c0], d[i,c1]);
                        }
                        if( s.ahcalgo==1 )
                        {
                            d[i,c0] = Math.Min(d[i,c0], d[i,c1]);
                        }
                        if( s.ahcalgo==2 )
                        {
                            d[i,c0] = (csizes[c0]*d[i,c0]+csizes[c1]*d[i,c1])/(csizes[c0]+csizes[c1]);
                        }
                        if( s.ahcalgo==3 )
                        {
                            d[i,c0] = (d[i,c0]+d[i,c1])/2;
                        }
                        if( s.ahcalgo==4 )
                        {
                            d[i,c0] = ((n0+ni)*d[i,c0]+(n1+ni)*d[i,c1]-ni*d01)/(n0+n1+ni);
                        }
                        // Mirror the new value to keep row C0 equal to column C0.
                        d[c0,i] = d[i,c0];
                    }
                }
                
                //
                // Update CIdx and CSizes
                //
                cidx[c0] = npoints+mergeidx;
                cidx[c1] = -1;
                csizes[c0] = csizes[c0]+csizes[c1];
                csizes[c1] = 0;
                
                //
                // Update nearest neighbors array:
                // * update nearest neighbors of everything except for C0/C1
                // * update neighbors of C0/C1
                //
                for(i=0; i<=npoints-1; i++)
                {
                    if( (cidx[i]>=0 && i!=c0) && (nnidx[i]==c0 || nnidx[i]==c1) )
                    {
                        
                        //
                        // I-th cluster which is distinct from C0/C1 has former C0/C1 cluster as its nearest
                        // neighbor. We handle this issue depending on specific AHC algorithm being used.
                        //
                        if( s.ahcalgo==1 )
                        {
                            
                            //
                            // Single linkage. Merging of two clusters together
                            // does NOT change distances between new cluster and
                            // other clusters.
                            //
                            // The only thing we have to do is to update nearest neighbor index
                            //
                            nnidx[i] = c0;
                        }
                        else
                        {
                            
                            //
                            // Something other than single linkage. We have to re-examine
                            // all the row to find nearest neighbor.
                            //
                            k = -1;
                            v = math.maxrealnumber;
                            for(j=0; j<=npoints-1; j++)
                            {
                                if( (cidx[j]>=0 && j!=i) && (double)(d[i,j])<(double)(v) )
                                {
                                    k = j;
                                    v = d[i,j];
                                }
                            }
                            alglib.ap.assert((double)(v)<(double)(math.maxrealnumber) || mergeidx==npoints-2, "ClusterizerRunAHC: internal error");
                            nnidx[i] = k;
                        }
                    }
                }
                //
                // Nearest neighbor of the merged cluster (row C0) is always
                // recomputed from scratch. After the very last merge there is no
                // other cluster left, K stays -1, and the assertion below
                // explicitly allows this case.
                //
                k = -1;
                v = math.maxrealnumber;
                for(j=0; j<=npoints-1; j++)
                {
                    if( (cidx[j]>=0 && j!=c0) && (double)(d[c0,j])<(double)(v) )
                    {
                        k = j;
                        v = d[c0,j];
                    }
                }
                alglib.ap.assert((double)(v)<(double)(math.maxrealnumber) || mergeidx==npoints-2, "ClusterizerRunAHC: internal error");
                nnidx[c0] = k;
            }
            
            //
            // Calculate Rep.P and Rep.PM.
            //
            // In order to do that, we fill CInfo matrix - (2*NPoints-1)*4 matrix,
            // with I-th row containing:
            // * CInfo[I,0]     -   size of I-th cluster
            // * CInfo[I,1]     -   beginning of I-th cluster
            // * CInfo[I,2]     -   end of I-th cluster
            // * CInfo[I,3]     -   height of I-th cluster
            //
            // We perform it as follows:
            // * first NPoints clusters have unit size (CInfo[I,0]=1) and zero
            //   height (CInfo[I,3]=0)
            // * we replay NPoints-1 merges from first to last and fill sizes of
            //   corresponding clusters (new size is a sum of sizes of clusters
            //   being merged) and height (new height is max(heights)+1).
            // * now we ready to determine locations of clusters. Last cluster
            //   spans entire dataset, we know it. We replay merges from last to
            //   first, during each merge we already know location of the merge
            //   result, and we can position first cluster to the left part of
            //   the result, and second cluster to the right part.
            //
            rep.p = new int[npoints];
            rep.pm = new int[npoints-1, 6];
            cinfo = new int[2*npoints-1, 4];
            for(i=0; i<=npoints-1; i++)
            {
                cinfo[i,0] = 1;
                cinfo[i,3] = 0;
            }
            for(i=0; i<=npoints-2; i++)
            {
                cinfo[npoints+i,0] = cinfo[rep.z[i,0],0]+cinfo[rep.z[i,1],0];
                cinfo[npoints+i,3] = Math.Max(cinfo[rep.z[i,0],3], cinfo[rep.z[i,1],3])+1;
            }
            // Root cluster (index 2*NPoints-2) occupies positions [0,NPoints-1].
            cinfo[2*npoints-2,1] = 0;
            cinfo[2*npoints-2,2] = npoints-1;
            for(i=npoints-2; i>=0; i--)
            {
                
                //
                // We merge C0 which spans [A0,B0] and C1 (spans [A1,B1]),
                // with unknown A0, B0, A1, B1. However, we know that result
                // is CR, which spans [AR,BR] with known AR/BR, and we know
                // sizes of C0, C1, CR (denotes as S0, S1, SR).
                //
                // C0 is placed at the left end of [AR,BR], C1 at the right end.
                // I-th row of Rep.PM stores: range of C0 (columns 0-1), range
                // of C1 (columns 2-3), heights of C0 and C1 (columns 4-5).
                //
                c0 = rep.z[i,0];
                c1 = rep.z[i,1];
                s0 = cinfo[c0,0];
                s1 = cinfo[c1,0];
                ar = cinfo[npoints+i,1];
                br = cinfo[npoints+i,2];
                cinfo[c0,1] = ar;
                cinfo[c0,2] = ar+s0-1;
                cinfo[c1,1] = br-(s1-1);
                cinfo[c1,2] = br;
                rep.pm[i,0] = cinfo[c0,1];
                rep.pm[i,1] = cinfo[c0,2];
                rep.pm[i,2] = cinfo[c1,1];
                rep.pm[i,3] = cinfo[c1,2];
                rep.pm[i,4] = cinfo[c0,3];
                rep.pm[i,5] = cinfo[c1,3];
            }
            // Single-point clusters must span exactly one position; that position
            // is stored in Rep.P for the corresponding point.
            for(i=0; i<=npoints-1; i++)
            {
                alglib.ap.assert(cinfo[i,1]==cinfo[i,2]);
                rep.p[i] = cinfo[i,1];
            }
            
            //
            // Calculate Rep.PZ
            //
            // Rep.PZ is a copy of Rep.Z in which indexes of single-point clusters
            // (values below NPoints) are replaced by positions taken from Rep.P;
            // indexes of merged clusters are left unchanged.
            //
            rep.pz = new int[npoints-1, 2];
            for(i=0; i<=npoints-2; i++)
            {
                rep.pz[i,0] = rep.z[i,0];
                rep.pz[i,1] = rep.z[i,1];
                if( rep.pz[i,0]<npoints )
                {
                    rep.pz[i,0] = rep.p[rep.pz[i,0]];
                }
                if( rep.pz[i,1]<npoints )
                {
                    rep.pz[i,1] = rep.p[rep.pz[i,1]];
                }
            }
        }
Example #6
0
        /*************************************************************************
        This  function  accepts  AHC  report  Rep,  desired  maximum  intercluster
        correlation and returns top clusters from hierarchical clusterization tree
        which are separated by correlation R or LOWER.

        It returns assignment of points to clusters (array of cluster indexes).

        There is one more function with similar name - ClusterizerSeparatedByDist,
        which returns clusters with intercluster distance equal  to  R  or  HIGHER
        (note: higher for distance, lower for correlation).

        INPUT PARAMETERS:
            Rep     -   report from ClusterizerRunAHC() performed on XY
            R       -   desired maximum intercluster correlation, -1<=R<=+1

        OUTPUT PARAMETERS:
            K       -   number of clusters, 1<=K<=NPoints
            CIdx    -   array[NPoints], I-th element contains cluster index  (from
                        0 to K-1) for I-th point of the dataset.
            CZ      -   array[K]. This array allows  to  convert  cluster  indexes
                        returned by this function to indexes used by  Rep.Z.  J-th
                        cluster returned by this function corresponds to  CZ[J]-th
                        cluster stored in Rep.Z/PZ/PM.
                        It is guaranteed that CZ[I]<CZ[I+1].

        NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
              Although  they  were  obtained  by  manipulation with top K nodes of
              dendrogram  (i.e.  hierarchical  decomposition  of  dataset),   this
              function does not return information about hierarchy.  Each  of  the
              clusters stands on its own.
              
        NOTE: Cluster indexes returned by this function  do   not  correspond  to
              indexes returned in Rep.Z/PZ/PM. Either you work  with  hierarchical
              representation of the dataset (dendrogram), or you work with  "flat"
              representation returned by this function.  Each  of  representations
              has its own cluster indexing system (former uses  [0, 2*NPoints-2],
              while latter uses [0..K-1]), although  it  is  possible  to  perform
              conversion from one system to another by means of CZ array, returned
              by this function, which allows you to convert indexes stored in CIdx
              to the numeration system used by Rep.Z.
              
        NOTE: this subroutine is optimized for moderate values of K. Say, for  K=5
              it will perform many times faster than  for  K=100.  Its  worst-case
              performance is O(N*K), although in average case it  performs  better
              (up to O(N*log(K))).

          -- ALGLIB --
             Copyright 10.07.2012 by Bochkanov Sergey
        *************************************************************************/
        public static void clusterizerseparatedbycorr(ahcreport rep,
            double r,
            ref int k,
            ref int[] cidx,
            ref int[] cz)
        {
            // Outputs are reset first so that they are well defined on every path.
            k = 0;
            cidx = new int[0];
            cz = new int[0];

            // R must be a finite correlation value from [-1,+1].
            alglib.ap.assert((math.isfinite(r) && (double)(r)>=(double)(-1)) && (double)(r)<=(double)(1), "ClusterizerSeparatedByCorr: R is not finite or is outside of [-1,+1]");

            //
            // Determine K - number of clusters of the cut.
            //
            // A non-empty dataset always has at least one cluster. An empty one
            // (Rep.NPoints=0) has none: K=0 is the only value accepted by
            // ClusterizerGetKClusters() in this case, whereas K=1 would trigger
            // its "K>NPoints" assertion.
            //
            // Starting from the top of the tree, each merge whose distance is
            // at least 1-R is undone, which adds one more cluster. Rep.MergeDist
            // is read from its last element (the topmost merge) downwards.
            //
            k = rep.npoints>0 ? 1 : 0;
            while( k<rep.npoints && (double)(rep.mergedist[rep.npoints-1-k])>=(double)(1-r) )
            {
                k = k+1;
            }

            // Build flat representation (CIdx, CZ) of the K top clusters.
            clusterizergetkclusters(rep, k, ref cidx, ref cz);
        }
Example #7
0
            /*************************************************************************
            This function performs agglomerative hierarchical clustering

            FOR USERS OF SMP EDITION:

              ! This function can utilize multicore capabilities of  your system.  In
              ! order to do this you have to call version with "smp_" prefix,   which
              ! indicates that multicore code will be used.
              ! 
              ! This note is given for users of SMP edition; if you use GPL  edition,
              ! or commercial edition of ALGLIB without SMP support, you  still  will
              ! be able to call smp-version of this function,  but  all  computations
              ! will be done serially.
              !
              ! We recommend you to carefully read ALGLIB Reference  Manual,  section
              ! called 'SMP support', before using parallel version of this function.
              !
              ! You should remember that starting/stopping a worker thread always has
              ! non-zero  cost.  Multicore  version  is  pretty  efficient  on  large
              ! problems  which  need  more  than  1.000.000 operations to be solved,
              ! gives  moderate  speed-up in mid-range (from 100.000 to 1.000.000 CPU
              ! cycles), but gives no speed-up for small problems (less than  100.000
              ! operations).

            INPUT PARAMETERS:
                S       -   clusterizer state, initialized by ClusterizerCreate()

            OUTPUT PARAMETERS:
                Rep     -   clustering results; see description of AHCReport
                            structure for more information.

            NOTE 1: hierarchical clustering algorithms require large amounts of memory.
                    In particular, this implementation needs  sizeof(double)*NPoints^2
                    bytes, which are used to store distance matrix. In  case  we  work
                    with user-supplied matrix, this amount is multiplied by 2 (we have
                    to store original matrix and to work with its copy).
                
                    For example, problem with 10000 points  would require 800M of RAM,
                    even when working in a 1-dimensional space.

              -- ALGLIB --
                 Copyright 10.07.2012 by Bochkanov Sergey
            *************************************************************************/
            public static void clusterizerrunahc(clusterizerstate s,
                ahcreport rep)
            {
                int npoints = s.npoints;
                int nfeatures = s.nfeatures;

                // The report always carries the dataset size, even for degenerate inputs.
                rep.npoints = npoints;

                // Zero or one point: there are no merges, so every merge-related
                // array is empty. For a single point Rep.P is {0} (a new int[1]
                // is zero-filled).
                if (npoints == 0 || npoints == 1)
                {
                    rep.p = new int[npoints];
                    rep.z = new int[0, 0];
                    rep.pz = new int[0, 0];
                    rep.pm = new int[0, 0];
                    rep.mergedist = new double[0];
                    return;
                }

                // DistType=-1: cluster directly on the matrix stored in S.D.
                if (s.disttype == -1)
                {
                    clusterizerrunahcinternal(s, ref s.d, rep);
                    return;
                }

                // Otherwise build the distance matrix from the dataset into a
                // local buffer and cluster on that buffer.
                double[,] distances = new double[0, 0];
                clusterizergetdistances(s.xy, npoints, nfeatures, s.disttype, ref distances);
                clusterizerrunahcinternal(s, ref distances, rep);
            }