Пример #1
0
 /// <summary>
 /// Calls <c>SetCentroidAsMean(true)</c> on every cluster held in <c>clusters</c>.
 /// </summary>
 /// <remarks>
 /// The <c>true</c> argument presumably selects the high-dimensionality variant of the
 /// centroid computation, judging by this method's name; confirm against <c>Cluster</c>.
 /// </remarks>
 public void SetCentroidsAsMeansHighDimensionality()
 {
     foreach (Cluster current in clusters)
     {
         // Same call for every cluster; only the flag differs from SetCentroidsAsMeans-style callers.
         current.SetCentroidAsMean(true);
     }
 }
Пример #2
0
 /// <summary>
 /// Calls the parameterless <c>SetCentroidAsMean()</c> on every cluster held in <c>clusters</c>.
 /// </summary>
 public void SetCentroidsAsMeans()
 {
     foreach (Cluster current in clusters)
     {
         current.SetCentroidAsMean();
     }
 }
Пример #3
0
        /**
         * ART 2A clustering: repeatedly passes over the input examples and, for each example,
         * either adds it to an existing prototype (cluster) or creates a new prototype for it.
         * Passes repeat until param.pass passes have been made or the share of examples that
         * changed prototype in a pass (the "fluctuation", in percent) is no longer above param.error.
         * How exactly it is working can be found at www.fi.muni.cz/~xhudik/art/drafts
         * \param sample  set of input examples (Ek)
         * \param param   all input parameters set by the user or defaults; this method reads
         *                pass, error, alpha, beta and vigilance
         * \param results receives the outcome: its proto, proto_seq and fluctuation members are assigned
         **/
        public void art2A(List<Math.DynamicVector<float>> sample, in_param param, Clust results)
        {
            // prototype with the highest score for the example currently being processed
            Math.DynamicVector<float> P;

            // list of all prototypes
            List<Math.DynamicVector<float>> prot;

            // the best representation of the prototypes of the whole history
            List<Math.DynamicVector<float>> prot_best;

            // for every prototype, the sequence of sample indices (Ek) from which it has been created;
            // it is possible to reconstruct a prototype from its sequence
            // NOTE(review): the original comment said "defined in art_common.h", which looks like a
            // leftover from a C++ version of this code -- confirm
            List<List<int>> prot_seq;

            // the best representation of the prototype sequences of the whole history
            List<List<int>> prot_seq_best;

            // list of prototypes which were already tried for the current example
            List<Math.DynamicVector<float>> used;

            used = new List<Math.DynamicVector<float>>();
            prot = new List<Math.DynamicVector<float>>();
            prot_seq = new List<List<int>>();
            prot_best = new List<Math.DynamicVector<float>>();
            prot_seq_best = new List<List<int>>();

            // percentage of examples that changed prototype in the last pass; starts at 100 so the loop is entered
            float fluctuation = 100.0f;

            // the lowest fluctuation of the whole history
            // it is initialized to an impossible value (more than 100% cannot occur) to avoid problems with the first iteration
            float fluctuation_best = 120.0f;

            // how many times the algorithm has run through the samples
            int pass = 0;

            // changed[i] is true when example i has been (re)assigned to a prototype in the current pass
            List<bool> changed;

            int i, j;

            changed = new List<bool>();
            for( i = 0; i < sample.Count; i++ )
            {
                changed.Add(true);
            }

            // loop while the number of passes is lower than param.pass and the fluctuation is higher than param.error
            while((pass<param.pass)&&(fluctuation>param.error))
            {
                int number_changed;

                // reset the changed flags for this pass
                for( i = 0; i < sample.Count; i++ )
                {
                    changed[i] = false;
                }

                // loop over all examples
                for( i = 0; i < sample.Count; i++ )
                {
                    // forget which prototypes were tried for the previous example
                    used.Clear();

                    do
                    {
                        float score;
                        float alphaSum;

                        // find the best prototype for the current Ek, skipping those already in 'used'
                        P = bestPrototype2A(sample[i],prot,used);

                        // an empty vector is treated as "there is no best prototype"
                        if( P.array.Length == 0 )
                        {
                            int prototypeIndex;

                            // check whether the instance is already included in some other prototype
                            prototypeIndex = Common.instanceInSequence(prot_seq,i);
                            if( prototypeIndex != -1 )
                            {
                                // if so, remove it (recreate the prototype without the instance)
                                removeInstance(sample, i, prot, prototypeIndex, prot_seq, param.beta, param.vigilance);
                            }

                            createPrototype(sample, i, prot, prot_seq, param.vigilance);
                            changed[i] = true;
                            break;
                        }

                        // remember P as already tried for this example
                        used.Add(P);

                        // compute the similarity between P and Ek (the "score") and alpha * sum_j Ek[j]
                        score = countScore(P, sample[i]);
                        alphaSum = 0.0f;
                        for( j = 0; j < sample[i].array.Length; j++ )
                        {
                            alphaSum += param.alpha*sample[i][j];
                        }

                        // the score must reach alphaSum, otherwise a new prototype is created (else branch below)
                        if( score >= alphaSum )
                        {
                            // the score must also reach the vigilance threshold for sample[i] to join P
                            if( score >= param.vigilance )
                            {
                                int prot_index;
                                int Pindex;

                                // if the example Ek is already included in some prototype -- find it
                  	            prot_index = Common.instanceInSequence(prot_seq,i);
                  	            if( prot_index != -1 )
                                {
                                    // if the found prototype is the current one (P), nothing changes -- go on to the next Ek
                                    // NOTE(review): whether '==' compares references or contents depends on
                                    // whether DynamicVector overloads the operator -- confirm
                                    if( prot[prot_index] == P )
                                    {
                                        break;
                                    }
                                    else
                                    {
                                        // re-build the old prototype without the sample
                                        removeInstance(sample,i,prot,prot_index,prot_seq,param.beta,param.vigilance);
                                    }
                                }

                                // find the index of P among the prototypes
                                Pindex = Common.findItem(prot, P, true);

                                // add the instance to the current prototype
                                addInstance(sample[i],prot[Pindex],param.beta);
                                prot_seq[Pindex].Add(i);
                                changed[i]=true;
                                break;
                            }

                            // vigilance not reached: try the next best P
                            // ('continue' jumps to the do/while condition below)
                            else
                            {
                                continue;
                            }
                        } // score >= alphaSum
                        else
                        {
                            int prot_index;

                            // the prototype is not similar enough to the example (sample[i]), so create a new prototype

                            // check whether the instance is already in some other prototype
                            prot_index=Common.instanceInSequence(prot_seq,i);
                            if( prot_index != -1 )
                            {
                                // if so, remove it (recreate the prototype without the instance)
                                removeInstance(sample,i,prot,prot_index,prot_seq,param.beta,param.vigilance);
                            }

                            createPrototype(sample,i,prot, prot_seq,param.vigilance);
                            changed[i] = true;
                            break;
                        }
                    }
                    // keep trying further prototypes unless there are as many prototypes as examples
                    while( prot.Count != sample.Count );

                } // for sample

                // compute the statistics for this pass
                number_changed=0;
                for( j = 0; j < changed.Count; j++ )
                {
                    if( changed[j] )
                    {
                        number_changed++;
                    }
                }
                fluctuation = ((float)number_changed/sample.Count)*100;

                pass++;

                // legacy debug output kept from an earlier version:
                //cout << "Pass: " << pass << ", fluctuation: " << fluctuation << "%" << ", clusters: " << prot.size() << endl;

                // test whether this pass has a lower fluctuation than any pass before
                if(fluctuation < fluctuation_best){

                   // if so, take the current state as the new best result
                   // NOTE(review): these assignments copy references, not contents. From here on
                   // prot_best/prot_seq_best are the same list objects as prot/prot_seq, so changes
                   // made by later passes are visible through them as well -- confirm whether a
                   // snapshot copy was intended
                   prot_best = prot;
                   prot_seq_best = prot_seq;
                   fluctuation_best = fluctuation;
                   }

            } // while

            // hand the results to the caller
            results.proto = prot_best;
            results.proto_seq = prot_seq_best;
            results.fluctuation  = fluctuation_best;
        }
Пример #4
0
        /// <summary>
        /// Performs one agglomeration step: merges the pair of clusters whose distance equals
        /// <c>nextDistance</c>, rebuilds the distance matrix with the Lance-Williams update,
        /// appends the merge to <c>dendrogram</c> and re-evaluates the stopping condition.
        /// </summary>
        /// <remarks>
        /// If the algorithm is already stopped, the call only invokes <c>SetCentroidAsMean()</c> on
        /// every cluster. If at most one cluster is left, the algorithm is marked as stopped.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// No pair of distinct clusters has a distance equal to <c>nextDistance</c>.
        /// </exception>
        public override void IterateOnce()
        {
            double AlphaI = 0;
            double AlphaJ = 0;
            double Beta   = 0;
            double Gamma  = 0;
            int    Winner;

            if (stopped)
            {
                Parallel.ForEach(clusters, Clust => {
                    Clust.SetCentroidAsMean();
                });
                return;
            }

            if (clusters.Count <= 1)
            {
                stopped = true;
                return;
            }

            // Work on copies so that the current state is only replaced once the step is complete.
            DenseMatrix InterimMatrix = new DenseMatrix(currentDistanceMatrix.RowCount);
            currentDistanceMatrix.CopyTo(InterimMatrix);
            Cluster[] InterimClusters = new Cluster[clusters.Count];
            Parallel.For(0, clusters.Count, i => {
                InterimClusters[i] = (Cluster)clusters[i].Clone();
            });
            List <Cluster> InterimClusterList = InterimClusters.ToList();

            // Locate the pair to merge. The scan is sequential and stops at the first match so that
            // the chosen pair is deterministic when several entries share the minimum (a symmetric
            // matrix always holds both (i, j) and (j, i)). Exact equality is intended here:
            // nextDistance is itself taken from the matrix values (see the Min() call below).
            Tuple <int, int, double> CurrentMinimumDistance = null;
            foreach (Tuple <int, int, double> CurrentTuple in currentDistanceMatrix.EnumerateIndexed())
            {
                if (CurrentTuple.Item3 == nextDistance && CurrentTuple.Item1 != CurrentTuple.Item2)
                {
                    CurrentMinimumDistance = CurrentTuple;
                    break;
                }
            }
            if (CurrentMinimumDistance == null)
            {
                // Without a valid pair the merge below would combine a cluster with itself.
                throw new InvalidOperationException("No pair of distinct clusters matches the next merge distance.");
            }

            int Survivor = CurrentMinimumDistance.Item1; // cluster that absorbs the other one
            int Absorbed = CurrentMinimumDistance.Item2; // cluster that is removed

            // Initialise the Lance-Williams coefficients. This happens on every iteration because
            // some styles derive them from the clusters being merged.
            switch (style)
            {
            case HACDistanceStyle.Centroid:
                AlphaI = calculateMeanAlpha(clusters[Survivor], clusters[Absorbed]);
                AlphaJ = calculateMeanAlpha(clusters[Absorbed], clusters[Survivor]);
                Beta   = calculateCentroidBeta(clusters[Survivor], clusters[Absorbed]);
                Gamma  = 0;
                break;

            case HACDistanceStyle.CLink:
                AlphaI = 0.5;
                AlphaJ = 0.5;
                Beta   = 0;
                Gamma  = 0.5;
                break;

            case HACDistanceStyle.MeanDist:
                AlphaI = calculateMeanAlpha(clusters[Survivor], clusters[Absorbed]);
                AlphaJ = calculateMeanAlpha(clusters[Absorbed], clusters[Survivor]);
                Beta   = 0;
                Gamma  = 0;
                break;

            case HACDistanceStyle.SLink:
                AlphaI = 0.5;
                AlphaJ = 0.5;
                Beta   = 0;
                Gamma  = -0.5;
                break;

            case HACDistanceStyle.Ward:
                // Computed inside the matrix update loop because the coefficients
                // depend on the third cluster k.
                break;
            }

            // Merge the two clusters in the working list.
            InterimClusterList[Survivor].Merge(InterimClusterList[Absorbed]);
            InterimClusterList.RemoveAt(Absorbed);
            updateClusterIndexes(InterimClusterList);

            // Index of the surviving cluster once the absorbed row/column is gone:
            // it shifts down by one when it was located after the removed index.
            if (Survivor < Absorbed)
            {
                Winner = Survivor;
            }
            else
            {
                Winner = Survivor - 1;
            }
            InterimMatrix = (DenseMatrix)InterimMatrix.RemoveColumn(Absorbed).RemoveRow(Absorbed);

            bool   IsWard     = style == HACDistanceStyle.Ward;
            double DistanceIJ = nextDistance;

            // Recalculate the distances from the merged cluster to all other clusters.
            Parallel.For(0, InterimMatrix.RowCount, i =>
            {
                if (i == Winner)
                {
                    // The diagonal entry is set after the loop.
                    return;
                }

                // i addresses the reduced matrix; clusters and currentDistanceMatrix still have the
                // original size, so indexes at or after the removed one are shifted by one there.
                int Original = i < Absorbed ? i : i + 1;

                // Per-iteration copies: writing the captured coefficients from parallel iterations
                // would let one iteration overwrite the values another one is about to use.
                double LocalAlphaI = AlphaI;
                double LocalAlphaJ = AlphaJ;
                double LocalBeta   = Beta;
                double LocalGamma  = Gamma;
                if (IsWard)
                {
                    LocalAlphaI = calculateWardAlpha(clusters[Survivor], clusters[Absorbed], clusters[Original]);
                    LocalAlphaJ = calculateWardAlpha(clusters[Absorbed], clusters[Survivor], clusters[Original]);
                    LocalBeta   = calculateWardBeta(clusters[Survivor], clusters[Absorbed], clusters[Original]);
                    LocalGamma  = 0;
                }

                double DistanceIK = currentDistanceMatrix[Survivor, Original];
                double DistanceJK = currentDistanceMatrix[Absorbed, Original];
                double Updated    = runLanceWilliamsEquation(DistanceIK, DistanceJK, DistanceIJ, LocalAlphaI, LocalAlphaJ, LocalBeta, LocalGamma);

                // Each iteration writes only its own pair of symmetric cells.
                InterimMatrix[Winner, i] = Updated;
                InterimMatrix[i, Winner] = Updated;
            });
            InterimMatrix[Winner, Winner] = artificialMax;

            lastDistance = nextDistance;
            nextDistance = InterimMatrix.Values.Min();

            // NOTE(review): stopped was false on entry and is not changed above, so the else branch
            // is only reachable if another thread sets it in the meantime -- kept as in the original.
            if (!stopped)
            {
                currentDistanceMatrix = (DenseMatrix)InterimMatrix.Clone();
                double XValue = InterimClusterList[Winner].XValue;

                // The IDs are read from the pre-merge cluster list, which is replaced right after.
                Tuple <string, string, double, double> ThisDendroEntry = new Tuple <string, string, double, double>(clusters[Survivor].ClusterID, clusters[Absorbed].ClusterID, lastDistance, XValue);
                dendrogram.Add(ThisDendroEntry);
                clusters = InterimClusterList;

                iterator++;
                stopped = StoppingConditionMet();
            }
            else
            {
                Parallel.ForEach(clusters, Clust => {
                    Clust.SetCentroidAsMean();
                });
                stopped = true;
            }
        }