/// <summary>
/// Re-centres every cluster by setting its centroid to the mean of its
/// members, using the high-dimensionality variant of the calculation.
/// </summary>
public void SetCentroidsAsMeansHighDimensionality()
{
    for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
    {
        clusters[clusterIndex].SetCentroidAsMean(true);
    }
}
/// <summary>
/// Re-centres every cluster by setting its centroid to the mean of its members.
/// </summary>
public void SetCentroidsAsMeans()
{
    for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
    {
        clusters[clusterIndex].SetCentroidAsMean();
    }
}
/**
 * ART 2A algorithm. Repeatedly passes over the input examples, assigning each
 * example to its most similar prototype (or creating a new prototype when no
 * existing one is similar enough), until either the fluctuation (percentage of
 * examples reassigned in a pass) drops to the -e threshold or the -E pass
 * limit is reached. Details: www.fi.muni.cz/~xhudik/art/drafts
 * \param sample set of input examples (Eks)
 * \param param all input parameters set by the user or defaults
 * \param results receives the best prototypes, their example sequences, and
 *        the best fluctuation observed over the whole run
 **/
public void art2A(List<Math.DynamicVector<float>> sample, in_param param, Clust results)
{
    // prototype with highest score for the current example
    Math.DynamicVector<float> P;
    // list of all prototypes
    List<Math.DynamicVector<float>> prot;
    // the best set of prototypes seen over the whole history
    List<Math.DynamicVector<float>> prot_best;
    // for each prototype, the sequence of sample indices (Eks) it was built from;
    // a prototype can be reconstructed from its sequence (see art_common.h)
    List<List<int>> prot_seq;
    // the best set of prototype sequences seen over the whole history
    List<List<int>> prot_seq_best;
    // prototypes already tried (and rejected) for the current example
    List<Math.DynamicVector<float>> used;

    used = new List<Math.DynamicVector<float>>();
    prot = new List<Math.DynamicVector<float>>();
    prot_seq = new List<List<int>>();
    prot_best = new List<Math.DynamicVector<float>>();
    prot_seq_best = new List<List<int>>();

    float fluctuation = 100.0f;
    // the lowest fluctuation of the whole history; initialized to an impossible
    // value (> 100%) so the first completed pass always becomes the best
    float fluctuation_best = 120.0f;
    // how many times the loop has run through all samples
    int pass = 0;
    // per-example flag: was this Ek reassigned to another prototype this pass?
    List<bool> changed;
    int i, j;
    changed = new List<bool>();
    for (i = 0; i < sample.Count; i++)
    {
        changed.Add(true);
    }

    // iterate while the error is above the -e parameter and the pass count is
    // below the -E parameter
    while ((pass < param.pass) && (fluctuation > param.error))
    {
        int number_changed;
        // nullifying changed values
        for (i = 0; i < sample.Count; i++)
        {
            changed[i] = false;
        }
        // cycle over the examples
        for (i = 0; i < sample.Count; i++)
        {
            // zeroing 'used' prototypes
            used.Clear();
            do
            {
                float score;
                float alphaSum;
                // find the best not-yet-tried prototype for the current Ek
                P = bestPrototype2A(sample[i], prot, used);
                // if there is no best prototype, this Ek founds a new one
                if (P.array.Length == 0)
                {
                    int prototypeIndex;
                    // check whether the example is already included in some other prototype
                    prototypeIndex = Common.instanceInSequence(prot_seq, i);
                    if (prototypeIndex != -1)
                    {
                        // if so, remove it (recreate the prototype without the example)
                        removeInstance(sample, i, prot, prototypeIndex, prot_seq, param.beta, param.vigilance);
                    }
                    createPrototype(sample, i, prot, prot_seq, param.vigilance);
                    changed[i] = true;
                    break;
                }
                // add P among 'used'
                used.Add(P);
                // similarity between P and Ek ("score"), and alpha * sum_i Eki
                score = countScore(P, sample[i]);
                alphaSum = 0.0f;
                for (j = 0; j < sample[i].array.Length; j++)
                {
                    alphaSum += param.alpha * sample[i][j];
                }
                // if the similarity is sufficient, sample[i] is a member of P
                if (score >= alphaSum)
                {
                    if (score >= param.vigilance)
                    {
                        int prot_index;
                        int Pindex;
                        // if the example Ek is already included in some prototype -- find it
                        prot_index = Common.instanceInSequence(prot_seq, i);
                        if (prot_index != -1)
                        {
                            // if the found prototype is the current one (P), go on to the next Ek
                            if (prot[prot_index] == P)
                            {
                                break;
                            }
                            else
                            {
                                // re-build that prototype without the example
                                removeInstance(sample, i, prot, prot_index, prot_seq, param.beta, param.vigilance);
                            }
                        }
                        // find the index of P among the prototypes
                        Pindex = Common.findItem(prot, P, true);
                        // add the example to the current prototype
                        addInstance(sample[i], prot[Pindex], param.beta);
                        prot_seq[Pindex].Add(i);
                        changed[i] = true;
                        break;
                    }
                    // passed the alpha test but not vigilance: try the next-best P
                    else
                    {
                        continue;
                    }
                } // score >= alphaSum
                else
                {
                    int prot_index;
                    // the prototype is not similar enough to the example, so create a
                    // new prototype; first check whether the example is already in
                    // some other prototype
                    prot_index = Common.instanceInSequence(prot_seq, i);
                    if (prot_index != -1)
                    {
                        // if so, remove it (recreate the prototype without the example)
                        removeInstance(sample, i, prot, prot_index, prot_seq, param.beta, param.vigilance);
                    }
                    createPrototype(sample, i, prot, prot_seq, param.vigilance);
                    changed[i] = true;
                    break;
                }
            } while (prot.Count != sample.Count);
        } // for sample

        // statistics for this pass
        number_changed = 0;
        for (j = 0; j < changed.Count; j++)
        {
            if (changed[j])
            {
                number_changed++;
            }
        }
        fluctuation = ((float)number_changed / sample.Count) * 100;
        pass++;

        // keep the best (lowest-fluctuation) clustering seen so far
        if (fluctuation < fluctuation_best)
        {
            // FIX: the original assigned the lists by reference
            // (prot_best = prot), so later passes kept mutating the "best"
            // snapshot and the history tracking had no effect. Copy the lists
            // instead so the snapshot is stable.
            // NOTE(review): the prototype vectors themselves are still shared
            // and may be updated in place by addInstance/removeInstance on
            // later passes; a per-vector deep clone would make the snapshot
            // fully independent -- confirm Math.DynamicVector exposes a copy
            // operation.
            prot_best = new List<Math.DynamicVector<float>>(prot);
            prot_seq_best = new List<List<int>>(prot_seq.Count);
            foreach (List<int> seq in prot_seq)
            {
                prot_seq_best.Add(new List<int>(seq));
            }
            fluctuation_best = fluctuation;
        }
    } // while

    // publish results
    results.proto = prot_best;
    results.proto_seq = prot_seq_best;
    results.fluctuation = fluctuation_best;
}
/// <summary>
/// Performs one agglomerative clustering step: locates the cluster pair at the
/// current minimum distance, merges them, and recomputes the merged cluster's
/// distances to all others via the Lance-Williams recurrence for the
/// configured linkage <c>style</c>. Commits the new cluster list, distance
/// matrix, and dendrogram entry, then re-evaluates the stopping condition.
/// </summary>
public override void IterateOnce()
{
    double AlphaI = 0;
    double AlphaJ = 0;
    double Beta = 0;
    double Gamma = 0;
    int Winner;
    if (!stopped)
    {
        if (clusters.Count > 1)
        {
            // Work on copies so the committed state is untouched until the step succeeds.
            DenseMatrix InterimMatrix = new DenseMatrix(currentDistanceMatrix.RowCount);
            currentDistanceMatrix.CopyTo(InterimMatrix);
            Cluster[] InterimClusters = new Cluster[clusters.Count];
            Parallel.For(0, clusters.Count, i =>
            {
                InterimClusters[i] = (Cluster)clusters[i].Clone();
            });
            List<Cluster> InterimClusterList = InterimClusters.ToList<Cluster>();

            // Find the matrix entry holding the current minimum distance.
            // NOTE(review): if several entries equal nextDistance, the worker threads
            // race on this write; the tie-break was already nondeterministic in the
            // original, so the parallel scan is kept as-is.
            Tuple<int, int, double> CurrentMinimumDistance = new Tuple<int, int, double>(0, 0, 0);
            Parallel.ForEach<Tuple<int, int, double>>(currentDistanceMatrix.EnumerateIndexed(), CurrentTuple =>
            {
                if (CurrentTuple.Item3 == nextDistance)
                {
                    CurrentMinimumDistance = CurrentTuple;
                }
            });

            // Initialise coefficient values for the Lance-Williams algorithm.
            // This lives inside IterateOnce because some styles require
            // iterative recalculation.
            switch (style)
            {
                case HACDistanceStyle.Centroid:
                    AlphaI = calculateMeanAlpha(clusters[CurrentMinimumDistance.Item1], clusters[CurrentMinimumDistance.Item2]);
                    AlphaJ = calculateMeanAlpha(clusters[CurrentMinimumDistance.Item2], clusters[CurrentMinimumDistance.Item1]);
                    Beta = calculateCentroidBeta(clusters[CurrentMinimumDistance.Item1], clusters[CurrentMinimumDistance.Item2]);
                    Gamma = 0;
                    break;
                case HACDistanceStyle.CLink:
                    AlphaI = 0.5;
                    AlphaJ = 0.5;
                    Beta = 0;
                    Gamma = 0.5;
                    break;
                case HACDistanceStyle.MeanDist:
                    AlphaI = calculateMeanAlpha(clusters[CurrentMinimumDistance.Item1], clusters[CurrentMinimumDistance.Item2]);
                    AlphaJ = calculateMeanAlpha(clusters[CurrentMinimumDistance.Item2], clusters[CurrentMinimumDistance.Item1]);
                    Beta = 0;
                    Gamma = 0;
                    break;
                case HACDistanceStyle.SLink:
                    AlphaI = 0.5;
                    AlphaJ = 0.5;
                    Beta = 0;
                    Gamma = -0.5;
                    break;
                case HACDistanceStyle.Ward:
                    // Has to be computed inside the matrix-update loop,
                    // as it requires values for cluster k.
                    break;
            }

            // Handle cluster merging in the list.
            InterimClusterList[CurrentMinimumDistance.Item1].Merge(InterimClusterList[CurrentMinimumDistance.Item2]);
            InterimClusterList.RemoveAt(CurrentMinimumDistance.Item2);
            updateClusterIndexes(InterimClusterList);

            // Removal of the subsumed cluster's row/column shifts later indices down
            // by one, so adjust the surviving (winner) index accordingly.
            if (CurrentMinimumDistance.Item1 < CurrentMinimumDistance.Item2)
            {
                Winner = CurrentMinimumDistance.Item1;
            }
            else
            {
                // Item1 sits beyond the removed row/column in the distance matrix.
                Winner = CurrentMinimumDistance.Item1 - 1;
            }
            InterimMatrix = (DenseMatrix)InterimMatrix.RemoveColumn(CurrentMinimumDistance.Item2).RemoveRow(CurrentMinimumDistance.Item2);

            // Recalculate distances from the new cluster to all other clusters.
            Parallel.For(0, InterimMatrix.RowCount, i =>
            {
                // FIX: the original wrote the shared AlphaI/AlphaJ/Beta/Gamma captures
                // from every worker thread in the Ward branch -- a data race that could
                // pair row k's distances with another row's coefficients. Use
                // per-iteration locals instead.
                double alphaI = AlphaI;
                double alphaJ = AlphaJ;
                double beta = Beta;
                double gamma = Gamma;
                if (style == HACDistanceStyle.Ward)
                {
                    // Ward coefficients depend on cluster k, hence the per-row computation.
                    alphaI = calculateWardAlpha(clusters[CurrentMinimumDistance.Item1], clusters[CurrentMinimumDistance.Item2], clusters[i]);
                    alphaJ = calculateWardAlpha(clusters[CurrentMinimumDistance.Item2], clusters[CurrentMinimumDistance.Item1], clusters[i]);
                    beta = calculateWardBeta(clusters[CurrentMinimumDistance.Item1], clusters[CurrentMinimumDistance.Item2], clusters[i]);
                    gamma = 0;
                }
                double DistanceIJ = nextDistance;
                // NOTE(review): i indexes the reduced matrix, while currentDistanceMatrix
                // and clusters still use pre-removal indices; rows at or beyond the
                // removed index appear shifted by one -- confirm this offset is intended.
                double DistanceIK = currentDistanceMatrix[CurrentMinimumDistance.Item1, i];
                double DistanceJK = currentDistanceMatrix[CurrentMinimumDistance.Item2, i];
                InterimMatrix[Winner, i] = runLanceWilliamsEquation(DistanceIK, DistanceJK, DistanceIJ, alphaI, alphaJ, beta, gamma);
                InterimMatrix[i, Winner] = InterimMatrix[Winner, i];
            });
            // The merged cluster's self-distance must never win a minimum search.
            // (Hoisted out of the loop: it is loop-invariant and was redundantly
            // rewritten by every iteration.)
            InterimMatrix[Winner, Winner] = artificialMax;

            lastDistance = nextDistance;
            nextDistance = InterimMatrix.Values.Min();
            if (!stopped)
            {
                // Commit the step: matrix, dendrogram entry, cluster list, counters.
                currentDistanceMatrix = (DenseMatrix)InterimMatrix.Clone();
                double XValue = InterimClusterList[Winner].XValue;
                Tuple<string, string, double, double> ThisDendroEntry = new Tuple<string, string, double, double>(
                    clusters[CurrentMinimumDistance.Item1].ClusterID,
                    clusters[CurrentMinimumDistance.Item2].ClusterID,
                    lastDistance,
                    XValue);
                dendrogram.Add(ThisDendroEntry);
                clusters = InterimClusterList;
                iterator++;
                stopped = StoppingConditionMet();
            }
            else
            {
                Parallel.ForEach(clusters, Clust =>
                {
                    Clust.SetCentroidAsMean();
                });
                stopped = true;
            }
        }
        else
        {
            // Nothing left to merge.
            stopped = true;
        }
    }
    else
    {
        // Already stopped: finalise centroids only.
        Parallel.ForEach(clusters, Clust =>
        {
            Clust.SetCentroidAsMean();
        });
    }
}