/// <summary>
/// Select the initial cluster centroids by deterministic farthest-first seeding.
/// Chromosome 0 is always the first centroid; every further centroid is the chromosome
/// whose distance (as reported by <c>Friends.calcDistance</c>) to its nearest
/// already-selected centroid is largest. Ties go to the lowest chromosome index.
/// Note: unlike textbook k-means++, no random sampling is involved.
/// </summary>
/// <param name="numClusters">
/// Number of centroids to select. The value is not checked against
/// <c>chromosomes.Length</c>; if it is larger, some chromosome is selected more than once.
/// </param>
/// <returns>
/// One gene vector per cluster. Each vector is a fresh array holding a copy of the
/// selected chromosome's genes.
/// </returns>
public double[][] calcClusterCentroidsInit(int numClusters)
{
    // Gene count is read from the first chromosome only.
    // NOTE(review): assumes every chromosome has the same number of genes - confirm.
    int numGenes = chromosomes[0].GetGenes().Length;

    //Initialise array
    double[][] centroidVectorsInit = new double[numClusters][];
    List<int> centroidChromoIndexes = new List<int>();

    // 1. Choose the initial centroid.
    //    Just choose the first chromosome (instead of a random one) to keep some continuity.
    int newestCentroid = 0;
    centroidChromoIndexes.Add(newestCentroid);

    // Distance from each chromosome to the nearest centroid selected so far.
    // Starts at +infinity so the first real distance always replaces it.
    List<double> nearestDistances = new List<double>(chromosomes.Length);
    for (int i = 0; i < chromosomes.Length; i++)
    {
        nearestDistances.Add(double.PositiveInfinity);
    }

    while (centroidChromoIndexes.Count < numClusters)
    {
        // 2. Only the most recently added centroid can shrink a chromosome's nearest
        //    distance, so fold just that one in instead of re-measuring against every
        //    centroid chosen so far.
        var newestGenes = chromosomes[newestCentroid].GetGenes();
        for (int i = 0; i < chromosomes.Length; i++)
        {
            double distToNewest = Friends.calcDistance(chromosomes[i].GetGenes(), newestGenes);
            // Math.Min propagates NaN, matching what Min() over all distances would yield.
            nearestDistances[i] = System.Math.Min(nearestDistances[i], distToNewest);
        }

        // 3. Choose the next centroid furthest away from the already selected ones.
        //    IndexOf returns the first match, so ties resolve to the lowest index.
        newestCentroid = nearestDistances.IndexOf(nearestDistances.Max());
        centroidChromoIndexes.Add(newestCentroid);
    }

    // Copy the genes of every selected chromosome into the result
    for (int i = 0; i < numClusters; i++)
    {
        var selectedGenes = chromosomes[centroidChromoIndexes[i]].GetGenes();
        centroidVectorsInit[i] = new double[numGenes];
        for (int j = 0; j < numGenes; j++)
        {
            centroidVectorsInit[i][j] = selectedGenes[j];
        }
    }

    return centroidVectorsInit;
}
/// <summary>
/// d) After k-means clustering: pick one representative chromosome per cluster and store,
/// on every chromosome, its distance to the representative of its own cluster.
/// The representative is the cluster member closest to the cluster mean returned by
/// <c>calcClusterMeans</c>; on equal distances the member with the lowest index wins.
/// </summary>
/// <remarks>
/// This method only ever sets <c>isRepresentative</c> to true; flags left over from an
/// earlier call are not cleared here.
/// A cluster without any member makes the <c>Min()</c> call fail
/// (presumably LINQ's InvalidOperationException - confirm).
/// </remarks>
/// <param name="numClusters">Number of clusters; every <c>clusterId</c> is used as an index below this value.</param>
public void calcKMeansRepresentatives(int numClusters)
{
    double[][] meanVectors = calcClusterMeans(numClusters);

    // For each cluster: the distance of every member to the cluster mean, plus a
    // parallel list with the chromosome index each distance belongs to.
    List<double>[] memberDistances = new List<double>[numClusters];
    List<int>[] memberIndexes = new List<int>[numClusters];
    for (int c = 0; c < numClusters; c++)
    {
        memberDistances[c] = new List<double>();
        memberIndexes[c] = new List<int>();
    }

    // Sort every chromosome into the bucket of its current cluster
    for (int n = 0; n < chromosomes.Length; n++)
    {
        var cluster = chromosomes[n].clusterId;
        memberDistances[cluster].Add(Friends.calcDistance(chromosomes[n].GetGenes(), meanVectors[cluster]));
        memberIndexes[cluster].Add(n);
    }

    // The member nearest to the mean becomes the representative of its cluster
    int[] representativeOf = new int[numClusters];
    for (int c = 0; c < numClusters; c++)
    {
        List<double> dists = memberDistances[c];
        // IndexOf returns the first occurrence of the minimum
        int repId = memberIndexes[c][dists.IndexOf(dists.Min())];
        chromosomes[repId].isRepresentative = true;
        representativeOf[c] = repId;
    }

    // Store the distance from each chromosome to the representative of its cluster
    for (int n = 0; n < chromosomes.Length; n++)
    {
        int repId = representativeOf[chromosomes[n].clusterId];
        chromosomes[n].distToRepresentative = Friends.calcDistance(chromosomes[n].GetGenes(), chromosomes[repId].GetGenes());
    }
}
/// <summary>
/// c) Re-assign chromosomes to clusters: each chromosome's genes are measured against
/// every cluster mean vector and <c>identifyClusterId</c> picks the new cluster from
/// those distances. The new assignment is written back only if it leaves no cluster empty.
/// </summary>
/// <param name="numClusters">Number of clusters, i.e. how many mean vectors are compared.</param>
/// <param name="clusterMeanVectors">Mean vector of each cluster, indexed by cluster id.</param>
/// <returns>
/// True when at least one chromosome changed cluster and every cluster kept at least one
/// member (the clustering loop should continue). False when nothing changed, or when the
/// new assignment would empty a cluster - in that case the existing cluster ids are left untouched.
/// </returns>
public bool updateClustering(int numClusters, double[][] clusterMeanVectors)
{
    int chromoCount = chromosomes.Length;

    // Proposed cluster ids are staged here so they can be validated before being committed
    int[] proposedIds = new int[chromoCount];
    bool anyReassigned = false;

    for (int c = 0; c < chromoCount; c++)
    {
        // Distance from this chromosome to the mean vector of every cluster
        double[] distToMeans = new double[numClusters];
        for (int k = 0; k < numClusters; k++)
        {
            distToMeans[k] = Friends.calcDistance(chromosomes[c].GetGenes(), clusterMeanVectors[k]);
        }

        proposedIds[c] = identifyClusterId(distToMeans);
        if (chromosomes[c].clusterId != proposedIds[c])
        {
            anyReassigned = true;
        }
    }

    // Count the members each cluster would get (a new int array starts out all zero)
    int[] membersPerCluster = new int[numClusters];
    foreach (int id in proposedIds)
    {
        membersPerCluster[id]++;
    }

    // An empty cluster invalidates the proposal: keep the old ids and stop the loop
    foreach (int members in membersPerCluster)
    {
        if (members == 0)
        {
            return false;
        }
    }

    // Every cluster is populated, so commit the new assignment
    for (int c = 0; c < chromoCount; c++)
    {
        chromosomes[c].clusterId = proposedIds[c];
    }

    // Keep iterating only while assignments are still moving
    return anyReassigned;
}