/// <summary>
/// Calculates a new centroid for a cluster as the per-dimension arithmetic mean
/// of every vector in <c>_vectors</c> whose <c>ClusterId</c> equals
/// <paramref name="clusterIndex"/>.
/// </summary>
/// <param name="clusterIndex">The ID of the cluster to compute the centroid for.</param>
/// <returns>
/// A new centroid with <c>_vectorLength</c> coordinates. When no vector belongs to
/// the cluster, the returned centroid has all coordinates set to 0 rather than NaN.
/// </returns>
private KmeansVector CalculateCentroidPerCluster(int clusterIndex)
{
    // new double[n] is zero-initialised, so this doubles as the running sum.
    KmeansVector newCentroid = new KmeansVector(new double[_vectorLength]);

    // Number of vectors that belong to the requested cluster.
    int totalAmountVectors = 0;

    // Sum all the vectors with the same cluster ID, dimension by dimension.
    foreach (KmeansVector vector in _vectors)
    {
        if (vector.ClusterId != clusterIndex)
        {
            continue;
        }

        for (int dimension = 0; dimension < vector.Coordinates.Length; dimension++)
        {
            newCentroid.Coordinates[dimension] += vector.Coordinates[dimension];
        }

        totalAmountVectors++;
    }

    // An empty cluster would turn every coordinate into 0.0 / 0 = NaN. NaN fails
    // every ordered comparison, so a NaN centroid corrupts later nearest-centroid
    // searches. Return the zero-initialised centroid instead.
    if (totalAmountVectors == 0)
    {
        return newCentroid;
    }

    // Turn the per-dimension sums into means.
    for (int dimension = 0; dimension < newCentroid.Coordinates.Length; dimension++)
    {
        newCentroid.Coordinates[dimension] /= totalAmountVectors;
    }

    return newCentroid;
}
/// <summary>
/// Assigns the given vector to its nearest centroid, measured by Euclidean distance.
/// Walks every entry of <c>_centroids</c> and, whenever a closer one is found, stores
/// that distance in <c>ShortestDistanceToCentroid</c> and the centroid's index in
/// <c>ClusterId</c> on the vector.
/// </summary>
/// <param name="vector">The vector whose nearest centroid is searched for; it is updated in place.</param>
private void FindClosestCentroid(KmeansVector vector)
{
    // Negative sentinel: no candidate has been accepted yet.
    double bestSoFar = -1;

    for (int candidate = 0; candidate < _centroids.Count; candidate++)
    {
        var centroidCoordinates = _centroids[candidate].Coordinates;

        // Accumulate the squared differences over every dimension of the centroid.
        double sumOfSquares = 0;
        for (int axis = 0; axis < centroidCoordinates.Length; axis++)
        {
            double difference = centroidCoordinates[axis] - vector.Coordinates[axis];
            sumOfSquares += Math.Pow(difference, 2);
        }

        double euclidean = Math.Sqrt(sumOfSquares);

        // The first candidate is always accepted; later ones only when strictly closer.
        bool isCloser = bestSoFar < 0 || euclidean < bestSoFar;
        if (!isCloser)
        {
            continue;
        }

        bestSoFar = euclidean;
        vector.ShortestDistanceToCentroid = euclidean;
        vector.ClusterId = candidate;
    }
}