/// <summary>
/// Finds, for every cluster, the closest other non-empty cluster and the centroid-to-centroid distance to it.
/// Distances are measured with KMeans.calculateDistance.
/// </summary>
/// <param name="Centroids">the centroids of the clusters</param>
/// <param name="SamplesInClusters">number of samples in each cluster; a count of 0 marks the cluster as empty</param>
/// <returns>Tuple of two arrays, each holding one entry per cluster: <br />
/// - Item 1: index of the nearest cluster (-1 for an empty cluster) <br />
/// - Item 2: distance to the nearest cluster (-1 for an empty cluster)
/// </returns>
/// <remarks>
/// Empty clusters are never selected as a nearest cluster. When no candidate distance is below double.MaxValue
/// (for instance because no other non-empty cluster exists), the entry keeps index 0 and distance double.MaxValue.
/// </remarks>
/// <exception cref="KMeansException">thrown with code 400, wrapping any exception raised while scanning the clusters</exception>
private static Tuple<int[], double[]> calculateNearestCluster(double[][] Centroids, int[] SamplesInClusters)
{
    // Allocated outside the try block on purpose: a null Centroids argument surfaces unwrapped.
    int clusterCount = Centroids.Length;
    int[] nearestIndex = new int[clusterCount];
    double[] nearestDistance = new double[clusterCount];

    try
    {
        for (int current = 0; current < clusterCount; current++)
        {
            if (SamplesInClusters[current] != 0)
            {
                double best = double.MaxValue;

                for (int other = 0; other < clusterCount; other++)
                {
                    // only other, non-empty clusters are candidates
                    if (current != other && SamplesInClusters[other] != 0)
                    {
                        double candidate = KMeans.calculateDistance(Centroids[current], Centroids[other]);
                        if (candidate < best)
                        {
                            best = candidate;
                            nearestIndex[current] = other;
                        }
                    }
                }

                nearestDistance[current] = best;
            }
            else
            {
                // an empty cluster has no nearest neighbour
                nearestIndex[current] = -1;
                nearestDistance[current] = -1;
            }
        }

        return new Tuple<int[], double[]>(nearestIndex, nearestDistance);
    }
    catch (Exception error)
    {
        string failure = "Function <calculateNearestCluster>: " + "Unhandled exception:\t" + error.ToString();
        throw new KMeansException(400, failure);
    }
}
/// <summary>
/// calculateMoreStatistics is a function that calculates the statistics of each cluster that depend on the other clusters:
/// the nearest foreign sample overall and the nearest sample that belongs to the cluster's nearest cluster.
/// Distances are measured between a sample and the cluster's centroid with KMeans.calculateDistance.
/// </summary>
/// <param name="RawData">data to be clustered</param>
/// <param name="DataToClusterMapping">the assigned cluster number for each sample of the RawData</param>
/// <param name="Centroids">the centroids of the clusters</param>
/// <param name="NearestCluster">nearest cluster number of each cluster; -1 marks a cluster that is skipped as empty</param>
/// <param name="NearestForeignSampleInNearestCluster">per cluster: the sample of its nearest cluster that is closest to this cluster's centroid (null for a skipped cluster)</param>
/// <param name="DistanceToNearestForeignSampleInNearestCluster">per cluster: distance between that sample and this cluster's centroid (-1 for a skipped cluster)</param>
/// <param name="NearestForeignSample">per cluster: the sample not belonging to this cluster that is closest to this cluster's centroid (null for a skipped cluster)</param>
/// <param name="DistanceToNearestForeignSample">per cluster: distance between the nearest foreign sample and this cluster's centroid (-1 for a skipped cluster)</param>
/// <param name="ClusterOfNearestForeignSample">per cluster: the cluster to which the nearest foreign sample belongs (-1 for a skipped cluster)</param>
/// <remarks>
/// For a cluster that is not skipped, a distance stays at double.MaxValue (and the matching sample stays null)
/// when no qualifying sample is closer than double.MaxValue.
/// </remarks>
/// <exception cref="KMeansException">thrown with code 400, wrapping any exception raised inside the function</exception>
private static void calculateMoreStatistics(double[][] RawData, int[] DataToClusterMapping, double[][] Centroids, int[] NearestCluster, out double[][] NearestForeignSampleInNearestCluster, out double[] DistanceToNearestForeignSampleInNearestCluster, out double[][] NearestForeignSample, out double[] DistanceToNearestForeignSample, out int[] ClusterOfNearestForeignSample)
{
    try
    {
        int clusterCount = Centroids.Length;

        NearestForeignSampleInNearestCluster = new double[clusterCount][];
        DistanceToNearestForeignSampleInNearestCluster = new double[clusterCount];
        NearestForeignSample = new double[clusterCount][];
        DistanceToNearestForeignSample = new double[clusterCount];
        ClusterOfNearestForeignSample = new int[clusterCount];

        // Seed every cluster: -1 / null markers for skipped clusters, "infinitely far" for the rest.
        for (int c = 0; c < clusterCount; c++)
        {
            bool skipped = NearestCluster[c] == -1;

            if (skipped)
            {
                NearestForeignSampleInNearestCluster[c] = null;
                NearestForeignSample[c] = null;
            }

            double seed = skipped ? -1.0 : double.MaxValue;
            DistanceToNearestForeignSampleInNearestCluster[c] = seed;
            DistanceToNearestForeignSample[c] = seed;

            if (skipped)
            {
                ClusterOfNearestForeignSample[c] = -1;
            }
        }

        for (int s = 0; s < RawData.Length; s++)
        {
            for (int c = 0; c < clusterCount; c++)
            {
                int owner = DataToClusterMapping[s];

                // only samples of other clusters count, and skipped clusters get no statistics
                if (owner != c && NearestCluster[c] != -1)
                {
                    double[] sample = RawData[s];
                    double distance = KMeans.calculateDistance(sample, Centroids[c]);

                    if (distance < DistanceToNearestForeignSample[c])
                    {
                        DistanceToNearestForeignSample[c] = distance;
                        NearestForeignSample[c] = sample;
                        ClusterOfNearestForeignSample[c] = owner;
                    }

                    // same search, restricted to samples of this cluster's nearest cluster
                    if (owner == NearestCluster[c] && distance < DistanceToNearestForeignSampleInNearestCluster[c])
                    {
                        DistanceToNearestForeignSampleInNearestCluster[c] = distance;
                        NearestForeignSampleInNearestCluster[c] = sample;
                    }
                }
            }
        }
    }
    catch (Exception error)
    {
        string failure = "Function <calculateMoreStatistics>: " + "Unhandled exception:\t" + error.ToString();
        throw new KMeansException(400, failure);
    }
}
/// <summary>
/// calculateStatistics is a function that calculates the statistics and properties of a cluster that are independent of other clusters:
/// the distance of every sample to the centroid, the farthest sample, and the per-attribute mean and standard deviation.
/// </summary>
/// <param name="cls">a cluster object; its statistic members are overwritten in place</param>
/// <remarks>
/// For an empty cluster the mean, centroid and farthest sample are set to all zeros, the maximum distance and
/// farthest-sample index are set to 0, and NearestCluster is set to -1.
/// The standard deviation divides by the number of samples (not by the number of samples minus one).
/// </remarks>
/// <exception cref="KMeansException">thrown with code 400, wrapping any exception raised inside the function</exception>
private static void calculateStatistics(Cluster cls)
{
    try
    {
        int sampleCount = cls.ClusterData.Length;
        int attributeCount = cls.Centroid.Length;

        cls.ClusterDataDistanceToCentroid = new double[sampleCount];
        cls.Mean = new double[attributeCount];
        cls.StandardDeviation = new double[attributeCount];
        cls.InClusterMaxDistance = -1;

        // in case of empty cluster: fill in neutral values and stop
        if (sampleCount == 0)
        {
            cls.InClusterFarthestSampleIndex = 0;
            cls.InClusterMaxDistance = 0;
            cls.InClusterFarthestSample = new double[attributeCount];

            for (int a = 0; a < attributeCount; a++)
            {
                cls.Mean[a] = 0;
                cls.Centroid[a] = 0;
                cls.InClusterFarthestSample[a] = 0;
            }

            cls.NearestCluster = -1;
            return;
        }

        // First pass: distance to centroid, farthest sample, and running mean.
        for (int s = 0; s < sampleCount; s++)
        {
            var sample = cls.ClusterData[s];
            double distance = KMeans.calculateDistance(sample, cls.Centroid);
            cls.ClusterDataDistanceToCentroid[s] = distance;

            if (distance > cls.InClusterMaxDistance)
            {
                cls.InClusterFarthestSampleIndex = s;
                cls.InClusterFarthestSample = sample;
                cls.InClusterMaxDistance = distance;
            }

            for (int a = 0; a < attributeCount; a++)
            {
                cls.Mean[a] += sample[a] / sampleCount;
            }
        }

        // Second pass: per-attribute variance around the mean computed above.
        double[] variance = new double[attributeCount];
        for (int s = 0; s < sampleCount; s++)
        {
            var sample = cls.ClusterData[s];
            for (int a = 0; a < attributeCount; a++)
            {
                double deviation = sample[a] - cls.Mean[a];
                variance[a] += Math.Pow(deviation, 2) / sampleCount;
            }
        }

        for (int a = 0; a < attributeCount; a++)
        {
            cls.StandardDeviation[a] = Math.Sqrt(variance[a]);
        }
    }
    catch (Exception error)
    {
        string failure = "Function <calculateStatistics>: " + "Unhandled exception:\t" + error.ToString();
        throw new KMeansException(400, failure);
    }
}