/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics,
/// using the distance metric selected by <paramref name="type"/>.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered (one row per data point, one column per field)</param>
/// <param name="type">
/// Distance metric used for the convergence test: "DTW" or "Manhattan"; any other value
/// selects the Euclidean distance. The value is also forwarded unchanged to
/// KMeans.ClusterDataSet(clusters, data, type).
/// </param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is greater than the number of rows in <paramref name="data"/>.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data, string type)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Each initial cluster is seeded from a distinct row, so asking for more clusters
    // than rows can never be satisfied (the seeding loop below would spin forever).
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            clusterCount,
            "Cannot form more clusters than there are rows in the data set.");
    }

    // Number of times an assignment pass may produce an empty cluster before giving up.
    const int maxEmptyClusterRetries = 10;

    Random random = new Random();
    ClusterCollection clusters = new ClusterCollection();

    // Seed every cluster with one randomly chosen, not yet used, row of the data set.
    bool[] rowIsSeed = new bool[rowCount];
    int seededCount = 0;
    while (seededCount < clusterCount)
    {
        // The upper bound of Random.Next is exclusive: passing rowCount (not rowCount - 1)
        // makes the last row eligible as a seed too.
        int seedRow = random.Next(0, rowCount);
        if (rowIsSeed[seedRow])
        {
            continue;
        }

        rowIsSeed[seedRow] = true;
        seededCount++;

        double[] seedPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            seedPoint[field] = data[seedRow, field];
        }

        Cluster seedCluster = new Cluster();
        seedCluster.Add(seedPoint);
        clusters.Add(seedCluster);
    }

    int stableClustersCount = 0;
    int emptyClusterRetries = 0;

    // Iterate until no cluster mean moves between two consecutive passes.
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;

        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data, type);

        bool hasEmptyCluster = false;
        for (int index = 0; index < newClusters.Count; index++)
        {
            if (newClusters[index].Count < 1)
            {
                hasEmptyCluster = true;
            }
        }

        // Once the retry budget is spent, return the last accepted clustering as is.
        if (emptyClusterRetries == maxEmptyClusterRetries)
        {
            return clusters;
        }

        // A pass that leaves a cluster empty is rejected and the pass is retried
        // from the same, unchanged clusters.
        if (hasEmptyCluster)
        {
            emptyClusterRetries++;
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double distance;
            switch (type)
            {
                case "DTW":
                    distance = KMeans.DtwDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
                case "Manhattan":
                    distance = KMeans.ManhattanDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
                default:
                    distance = KMeans.EuclideanDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
            }

            // Convergence is deliberately "the mean did not move at all" (exact zero).
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics,
/// using the Euclidean distance to detect convergence.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered (one row per data point, one column per field)</param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is greater than the number of rows in <paramref name="data"/>.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Each initial cluster is seeded from a distinct row, so asking for more clusters
    // than rows can never be satisfied (the seeding loop below would spin forever).
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            clusterCount,
            "Cannot form more clusters than there are rows in the data set.");
    }

    Random random = new Random();
    ClusterCollection clusters = new ClusterCollection();

    // Seed every cluster with one randomly chosen, not yet used, row of the data set.
    bool[] rowIsSeed = new bool[rowCount];
    int seededCount = 0;
    while (seededCount < clusterCount)
    {
        // The upper bound of Random.Next is exclusive: passing rowCount (not rowCount - 1)
        // makes the last row eligible as a seed too.
        int seedRow = random.Next(0, rowCount);
        if (rowIsSeed[seedRow])
        {
            continue;
        }

        rowIsSeed[seedRow] = true;
        seededCount++;

        double[] seedPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            seedPoint[field] = data[seedRow, field];
        }

        Cluster seedCluster = new Cluster();
        seedCluster.Add(seedPoint);
        clusters.Add(seedCluster);
    }

    int stableClustersCount = 0;

    // Iterate until no cluster mean moves between two consecutive passes.
    // NOTE(review): unlike the overload taking a metric name, this loop has no retry or
    // iteration cap; termination relies on the means eventually becoming stationary.
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;

        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double distance = KMeans.EuclideanDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);

            // Convergence is deliberately "the mean did not move at all" (exact zero).
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into clusters or groups with similar characteristics,
/// using the distance metric selected by <paramref name="type"/>.
/// </summary>
/// <param name="clusterCount">The number of clusters or groups to form</param>
/// <param name="data">An array containing data that will be clustered (one row per data point, one column per field)</param>
/// <param name="type">
/// Distance metric used for the convergence test: "DTW" or "Manhattan"; any other value
/// selects the Euclidean distance. The value is also forwarded unchanged to
/// KMeans.ClusterDataSet(clusters, data, type).
/// </param>
/// <returns>A collection of clusters of data</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is greater than the number of rows in <paramref name="data"/>.
/// </exception>
// NOTE(review): a method with this exact signature also appears earlier in this file;
// confirm which definition is meant to be kept.
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data, string type)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Each initial cluster is seeded from a distinct row, so asking for more clusters
    // than rows can never be satisfied (the seeding loop below would spin forever).
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            clusterCount,
            "Cannot form more clusters than there are rows in the data set.");
    }

    // Number of times an assignment pass may produce an empty cluster before giving up.
    const int maxEmptyClusterRetries = 10;

    Random random = new Random();
    ClusterCollection clusters = new ClusterCollection();

    // Seed every cluster with one randomly chosen, not yet used, row of the data set.
    bool[] rowIsSeed = new bool[rowCount];
    int seededCount = 0;
    while (seededCount < clusterCount)
    {
        // The upper bound of Random.Next is exclusive: passing rowCount (not rowCount - 1)
        // makes the last row eligible as a seed too.
        int seedRow = random.Next(0, rowCount);
        if (rowIsSeed[seedRow])
        {
            continue;
        }

        rowIsSeed[seedRow] = true;
        seededCount++;

        double[] seedPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            seedPoint[field] = data[seedRow, field];
        }

        Cluster seedCluster = new Cluster();
        seedCluster.Add(seedPoint);
        clusters.Add(seedCluster);
    }

    int stableClustersCount = 0;
    int emptyClusterRetries = 0;

    // Iterate until no cluster mean moves between two consecutive passes.
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;

        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data, type);

        bool hasEmptyCluster = false;
        for (int index = 0; index < newClusters.Count; index++)
        {
            if (newClusters[index].Count < 1)
            {
                hasEmptyCluster = true;
            }
        }

        // Once the retry budget is spent, return the last accepted clustering as is.
        if (emptyClusterRetries == maxEmptyClusterRetries)
        {
            return clusters;
        }

        // A pass that leaves a cluster empty is rejected and the pass is retried
        // from the same, unchanged clusters.
        if (hasEmptyCluster)
        {
            emptyClusterRetries++;
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double distance;
            switch (type)
            {
                case "DTW":
                    distance = KMeans.DtwDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
                case "Manhattan":
                    distance = KMeans.ManhattanDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
                default:
                    distance = KMeans.EuclideanDistance(newClusters[clusterIndex].ClusterMean, clusters[clusterIndex].ClusterMean);
                    break;
            }

            // Convergence is deliberately "the mean did not move at all" (exact zero).
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}