/// <summary>
/// Performs a single assignment pass: every row of <paramref name="data"/> is placed in the
/// cluster whose current mean is nearest by Euclidean distance.
/// </summary>
/// <param name="clusters">The current clusters. Only their count and each cluster's <c>ClusterMean</c> are read.</param>
/// <param name="data">A two-dimensional array holding one data point per row and one field per column.</param>
/// <returns>A new collection containing as many clusters as <paramref name="clusters"/>, filled with the reassigned data points.</returns>
/// <exception cref="ArgumentNullException"><paramref name="clusters"/> or <paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="clusters"/> contains no clusters.</exception>
public static ClusterCollection ClusterDataSet(ClusterCollection clusters, double [,] data)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // Validate before allocating anything. ArgumentException derives from SystemException,
    // so callers that already catch SystemException keep working; the message is unchanged.
    if (clusters.Count <= 0)
    {
        throw new ArgumentException("Cluster Count Cannot Be Zero!");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // One empty target cluster per source cluster, at the same index.
    ClusterCollection newClusters = new ClusterCollection();
    for (int count = 0; count < clusters.Count; count++)
    {
        newClusters.Add(new Cluster());
    }

    for (int row = 0; row < rowCount; row++)
    {
        // Copy the row into its own array so the cluster owns an independent data point.
        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[row, field];
        }

        int position = 0;
        double nearestDistance = 0.0;
        for (int cluster = 0; cluster < clusters.Count; cluster++)
        {
            double[] clusterMean = clusters[cluster].ClusterMean;
            double distance = KMeans.EuclideanDistance(dataPoint, clusterMean);

            // The first cluster always becomes the initial candidate; later clusters win only
            // when strictly closer, so ties go to the lowest cluster index.
            if (cluster == 0 || nearestDistance > distance)
            {
                nearestDistance = distance;
                position = cluster;
            }
        }

        newClusters[position].Add(dataPoint);
    }

    return newClusters;
}
/// <summary>
/// Separates a dataset into <paramref name="clusterCount"/> clusters. Each cluster is seeded
/// with a distinct, randomly chosen row, then assignment passes are repeated until no cluster
/// mean changes between two consecutive passes.
/// </summary>
/// <param name="clusterCount">The number of clusters to form. Zero yields an empty collection.</param>
/// <param name="data">A two-dimensional array holding one data point per row and one field per column.</param>
/// <returns>The clusters produced by the last assignment pass.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is negative or greater than the number of rows in <paramref name="data"/>.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double [,] data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (clusterCount < 0)
    {
        throw new ArgumentOutOfRangeException("clusterCount");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Distinct seed rows are required, so more clusters than rows can never be satisfied.
    // Previously this situation made the seeding loop spin forever.
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            "The number of clusters cannot exceed the number of rows in the data.");
    }

    ClusterCollection clusters = new ClusterCollection();

    // Partial Fisher-Yates shuffle: after step i, rowIndices[0..i] holds i + 1 distinct rows
    // chosen uniformly at random. This always finishes in clusterCount steps.
    int[] rowIndices = new int[rowCount];
    for (int index = 0; index < rowCount; index++)
    {
        rowIndices[index] = index;
    }

    Random random = new Random();
    for (int chosen = 0; chosen < clusterCount; chosen++)
    {
        // Random.Next's upper bound is exclusive, so rowCount (not rowCount - 1) is needed
        // for the last row to be eligible as a seed.
        int pick = random.Next(chosen, rowCount);
        int seedRow = rowIndices[pick];
        rowIndices[pick] = rowIndices[chosen];
        rowIndices[chosen] = seedRow;

        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[seedRow, field];
        }

        Cluster cluster = new Cluster();
        cluster.Add(dataPoint);
        clusters.Add(cluster);
    }

    // NOTE(review): there is no iteration cap. If a mean can never compare equal to its
    // predecessor (for example a NaN mean for an empty cluster - depends on Cluster.ClusterMean,
    // which is not visible here), this loop would not terminate. Confirm against Cluster.
    int stableClustersCount = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;
        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data);

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double shift = KMeans.EuclideanDistance(
                newClusters[clusterIndex].ClusterMean,
                clusters[clusterIndex].ClusterMean);

            // A cluster counts as stable only when its mean did not move at all.
            if (shift == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Performs a single assignment pass: every row of <paramref name="data"/> is placed in the
/// cluster whose current mean is nearest according to the distance measure named by
/// <paramref name="type"/>. The row index is recorded on the receiving cluster as well.
/// </summary>
/// <param name="clusters">The current clusters. Only their count and each cluster's <c>ClusterMean</c> are read.</param>
/// <param name="data">A two-dimensional array holding one data point per row and one field per column.</param>
/// <param name="type">
/// Distance measure selector (exact, case-sensitive match): <c>"DTW"</c> uses <c>KMeans.DtwDistance</c>,
/// <c>"Manhattan"</c> uses <c>KMeans.ManhattanDistance</c>, and any other value, including null,
/// uses <c>KMeans.EuclideanDistance</c>.
/// </param>
/// <returns>A new collection containing as many clusters as <paramref name="clusters"/>, filled with the reassigned data points.</returns>
/// <exception cref="ArgumentNullException"><paramref name="clusters"/> or <paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="clusters"/> contains no clusters.</exception>
public static ClusterCollection ClusterDataSet(ClusterCollection clusters, double[,] data, string type)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    // Validate before allocating anything. ArgumentException derives from SystemException,
    // so callers that already catch SystemException keep working; the message is unchanged.
    if (clusters.Count <= 0)
    {
        throw new ArgumentException("Cluster Count Cannot Be Zero!");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // One empty target cluster per source cluster, at the same index.
    ClusterCollection newClusters = new ClusterCollection();
    for (int count = 0; count < clusters.Count; count++)
    {
        newClusters.Add(new Cluster());
    }

    for (int row = 0; row < rowCount; row++)
    {
        // Copy the row into its own array so the cluster owns an independent data point.
        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[row, field];
        }

        int position = 0;
        double nearestDistance = 0.0;
        for (int cluster = 0; cluster < clusters.Count; cluster++)
        {
            double[] clusterMean = clusters[cluster].ClusterMean;

            // Single place where the distance measure is chosen, shared by the first and
            // all subsequent clusters.
            double distance;
            switch (type)
            {
                case "DTW":
                    distance = KMeans.DtwDistance(dataPoint, clusterMean);
                    break;
                case "Manhattan":
                    distance = KMeans.ManhattanDistance(dataPoint, clusterMean);
                    break;
                default:
                    distance = KMeans.EuclideanDistance(dataPoint, clusterMean);
                    break;
            }

            // The first cluster always becomes the initial candidate; later clusters win only
            // when strictly closer, so ties go to the lowest cluster index.
            if (cluster == 0 || nearestDistance > distance)
            {
                nearestDistance = distance;
                position = cluster;
            }
        }

        newClusters[position].Add(dataPoint);
        newClusters[position].AddValue(row);
    }

    return newClusters;
}
/// <summary>
/// Separates a dataset into <paramref name="clusterCount"/> clusters using the distance measure
/// named by <paramref name="type"/>. Each cluster is seeded with a distinct, randomly chosen row,
/// then assignment passes are repeated until no cluster mean changes between two consecutive
/// passes, or until the budget for rejected passes is used up.
/// </summary>
/// <param name="clusterCount">The number of clusters to form. Zero yields an empty collection.</param>
/// <param name="data">A two-dimensional array holding one data point per row and one field per column.</param>
/// <param name="type">
/// Distance measure selector (exact, case-sensitive match): <c>"DTW"</c> uses <c>KMeans.DtwDistance</c>,
/// <c>"Manhattan"</c> uses <c>KMeans.ManhattanDistance</c>, and any other value, including null,
/// uses <c>KMeans.EuclideanDistance</c>.
/// </param>
/// <returns>
/// The clusters from the last accepted assignment pass. A pass is rejected when it produces a
/// cluster whose <c>Count</c> is below one.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is negative or greater than the number of rows in <paramref name="data"/>.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data, string type)
{
    // Number of rejected passes tolerated before giving up and returning the current clusters.
    const int MaxRejectedPasses = 10;

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (clusterCount < 0)
    {
        throw new ArgumentOutOfRangeException("clusterCount");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Distinct seed rows are required, so more clusters than rows can never be satisfied.
    // Previously this situation made the seeding loop spin forever.
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            "The number of clusters cannot exceed the number of rows in the data.");
    }

    ClusterCollection clusters = new ClusterCollection();

    // Partial Fisher-Yates shuffle: after step i, rowIndices[0..i] holds i + 1 distinct rows
    // chosen uniformly at random. This always finishes in clusterCount steps.
    int[] rowIndices = new int[rowCount];
    for (int index = 0; index < rowCount; index++)
    {
        rowIndices[index] = index;
    }

    Random random = new Random();
    for (int chosen = 0; chosen < clusterCount; chosen++)
    {
        // Random.Next's upper bound is exclusive, so rowCount (not rowCount - 1) is needed
        // for the last row to be eligible as a seed.
        int pick = random.Next(chosen, rowCount);
        int seedRow = rowIndices[pick];
        rowIndices[pick] = rowIndices[chosen];
        rowIndices[chosen] = seedRow;

        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[seedRow, field];
        }

        Cluster cluster = new Cluster();
        cluster.Add(dataPoint);
        clusters.Add(cluster);
    }

    int stableClustersCount = 0;
    int rejectedPasses = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;
        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data, type);

        // A pass is rejected if any resulting cluster reports a Count below one
        // (presumably no data point was assigned to it).
        bool hasEmptyCluster = false;
        for (int index = 0; index < newClusters.Count; index++)
        {
            if (newClusters[index].Count < 1)
            {
                hasEmptyCluster = true;
                break;
            }
        }

        // The budget is checked before the current pass is inspected, preserving the
        // established order: once the budget is spent, the current clusters are returned.
        if (rejectedPasses == MaxRejectedPasses)
        {
            return clusters;
        }

        if (hasEmptyCluster)
        {
            rejectedPasses++;
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] newMean = newClusters[clusterIndex].ClusterMean;
            double[] oldMean = clusters[clusterIndex].ClusterMean;

            double distance;
            switch (type)
            {
                case "DTW":
                    distance = KMeans.DtwDistance(newMean, oldMean);
                    break;
                case "Manhattan":
                    distance = KMeans.ManhattanDistance(newMean, oldMean);
                    break;
                default:
                    distance = KMeans.EuclideanDistance(newMean, oldMean);
                    break;
            }

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}
/// <summary>
/// Separates a dataset into <paramref name="clusterCount"/> clusters using the distance measure
/// named by <paramref name="type"/>. Each cluster is seeded with a distinct, randomly chosen row,
/// then assignment passes are repeated until no cluster mean changes between two consecutive
/// passes, or until the budget for rejected passes is used up.
/// </summary>
/// <param name="clusterCount">The number of clusters to form. Zero yields an empty collection.</param>
/// <param name="data">A two-dimensional array holding one data point per row and one field per column.</param>
/// <param name="type">
/// Distance measure selector (exact, case-sensitive match): <c>"DTW"</c> uses <c>KMeans.DtwDistance</c>,
/// <c>"Manhattan"</c> uses <c>KMeans.ManhattanDistance</c>, and any other value, including null,
/// uses <c>KMeans.EuclideanDistance</c>.
/// </param>
/// <returns>
/// The clusters from the last accepted assignment pass. A pass is rejected when it produces a
/// cluster whose <c>Count</c> is below one.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="clusterCount"/> is negative or greater than the number of rows in <paramref name="data"/>.
/// </exception>
public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data, string type)
{
    // Number of rejected passes tolerated before giving up and returning the current clusters.
    const int MaxRejectedPasses = 10;

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    if (clusterCount < 0)
    {
        throw new ArgumentOutOfRangeException("clusterCount");
    }

    int rowCount = data.GetLength(0);
    int fieldCount = data.GetLength(1);

    // Distinct seed rows are required, so more clusters than rows can never be satisfied.
    // Previously this situation made the seeding loop spin forever.
    if (clusterCount > rowCount)
    {
        throw new ArgumentOutOfRangeException(
            "clusterCount",
            "The number of clusters cannot exceed the number of rows in the data.");
    }

    ClusterCollection clusters = new ClusterCollection();

    // Partial Fisher-Yates shuffle: after step i, rowIndices[0..i] holds i + 1 distinct rows
    // chosen uniformly at random. This always finishes in clusterCount steps.
    int[] rowIndices = new int[rowCount];
    for (int index = 0; index < rowCount; index++)
    {
        rowIndices[index] = index;
    }

    Random random = new Random();
    for (int chosen = 0; chosen < clusterCount; chosen++)
    {
        // Random.Next's upper bound is exclusive, so rowCount (not rowCount - 1) is needed
        // for the last row to be eligible as a seed.
        int pick = random.Next(chosen, rowCount);
        int seedRow = rowIndices[pick];
        rowIndices[pick] = rowIndices[chosen];
        rowIndices[chosen] = seedRow;

        double[] dataPoint = new double[fieldCount];
        for (int field = 0; field < fieldCount; field++)
        {
            dataPoint[field] = data[seedRow, field];
        }

        Cluster cluster = new Cluster();
        cluster.Add(dataPoint);
        clusters.Add(cluster);
    }

    int stableClustersCount = 0;
    int rejectedPasses = 0;
    while (stableClustersCount != clusters.Count)
    {
        stableClustersCount = 0;
        ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data, type);

        // A pass is rejected if any resulting cluster reports a Count below one
        // (presumably no data point was assigned to it).
        bool hasEmptyCluster = false;
        for (int index = 0; index < newClusters.Count; index++)
        {
            if (newClusters[index].Count < 1)
            {
                hasEmptyCluster = true;
                break;
            }
        }

        // The budget is checked before the current pass is inspected, preserving the
        // established order: once the budget is spent, the current clusters are returned.
        if (rejectedPasses == MaxRejectedPasses)
        {
            return clusters;
        }

        if (hasEmptyCluster)
        {
            rejectedPasses++;
            continue;
        }

        for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
        {
            double[] newMean = newClusters[clusterIndex].ClusterMean;
            double[] oldMean = clusters[clusterIndex].ClusterMean;

            double distance;
            switch (type)
            {
                case "DTW":
                    distance = KMeans.DtwDistance(newMean, oldMean);
                    break;
                case "Manhattan":
                    distance = KMeans.ManhattanDistance(newMean, oldMean);
                    break;
                default:
                    distance = KMeans.EuclideanDistance(newMean, oldMean);
                    break;
            }

            // A cluster counts as stable only when its mean did not move at all.
            if (distance == 0)
            {
                stableClustersCount++;
            }
        }

        clusters = newClusters;
    }

    return clusters;
}