Example #1
0
        /// <summary>
        /// Performs one assignment pass: every row of <paramref name="data"/> is placed in the
        /// cluster whose current mean is nearest to it.
        /// </summary>
        /// <param name="clusters">The current clusters; only their <c>ClusterMean</c> values are read.</param>
        /// <param name="data">A two-dimensional array holding one data point per row.</param>
        /// <param name="type">
        /// The distance metric: "DTW" or "Manhattan"; any other value (including null) selects Euclidean.
        /// </param>
        /// <returns>
        /// A new collection with as many clusters as <paramref name="clusters"/>. Each data point
        /// and its row index are added to the cluster at the position of the nearest mean.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="clusters"/> or <paramref name="data"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="clusters"/> is empty.</exception>
        public static ClusterCollection ClusterDataSet(ClusterCollection clusters, double[,] data, string type)
        {
            if (clusters == null)
            {
                throw new ArgumentNullException("clusters");
            }

            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            // Validate before allocating anything. ArgumentException derives from
            // SystemException, so callers that catch SystemException keep working.
            if (clusters.Count <= 0)
            {
                throw new ArgumentException("Cluster Count Cannot Be Zero!");
            }

            int rowCount = data.GetLength(0);
            int fieldCount = data.GetLength(1);

            // Create one empty result cluster per input cluster, at the same position.
            ClusterCollection newClusters = new ClusterCollection();

            for (int count = 0; count < clusters.Count; count++)
            {
                newClusters.Add(new Cluster());
            }

            for (int row = 0; row < rowCount; row++)
            {
                // Copy the row into its own array so the cluster owns an independent point.
                double[] dataPoint = new double[fieldCount];

                for (int field = 0; field < fieldCount; field++)
                {
                    dataPoint[field] = data[row, field];
                }

                int position = 0;
                double nearestDistance = 0.0;

                for (int cluster = 0; cluster < clusters.Count; cluster++)
                {
                    double[] clusterMean = clusters[cluster].ClusterMean;
                    double distance;

                    switch (type)
                    {
                        case "DTW":
                            distance = KMeans.DtwDistance(dataPoint, clusterMean);
                            break;

                        case "Manhattan":
                            distance = KMeans.ManhattanDistance(dataPoint, clusterMean);
                            break;

                        default:
                            distance = KMeans.EuclideanDistance(dataPoint, clusterMean);
                            break;
                    }

                    // The first cluster seeds the minimum; later clusters win only when
                    // strictly closer, so ties keep the lowest cluster index.
                    if (cluster == 0 || nearestDistance > distance)
                    {
                        nearestDistance = distance;
                        position = cluster;
                    }
                }

                newClusters[position].Add(dataPoint);
                newClusters[position].AddValue(row);
            }

            return newClusters;
        }
Example #2
0
        /// <summary>
        /// Separates a dataset into clusters of similar rows: distinct random rows are chosen as
        /// initial clusters, then rows are reassigned repeatedly until no cluster mean moves.
        /// </summary>
        /// <param name="clusterCount">The number of clusters to form; at most the number of rows in <paramref name="data"/>.</param>
        /// <param name="data">A two-dimensional array holding one data point per row.</param>
        /// <param name="type">
        /// The distance metric: "DTW" or "Manhattan"; any other value (including null) selects Euclidean.
        /// </param>
        /// <returns>
        /// The clusters of the last accepted pass. If a reassignment pass leaves any cluster
        /// empty, the clusters from before that pass are returned instead.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="clusterCount"/> is negative or exceeds the number of rows.
        /// </exception>
        public static ClusterCollection ClusterDataSet(int clusterCount, double[,] data, string type)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            int rowCount = data.GetLength(0);
            int fieldCount = data.GetLength(1);

            // Each seed must be a distinct row, so asking for more clusters than rows could
            // never be satisfied and the seeding loop below would not terminate.
            if (clusterCount < 0 || clusterCount > rowCount)
            {
                throw new ArgumentOutOfRangeException(
                    "clusterCount",
                    "Cluster count must be between zero and the number of data rows.");
            }

            Random random = new Random();
            ClusterCollection clusters = new ClusterCollection();

            // Marks rows already used as a seed, so each row seeds at most one cluster.
            bool[] isSeedRow = new bool[rowCount];
            int seedCount = 0;

            while (seedCount < clusterCount)
            {
                // Random.Next(max) excludes max, so this covers every row, including the last.
                int seedRow = random.Next(rowCount);

                if (isSeedRow[seedRow])
                {
                    continue;
                }

                isSeedRow[seedRow] = true;
                seedCount++;

                double[] dataPoint = new double[fieldCount];

                for (int field = 0; field < fieldCount; field++)
                {
                    dataPoint[field] = data[seedRow, field];
                }

                Cluster cluster = new Cluster();
                cluster.Add(dataPoint);
                clusters.Add(cluster);
            }

            int stableClustersCount = 0;

            while (stableClustersCount != clusters.Count)
            {
                stableClustersCount = 0;

                ClusterCollection newClusters = KMeans.ClusterDataSet(clusters, data, type);

                // The assignment pass depends only on the current clusters and the data, so
                // repeating it with unchanged clusters would yield the same empty cluster.
                // Stop here and keep the previous clusters rather than retrying.
                // NOTE(review): assumes ClusterMean and the distance functions are deterministic.
                for (int index = 0; index < newClusters.Count; index++)
                {
                    if (newClusters[index].Count < 1)
                    {
                        return clusters;
                    }
                }

                for (int clusterIndex = 0; clusterIndex < clusters.Count; clusterIndex++)
                {
                    double[] newMean = newClusters[clusterIndex].ClusterMean;
                    double[] oldMean = clusters[clusterIndex].ClusterMean;
                    double distance;

                    switch (type)
                    {
                        case "DTW":
                            distance = KMeans.DtwDistance(newMean, oldMean);
                            break;

                        case "Manhattan":
                            distance = KMeans.ManhattanDistance(newMean, oldMean);
                            break;

                        default:
                            distance = KMeans.EuclideanDistance(newMean, oldMean);
                            break;
                    }

                    // A cluster counts as stable only when its mean did not move at all.
                    if (distance == 0)
                    {
                        stableClustersCount++;
                    }
                }

                clusters = newClusters;
            }

            return clusters;
        }