예제 #1
0
        /// <summary>
        /// Performs k-means partitioning (clustering) on the data. This is done on all
        /// parameters within the DataLines except for the dates, since they are not
        /// easily normalized (without losing their meaning).
        /// </summary>
        /// <remarks>
        /// <paramref name="data"/> is passed through <c>Shuffle(1337)</c> before the
        /// first <paramref name="k"/> entries are taken as the initial cluster seeds;
        /// the call's result is discarded, so the shuffle presumably reorders the
        /// caller's list in place (confirm against the Shuffle extension). A progress
        /// line is written to the console after every completed iteration.
        /// </remarks>
        /// <param name="k">The number of partitions to use. Must be at least 1 and no
        /// larger than the number of entries in <paramref name="data"/>.</param>
        /// <param name="data">The dataset that needs partitioning.</param>
        /// <returns>A list of KMeanCluster together containing all the entries from
        /// the original dataset.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is less
        /// than 1 or greater than the number of entries in <paramref name="data"/>.</exception>
        public static List <KMeanCluster> KMeansPartition(int k, List <DataLine> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            // Each cluster is seeded from a distinct data line, so k cannot exceed
            // the number of lines; fail with a clear message instead of an index error.
            if (k < 1 || k > data.Count)
            {
                throw new ArgumentOutOfRangeException("k", k, "k must be between 1 and the number of data lines.");
            }

            // Upper cutoff so the loop terminates even if the centroids never settle.
            const int MaxIterations = 10000;

            // Select k random lines as the initial clusters.
            data.Shuffle(1337);
            KMeanCluster[] clusters = new KMeanCluster[k];
            for (int i = 0; i < k; i++)
            {
                clusters[i] = new KMeanCluster(data[i]);
            }

            int iteration = 0;

            while (iteration < MaxIterations)
            {
                // Assign every data line to the cluster it is least dissimilar to.
                foreach (DataLine line in data)
                {
                    KMeanCluster nearest = clusters[0];
                    double nearestDistance = Dissimilarity(line, nearest);

                    for (int c = 1; c < k; c++)
                    {
                        double distance = Dissimilarity(line, clusters[c]);

                        // CompareTo (rather than <) keeps the same ordering a default
                        // sort on the distance would use; the strict comparison keeps
                        // the earliest cluster on ties.
                        if (distance.CompareTo(nearestDistance) < 0)
                        {
                            nearest = clusters[c];
                            nearestDistance = distance;
                        }
                    }

                    nearest.AddMember(line);
                }

                // Compute the new centroids.
                KMeanCluster[] newClusters = new KMeanCluster[k];
                for (int c = 0; c < k; c++)
                {
                    newClusters[c] = clusters[c].CalcCentroid();
                }

                // Stop when nothing changed; the current clusters (with the members
                // assigned above) are the result.
                if (!Changed(clusters, newClusters))
                {
                    break;
                }

                // NOTE(review): if the iteration cap is reached right after this
                // assignment, the returned clusters are the ones produced by
                // CalcCentroid and never receive AddMember calls in this method --
                // confirm whether CalcCentroid carries the members over.
                clusters = newClusters;

                iteration++;
                Console.WriteLine("Iteration " + iteration + " done.");
            }

            return clusters.ToList();
        }
예제 #2
0
        /// <summary>
        /// Measures how dissimilar a data line is from a cluster: the sum, over every
        /// key of the cluster's centroid, of the absolute difference between the
        /// line's value for that key and the centroid's value.
        /// </summary>
        /// <param name="a">The data line to measure.</param>
        /// <param name="c">The cluster whose centroid the line is compared against.</param>
        /// <returns>The summed absolute per-key difference; 0 when the centroid has
        /// no keys.</returns>
        private static double Dissimilarity(DataLine a, KMeanCluster c)
        {
            Dictionary <string, double> centroid = c.Centroid;

            // The parentheses are required: ?? binds more loosely than -, so without
            // them the expression reads "value ?? (0.0 - centroid)" and the centroid
            // is ignored whenever the line has a value.
            // TODO: null == 0.0 might not be good
            return centroid.Sum(kv => Math.Abs((a.hashDoubles[kv.Key] ?? 0.0) - kv.Value));
        }