Exemplo n.º 1
0
        /// <summary>
        /// Trains a k-means++ clusterer on synthetic 2-D points scattered around <c>k</c> centers
        /// placed evenly on the unit circle, then verifies that each center is predicted into a
        /// distinct cluster and that evaluating the centers yields the expected metrics.
        /// </summary>
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            var rand     = new Random(1); // fixed seed so the generated data set is reproducible
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Place the centers on the unit circle at an angle of 360*i/k degrees to the X axis.
                double angle = Math.PI * i * 2 / k;

                clusters[i] = new ClusteringData
                {
                    Points = new[] { (float)Math.Cos(angle), (float)Math.Sin(angle) }
                };
            }

            // Create data points by randomly picking a center and shifting the point slightly away from it.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                // A single shift in [-0.05, 0.05) is applied to both coordinates.
                var shift = (rand.NextDouble() - 0.5) / 10;

                data[i] = new ClusteringData
                {
                    Points = new[]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }

            var pipeline = new LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();

            // Validate that the points used as cluster centers during data generation are
            // assigned to different clusters. HashSet.Add returns false for a duplicate id.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                Assert.True(
                    labels.Add(scores.SelectedClusterId),
                    $"Center {i} was assigned to cluster {scores.SelectedClusterId}, which another center already occupies.");
            }

            var            evaluator = new ClusterEvaluator();
            var            testData  = CollectionDataSource.Create(clusters);
            ClusterMetrics metrics   = evaluator.Evaluate(model, testData);

            // Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
            // Label is not specified, so NMI would be equal to NaN.
            Assert.Equal(double.NaN, metrics.Nmi);
            // Calculate dbi is false by default so Dbi would be 0.
            Assert.Equal(0.0, metrics.Dbi);
            // The evaluated points are the centers themselves, so the score is expected to be 0 to 5 decimal places.
            Assert.Equal(0.0, metrics.AvgMinScore, 5);
        }
        /// <summary>
        /// Evaluates a clustering model on the supplied rows and writes the resulting
        /// <c>AvgMinScore</c> to the console beneath a "Metrics for Customer Segmentation" header.
        /// </summary>
        /// <param name="testData">Rows the model is evaluated against.</param>
        /// <param name="model">Prediction model to evaluate.</param>
        public void EvaluateModel(IEnumerable <PivotData> testData, PredictionModel <PivotData, ClusteringPrediction> model)
        {
            ConsoleWriteHeader("Metrics for Customer Segmentation");

            // Wrap the in-memory rows as a data source before handing them to the evaluator.
            var source = CollectionDataSource.Create(testData);
            ClusterMetrics result = new ClusterEvaluator().Evaluate(model, source);

            Console.WriteLine($"Average mean score: {result.AvgMinScore:0.##}");
            // The Davies-Bouldin index (Dbi) and normalized mutual information (Nmi) are also
            // available on the metrics object but are not printed here.
        }