/// <summary>
/// Trains a k-means++ clusterer through the legacy learning pipeline on synthetic 2-D data
/// and checks that each generating center is assigned to its own cluster, then checks the
/// metrics reported by the cluster evaluator.
/// </summary>
public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            // Fixed seed so the generated data set is the same on every run.
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Place the cluster centers on the unit circle at angle 360*i/k degrees to the X axis.
                double angle = Math.PI * i * 2 / k;
                clusters[i] = new ClusteringData {
                    Points = new float[2] { (float)Math.Cos(angle), (float)Math.Sin(angle) }
                };
            }
            // Create data points by randomly picking a center and shifting the point slightly away from it.
            // The same shift (within +/-0.05) is applied to both coordinates.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / 10;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(Legacy.Data.CollectionDataSource.Create(data));
            pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            // Validate that the points picked as cluster centers during data generation belong to different clusters.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                // HashSet.Add returns false when the id was already seen, i.e. two centers share a cluster.
                Assert.True(labels.Add(scores.SelectedClusterId));
            }

            var evaluator = new Legacy.Models.ClusterEvaluator();
            var testData  = Legacy.Data.CollectionDataSource.Create(clusters);
            var metrics   = evaluator.Evaluate(model, testData);

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes failure messages correct.
            // Label is not specified, so NMI would be equal to NaN
            Assert.Equal(double.NaN, metrics.Nmi);
            // Calculate dbi is false by default so Dbi would be 0
            Assert.Equal(0.0, metrics.Dbi);
            // The evaluated points are the centers themselves, so the score is compared to 0 at 5 decimal places.
            Assert.Equal(0.0, metrics.AvgMinScore, 5);
        }
// Example #2
// 0
 /// <summary>
 /// Runs a single clustering prediction inside <see cref="Task.Run{TResult}(Func{TResult})"/>.
 /// A new prediction function is built from <c>Model</c> and <c>_mlContext</c> on every call.
 /// </summary>
 /// <param name="clusteringData">The input instance to score.</param>
 /// <returns>A task whose result is the prediction for <paramref name="clusteringData"/>.</returns>
 public Task <ClusteringPrediction> Predict(ClusteringData clusteringData) =>
     Task.Run(() =>
         Model.MakePredictionFunction <ClusteringData, ClusteringPrediction>(_mlContext)
              .Predict(clusteringData));
        /// <summary>
        /// Trains a k-means++ clusterer on synthetic 2-D data generated around k centers on the
        /// unit circle and checks that every generating center is assigned to a distinct cluster.
        /// </summary>
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 5;
            // Seeded so the generated data set is reproducible between runs; an unseeded Random
            // produced a different data set on every execution. The pipeline below is still
            // constructed without an explicit seed, so training itself may vary.
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Place the cluster centers on the unit circle at angle 360*i/k degrees to the X axis.
                double angle = Math.PI * i * 2 / k;
                clusters[i] = new ClusteringData {
                    Points = new float[2] { (float)Math.Cos(angle), (float)Math.Sin(angle) }
                };
            }
            // Create data points by randomly picking a center and shifting the point slightly away from it.
            // The same shift (within +/-0.5/k) is applied to both coordinates.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / k;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            // Validate that the points picked as cluster centers during data generation belong to different clusters.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                // HashSet.Add returns false when the id was already seen, i.e. two centers share a cluster.
                Assert.True(labels.Add(scores.SelectedClusterId));
            }
        }
        /// <summary>
        /// Scores the testing data view with <paramref name="model"/>, computes clustering metrics,
        /// stores the model in <c>TrainModel</c>, and passes the predicted cluster of every
        /// (R, M, F) combination in 1..<c>RfmMaxForTests</c> to <c>FileService.ExportToCsv</c>.
        /// </summary>
        /// <param name="model">The trained transformer to evaluate.</param>
        /// <returns>The metrics computed over the transformed testing data.</returns>
        public ClusteringMetrics Evaluate(ITransformer model)
        {
            var scored            = model.Transform(_testingDataView);
            var clusteringMetrics = _mlContext.Clustering.Evaluate(scored, score: "Score", features: "Features");

            TrainModel = model;

            // Probe the model with every combination of the three values to see which cluster each lands in.
            var engine    = TrainModel.CreatePredictionEngine <ClusteringData, ClusteringPrediction>(_mlContext);
            var testCases = new List <TestCase>();

            // Loop order is R (outer), M (middle), F (inner); the exported rows follow this order.
            for (var r = 1; r <= RfmMaxForTests; r++)
            {
                for (var m = 1; m <= RfmMaxForTests; m++)
                {
                    for (var f = 1; f <= RfmMaxForTests; f++)
                    {
                        var sample   = new ClusteringData { R = r, M = m, F = f };
                        var assigned = engine.Predict(sample);

                        testCases.Add(new TestCase { Data = sample, Cluster = assigned.SelectedClusterId });
                    }
                }
            }

            // Save the RFM-to-cluster matching as CSV.
            new FileService().ExportToCsv(testCases);

            return clusteringMetrics;
        }
// Example #5
// 0
        /// <summary>
        /// Trains a k-means estimator through <c>MLContext</c> on synthetic 2-D data and checks that
        /// each generating center is assigned to its own cluster, then checks the metrics reported
        /// by the clustering evaluator on the centers.
        /// </summary>
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            // Fixed seed so the generated data set is the same on every run.
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Place the cluster centers on the unit circle at angle 360*i/k degrees to the X axis.
                double angle = Math.PI * i * 2 / k;
                clusters[i] = new ClusteringData {
                    Points = new float[2] { (float)Math.Cos(angle), (float)Math.Sin(angle) }
                };
            }
            // Create data points by randomly picking a center and shifting the point slightly away from it.
            // The same shift (within +/-0.05) is applied to both coordinates.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / 10;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }

            var mlContext = new MLContext(seed: 1, conc: 1);

            // Turn the in-memory arrays into ML.NET data views: the generated points for training
            // and the cluster centers themselves for evaluation.
            var trainData = mlContext.CreateStreamingDataView(data);
            var testData  = mlContext.CreateStreamingDataView(clusters);

            // Create Estimator
            var pipe = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: k);

            // Train the pipeline
            var trainedModel = pipe.Fit(trainData);

            // Validate that the points picked as cluster centers during data generation belong to different clusters.
            var labels          = new HashSet <uint>();
            var predictFunction = trainedModel.CreatePredictionEngine <ClusteringData, ClusteringPrediction>(mlContext);

            for (int i = 0; i < k; i++)
            {
                var scores = predictFunction.Predict(clusters[i]);
                // HashSet.Add returns false when the id was already seen, i.e. two centers share a cluster.
                Assert.True(labels.Add(scores.SelectedClusterId));
            }

            // Evaluate the trained pipeline
            var predicted = trainedModel.Transform(testData);
            var metrics   = mlContext.Clustering.Evaluate(predicted);

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes failure messages correct.
            // Label is not specified, so NMI would be equal to NaN
            Assert.Equal(double.NaN, metrics.Nmi);
            // Calculate dbi is false by default so Dbi would be 0
            Assert.Equal(0.0, metrics.Dbi);
            // The evaluated points are the centers themselves, so the score is compared to 0 at 5 decimal places.
            Assert.Equal(0.0, metrics.AvgMinScore, 5);
        }