/// <summary>
/// Trains a KMeans++ clusterer (legacy pipeline API) on synthetic data drawn around
/// k centers on the unit circle, then verifies each center predicts a distinct
/// cluster and checks the evaluator's metric defaults.
/// </summary>
public void PredictClusters()
{
    int n = 1000;
    int k = 4;
    var rand = new Random(1); // fixed seed keeps the test deterministic
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];

    // Pick cluster centers as points on the unit circle with angle 360*i/k to the X axis.
    for (int i = 0; i < k; i++)
    {
        clusters[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)Math.Cos(Math.PI * i * 2 / k),
                (float)Math.Sin(Math.PI * i * 2 / k)
            }
        };
    }

    // Create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / 10;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var pipeline = new Legacy.LearningPipeline(seed: 1, conc: 1);
    pipeline.Add(Legacy.Data.CollectionDataSource.Create(data));
    pipeline.Add(new Legacy.Trainers.KMeansPlusPlusClusterer() { K = k });
    var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // Validate that the generator's centers map to k distinct predicted clusters.
    var labels = new HashSet<uint>();
    for (int i = 0; i < k; i++)
    {
        var scores = model.Predict(clusters[i]);
        // HashSet<T>.Add returns false for duplicates, so this asserts uniqueness directly.
        Assert.True(labels.Add(scores.SelectedClusterId));
    }

    var evaluator = new Legacy.Models.ClusterEvaluator();
    var testData = Legacy.Data.CollectionDataSource.Create(clusters);
    var metrics = evaluator.Evaluate(model, testData);

    // FIX: xUnit's Assert.Equal takes the EXPECTED value first; the original had the
    // arguments swapped, which yields misleading "Expected/Actual" failure output.
    // Label is not specified, so NMI is NaN.
    Assert.Equal(double.NaN, metrics.Nmi);
    // CalculateDbi is false by default, so Dbi is 0.
    Assert.Equal(0.0, metrics.Dbi);
    Assert.Equal(0.0, metrics.AvgMinScore, 5);
}
/// <summary>
/// Scores a single sample on a background thread and returns the prediction as a Task.
/// NOTE(review): a new prediction function is built from <c>Model</c> on every call —
/// presumably acceptable for this call pattern; confirm if this runs in a hot path.
/// </summary>
/// <param name="clusteringData">The sample to assign to a cluster.</param>
/// <returns>A task producing the cluster prediction for the sample.</returns>
public Task<ClusteringPrediction> Predict(ClusteringData clusteringData)
{
    return Task.Run(() =>
        Model.MakePredictionFunction<ClusteringData, ClusteringPrediction>(_mlContext)
             .Predict(clusteringData));
}
/// <summary>
/// Trains a KMeans++ clusterer on synthetic data drawn around k centers on the unit
/// circle and verifies that each center predicts a distinct cluster.
/// </summary>
public void PredictClusters()
{
    int n = 1000;
    int k = 5;
    // FIX: the original used an unseeded Random, making this test non-deterministic
    // (and potentially flaky). Seed it like the sibling PredictClusters tests do.
    var rand = new Random(1);
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];

    // Pick cluster centers as points on the unit circle with angle 360*i/k to the X axis.
    for (int i = 0; i < k; i++)
    {
        clusters[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)Math.Cos(Math.PI * i * 2 / k),
                (float)Math.Sin(Math.PI * i * 2 / k)
            }
        };
    }

    // Create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / k;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
    var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // Validate that the generator's centers map to k distinct predicted clusters.
    var labels = new HashSet<uint>();
    for (int i = 0; i < k; i++)
    {
        var scores = model.Predict(clusters[i]);
        // HashSet<T>.Add returns false for duplicates, so this asserts uniqueness directly.
        Assert.True(labels.Add(scores.SelectedClusterId));
    }
}
/// <summary>
/// Evaluates the trained clustering model on the held-out data view, then scores the
/// full R/F/M grid (1..RfmMaxForTests per axis) to map each RFM cell to a cluster,
/// exporting the mapping to CSV.
/// </summary>
/// <param name="model">The trained clustering transformer to evaluate.</param>
/// <returns>The clustering metrics computed over the test data view.</returns>
public ClusteringMetrics Evaluate(ITransformer model)
{
    var predictions = model.Transform(_testingDataView);
    var metrics = _mlContext.Clustering.Evaluate(predictions, score: "Score", features: "Features");
    TrainModel = model;

    // Run test cases over the full RFM grid to identify cluster assignments.
    var predictionFunction = TrainModel.CreatePredictionEngine<ClusteringData, ClusteringPrediction>(_mlContext);
    var tests = new List<TestCase>();
    for (var r = 1; r <= RfmMaxForTests; r++)
    {
        for (var f = 1; f <= RfmMaxForTests; f++)
        {
            for (var m = 1; m <= RfmMaxForTests; m++)
            {
                // FIX: the original assigned M = f and F = m, swapping the
                // Frequency and Monetary values for every generated test case.
                var data = new ClusteringData { R = r, F = f, M = m };
                var prediction = predictionFunction.Predict(data);
                tests.Add(new TestCase { Data = data, Cluster = prediction.SelectedClusterId });
            }
        }
    }

    // Save the RFM -> cluster matching as CSV.
    var fileService = new FileService();
    fileService.ExportToCsv(tests);
    return metrics;
}
/// <summary>
/// Trains a KMeans clusterer (MLContext API) on synthetic data drawn around k centers
/// on the unit circle, verifies each center predicts a distinct cluster, and checks
/// the evaluator's metric defaults.
/// </summary>
public void PredictClusters()
{
    int n = 1000;
    int k = 4;
    var rand = new Random(1); // fixed seed keeps the test deterministic
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];

    // Pick cluster centers as points on the unit circle with angle 360*i/k to the X axis.
    for (int i = 0; i < k; i++)
    {
        clusters[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)Math.Cos(Math.PI * i * 2 / k),
                (float)Math.Sin(Math.PI * i * 2 / k)
            }
        };
    }

    // Create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / 10;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var mlContext = new MLContext(seed: 1, conc: 1);

    // Turn the data into the ML.NET data view.
    // We can use CreateDataView or CreateStreamingDataView, depending on whether
    // the collection is an IList or merely an IEnumerable.
    var trainData = mlContext.CreateStreamingDataView(data);
    var testData = mlContext.CreateStreamingDataView(clusters);

    // Create the estimator and train the pipeline.
    var pipe = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: k);
    var trainedModel = pipe.Fit(trainData);

    // Validate that the generator's centers map to k distinct predicted clusters.
    var labels = new HashSet<uint>();
    var predictFunction = trainedModel.CreatePredictionEngine<ClusteringData, ClusteringPrediction>(mlContext);
    for (int i = 0; i < k; i++)
    {
        var scores = predictFunction.Predict(clusters[i]);
        // HashSet<T>.Add returns false for duplicates, so this asserts uniqueness directly.
        Assert.True(labels.Add(scores.SelectedClusterId));
    }

    // Evaluate the trained pipeline.
    var predicted = trainedModel.Transform(testData);
    var metrics = mlContext.Clustering.Evaluate(predicted);

    // FIX: xUnit's Assert.Equal takes the EXPECTED value first; the original had the
    // arguments swapped, which yields misleading "Expected/Actual" failure output.
    // Label is not specified, so NMI is NaN.
    Assert.Equal(double.NaN, metrics.Nmi);
    // Calculate dbi is false by default, so Dbi is 0.
    Assert.Equal(0.0, metrics.Dbi);
    Assert.Equal(0.0, metrics.AvgMinScore, 5);
}