/// <summary>
/// Trains a four-cluster KMeans model on the comma-separated file at <c>_dataPath1</c>
/// and prints the first three coordinates of the first cluster centroid.
/// </summary>
public static void TrainOption2()
{
    // Fixed seed, as in every run of this method.
    var context = new MLContext(seed: 0);

    // The file has a header row and is mapped onto the UserData class.
    IDataView inputData = context.Data.LoadFromTextFile<UserData>(
        _dataPath1,
        hasHeader: true,
        separatorChar: ',');

    var trainerOptions = new KMeansTrainer.Options
    {
        FeatureColumnName = "Utilities",
        NumberOfClusters = 4,
        NumberOfThreads = 1
    };

    var trainedModel = context.Clustering.Trainers.KMeans(trainerOptions).Fit(inputData);

    // Only the centroids are needed here; the cluster count is discarded.
    VBuffer<float>[] centroids = default;
    trainedModel.Model.GetClusterCentroids(ref centroids, out _);

    var leadingCoordinates = centroids[0].GetValues().ToArray().Take(3);
    Console.WriteLine(
        $"The first 3 coordinates of the first centroid are: {string.Join(", ", leadingCoordinates)}");
}
/// <summary>
/// KMeans <see cref="ClusteringCatalog"/> extension method.
/// </summary>
/// <param name="catalog">The clustering catalog trainer object.</param>
/// <param name="features">The features, or independent variables.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="options">Algorithm advanced settings. Note that its <c>FeatureColumnName</c> and
/// <c>ExampleWeightColumnName</c> are overwritten on this instance with the reconciled column names
/// when the trainer is created.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the <see cref="KMeansModelParameters"/> that were trained. Note that this action cannot change the result in any way;
/// it is only a way for the caller to be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
public static (Vector<float> score, Key<uint> predictedLabel) KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
    Vector<float> features, Scalar<float> weights,
    KMeansTrainer.Options options,
    Action<KMeansModelParameters> onFit = null)
{
    // Same argument contract as the clustersCount overload: features are mandatory,
    // weights and onFit may be null.
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    Contracts.CheckValue(options, nameof(options));
    Contracts.CheckValueOrNull(onFit);

    var rec = new TrainerEstimatorReconciler.Clustering(
        (env, featuresName, weightsName) =>
        {
            // The reconciler supplies the actual column names; they replace whatever
            // the caller put in the options object.
            options.FeatureColumnName = featuresName;
            options.ExampleWeightColumnName = weightsName;

            var trainer = new KMeansTrainer(env, options);

            if (onFit != null)
            {
                return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
            }
            else
            {
                return trainer;
            }
        }, features, weights);

    return rec.Output;
}
/// <summary>
/// KMeans <see cref="ClusteringCatalog"/> extension method taking the number of clusters directly.
/// </summary>
/// <param name="catalog">The clustering catalog trainer object.</param>
/// <param name="features">The features, or independent variables. Must not be null.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="clustersCount">The number of clusters to use for KMeans. Must be greater than 1.</param>
/// <param name="onFit">Optional delegate invoked every time
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> created from this call. It receives the
/// <see cref="KMeansModelParameters"/> of the fitted transformer; it only informs the caller and
/// cannot change the result.</param>
/// <returns>The predicted output.</returns>
public static (Vector<float> score, Key<uint> predictedLabel) KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
    Vector<float> features, Scalar<float> weights = null,
    int clustersCount = KMeansTrainer.Defaults.NumberOfClusters,
    Action<KMeansModelParameters> onFit = null)
{
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    Contracts.CheckParam(clustersCount > 1, nameof(clustersCount), "If provided, must be greater than 1.");
    Contracts.CheckValueOrNull(onFit);

    var reconciler = new TrainerEstimatorReconciler.Clustering(
        (host, featureColumn, weightColumn) =>
        {
            var trainerOptions = new KMeansTrainer.Options
            {
                NumberOfClusters = clustersCount,
                FeatureColumnName = featureColumn,
                ExampleWeightColumnName = weightColumn
            };

            var kMeans = new KMeansTrainer(host, trainerOptions);

            // Without a callback the bare trainer is returned as-is.
            if (onFit == null)
            {
                return kMeans;
            }

            return kMeans.WithOnFitDelegate(fitted => onFit(fitted.Model));
        }, features, weights);

    return reconciler.Output;
}
/// <summary>
/// Generates the dataset and, unless a trained model file already exists under
/// <c>Data/trainedModel.zip</c> in the current directory, trains a KMeans clustering model on the
/// training split, evaluates it on the test split and saves it as a .zip file.
/// </summary>
/// <remarks>Any exception is logged and then rethrown to the caller.</remarks>
public void TrainModelIfNotExists()
{
    try
    {
        GenerateDataset();

        string modelPath = Path.Combine(Environment.CurrentDirectory, "Data", "trainedModel.zip");
        if (File.Exists(modelPath))
        {
            // Report the path that was actually checked (the model file, not the input data file).
            Logger.LogInformation($"Trained model found at {modelPath}. Skipping training.");
            return;
        }

        // Item1 is passed to the text loader as its column definition and Item2 to Concatenate as
        // the input column names.
        var result = GenerateNames();
        var textLoader = MLContext.Data.CreateTextLoader(result.Item1, hasHeader: true, separatorChar: ',');
        var data = textLoader.Load(InputPath);

        DataOperationsCatalog.TrainTestData trainTestData = MLContext.Data.TrainTestSplit(data);
        var trainingDataView = trainTestData.TrainSet;
        var testingDataView = trainTestData.TestSet;

        var options = new KMeansTrainer.Options
        {
            NumberOfClusters = NumberOfClusters,
            OptimizationTolerance = 1e-6f,
            NumberOfThreads = 1,
            MaximumNumberOfIterations = 10,
            FeatureColumnName = "Features"
        };

        var dataProcessPipeline = MLContext
            .Transforms
            .Concatenate("Features", result.Item2)
            .Append(MLContext.Clustering.Trainers.KMeans(options));

        // Fit on the training split only, so the evaluation below runs on rows the model has not seen.
        var trainedModel = dataProcessPipeline.Fit(trainingDataView);

        IDataView predictions = trainedModel.Transform(testingDataView);
        var metrics = MLContext.Clustering.Evaluate(predictions, scoreColumnName: "Score", featureColumnName: "Features");
        Logger.LogInformation(
            $"Test split evaluation: average distance = {metrics.AverageDistance}, Davies-Bouldin index = {metrics.DaviesBouldinIndex}");

        // Save/persist the trained model to a .ZIP file
        MLContext.Model.Save(trainedModel, data.Schema, modelPath);
        Logger.LogInformation($"The model was saved to {modelPath}");
    }
    catch (Exception ex)
    {
        Logger.LogError(ex, "Model training operation failed.");
        throw;
    }
}
/// <summary>
/// Creates a <see cref="KMeansTrainer"/> (KMeans++ clustering) from column names and a cluster count.
/// </summary>
/// <param name="catalog">The clustering catalog trainer object. Must not be null.</param>
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numberOfClusters">The number of clusters to use for KMeans.</param>
/// <returns>A <see cref="KMeansTrainer"/> configured with the given settings.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[KMeans](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs)]
/// ]]></format>
/// </example>
public static KMeansTrainer KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
    string featureColumnName = DefaultColumnNames.Features,
    string exampleWeightColumnName = null,
    int numberOfClusters = KMeansTrainer.Defaults.NumberOfClusters)
{
    Contracts.CheckValue(catalog, nameof(catalog));

    // Bundle the three arguments into the options object the trainer constructor expects.
    var trainerOptions = new KMeansTrainer.Options
    {
        NumberOfClusters = numberOfClusters,
        FeatureColumnName = featureColumnName,
        ExampleWeightColumnName = exampleWeightColumnName
    };

    return new KMeansTrainer(CatalogUtils.GetEnvironment(catalog), trainerOptions);
}
/// <summary>
/// Sample: trains a two-cluster KMeans model on generated data points, prints a few predictions
/// and the clustering metrics for a separately generated test set, then prints the leading
/// coordinates of both cluster centroids.
/// </summary>
public static void Example()
{
    // The MLContext is the entry point to ML.NET operations. A fixed seed keeps the
    // sample's output deterministic.
    var context = new MLContext(seed: 0);

    // Generate the training data points and wrap them in an IDataView, the data type
    // consumed by the ML.NET API.
    var trainingPoints = GenerateRandomDataPoints(1000, 0);
    var trainingView = context.Data.LoadFromEnumerable(trainingPoints);

    // Trainer configuration.
    var trainerOptions = new KMeansTrainer.Options
    {
        NumberOfClusters = 2,
        OptimizationTolerance = 1e-6f,
        NumberOfThreads = 1
    };

    // Define the trainer and fit it to the training data.
    var fittedModel = context.Clustering.Trainers.KMeans(trainerOptions).Fit(trainingView);

    // Test data is generated with a different seed so that it differs from the training data.
    var testPoints = GenerateRandomDataPoints(500, seed: 123);
    var testView = context.Data.LoadFromEnumerable(testPoints);

    // Score the test set with the trained model.
    var scoredTestView = fittedModel.Transform(testView);

    // Materialize the scored rows as a list of Prediction objects.
    var predictions = context.Data
        .CreateEnumerable<Prediction>(scoredTestView, reuseRowObject: false)
        .ToList();

    // Print the first 2 and the last 3 predictions. The label is shown only for comparison
    // with the predicted label; it is not used during training.
    var sampledPredictions = predictions.Take(2).Concat(predictions.TakeLast(3));
    foreach (var prediction in sampledPredictions)
    {
        Console.WriteLine($"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
    }

    // Expected output:
    //   Label: 1, Prediction: 1
    //   Label: 1, Prediction: 1
    //   Label: 2, Prediction: 2
    //   Label: 2, Prediction: 2
    //   Label: 2, Prediction: 2

    // Evaluate the overall metrics.
    var clusteringMetrics = context.Clustering.Evaluate(scoredTestView, "Label", "Score", "Features");
    PrintMetrics(clusteringMetrics);

    // Expected output:
    //   Normalized Mutual Information: 0.92
    //   Average Distance: 4.18
    //   Davies Bouldin Index: 2.87

    // Read the cluster centroids from KMeansModelParameters; the cluster count is not needed here.
    VBuffer<float>[] centroids = default;
    fittedModel.Model.GetClusterCentroids(ref centroids, out _);

    var firstCentroidHead = string.Join(", ", centroids[0].GetValues().ToArray().Take(3));
    var secondCentroidHead = string.Join(", ", centroids[1].GetValues().ToArray().Take(3));
    Console.WriteLine($"The first 3 coordinates of the first centroid are: ({firstCentroidHead})");
    Console.WriteLine($"The first 3 coordinates of the second centroid are: ({secondCentroidHead})");

    // Expected output:
    //   The first 3 coordinates of the first centroid are: (0.5840713, 0.5678288, 0.6221277)
    //   The first 3 coordinates of the second centroid are: (0.3705794, 0.4289133, 0.4001645)
}
/// <summary>
/// Creates a <see cref="KMeansTrainer"/> (KMeans++ clustering) from an advanced options object.
/// </summary>
/// <param name="catalog">The clustering catalog trainer object. Must not be null.</param>
/// <param name="options">Algorithm advanced options. Must not be null.</param>
/// <returns>A <see cref="KMeansTrainer"/> configured with <paramref name="options"/>.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[KMeans](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs)]
/// ]]></format>
/// </example>
public static KMeansTrainer KMeans(this ClusteringCatalog.ClusteringTrainers catalog, KMeansTrainer.Options options)
{
    // Validate both arguments before touching the catalog's environment.
    Contracts.CheckValue(catalog, nameof(catalog));
    Contracts.CheckValue(options, nameof(options));

    return new KMeansTrainer(CatalogUtils.GetEnvironment(catalog), options);
}
/// <summary>
/// Entry point. Loads the country observations, trains a K-Means clustering model on the
/// min-max normalized features, prints training time and metrics, saves the model, and exports
/// a scatter plot (.svg) plus a per-country cluster assignment file (.csv).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    //###############################################################
    // PROCESS INITIALIZATION
    //###############################################################

    // Create the MLContext; a fixed seed makes runs reproducible.
    MLContext mlContext = new MLContext(seed: 1);

    // Input rows are described by the CountryObservation class.
    // Load the data.
    IDataView originalFullData = mlContext.Data
        .LoadFromTextFile<CountryObservation>(
            _DataPath,
            separatorChar: ',',
            hasHeader: true);

    //###############################################################
    // BUILD THE DATASET
    //###############################################################

    IDataView trainingDataView = originalFullData;

    // Save the training dataset.
    using (var fileStream = File.Create(_salida_trainDataPath))
    {
        mlContext.Data.SaveAsText(trainingDataView, fileStream, separatorChar: ';', headerRow: true, schema: true);
    }

    //###############################################################
    // FEATURE SELECTION
    //###############################################################

    // Every schema column except "country" (carries no information for clustering) is a feature.
    string[] featureColumnNames = trainingDataView.Schema
        .Select(column => column.Name)
        .Where(name => name != "country")
        .ToArray();

    //###############################################################
    // DATA TRANSFORMATION --> pipeline
    //###############################################################

    // Concatenate the feature columns, then min-max normalize them.
    IEstimator<ITransformer> pipeline = mlContext.Transforms.Concatenate("Features", featureColumnNames)
        .Append(mlContext.Transforms.NormalizeMinMax(inputColumnName: "Features", outputColumnName: "FeaturesNormalized"));

    // Save the transformed dataset.
    IDataView transformedData = pipeline.Fit(trainingDataView).Transform(trainingDataView);
    using (var fileStream = File.Create(_salida_transformationData))
    {
        mlContext.Data.SaveAsText(transformedData, fileStream, separatorChar: ';', headerRow: true, schema: true);
    }

    //###############################################################
    // TRAINING ALGORITHM SELECTION --> trainingPipeline
    //###############################################################

    // 1. K-Means

    // Number of clusters.
    int k = 4;

    // K-Means options.
    var options = new KMeansTrainer.Options
    {
        FeatureColumnName = "FeaturesNormalized",
        NumberOfClusters = k,
        MaximumNumberOfIterations = 5800,
        OptimizationTolerance = 1e-6f
    };

    // K-Means trainer.
    var trainer_km = mlContext.Clustering.Trainers.KMeans(options);

    // Append the algorithm to the data transformation pipeline.
    IEstimator<ITransformer> trainingPipeline_km = pipeline.Append(trainer_km);

    //###############################################################
    // MODEL TRAINING
    //###############################################################

    Console.WriteLine("\n**************************************************************");
    Console.WriteLine("* Entrenamiento del Modelo calculado con el Algoritmo K-Means ");
    Console.WriteLine("*-------------------------------------------------------------");
    var watch_km = System.Diagnostics.Stopwatch.StartNew();
    var model_km = trainingPipeline_km.Fit(trainingDataView);
    watch_km.Stop();
    var elapseds_km = watch_km.ElapsedMilliseconds * 0.001;

    // "0.##" keeps the leading zero; "#.##" would print ".5" for 0.5 and nothing at all for 0.
    Console.WriteLine($"El entrenamiento K-Means ha tardado: {elapseds_km:0.##} s\n");

    //###############################################################
    // MODEL EVALUATION
    //###############################################################

    // Score the training data with the trained model. This view is reused below for the
    // plot and the CSV export.
    var predictions_km = model_km.Transform(trainingDataView);

    // Compute the clustering metrics.
    var metrics_km = mlContext.Clustering.Evaluate(predictions_km, scoreColumnName: "Score", featureColumnName: "FeaturesNormalized");

    // Print the K-Means metrics.
    Console.WriteLine("\n**************************************************************");
    Console.WriteLine("* Métricas para el Modelo calculado con el Algoritmo K-Means ");
    Console.WriteLine("*-------------------------------------------------------------");
    Console.WriteLine($"* K-Means Average Distance: {metrics_km.AverageDistance:0.##}");
    Console.WriteLine($"* K-Means Davies Bouldin Index: {metrics_km.DaviesBouldinIndex:0.##}");
    Console.WriteLine($"* K-Means Normalized Mutual Information: {metrics_km.NormalizedMutualInformation:0.##}");

    //###############################################################
    // MODEL SELECTION
    //###############################################################

    // Save the model for later consumption.
    mlContext.Model.Save(model_km, trainingDataView.Schema, _salida_modelPath);

    //###############################################################
    // MODEL VISUALIZATION
    //###############################################################

    // Scored rows are described by the CountryPrediction class.

    // Initialize the plot model.
    var plot = new PlotModel { Title = "Clúster Paises", IsLegendVisible = true };

    // Materialize the scored rows as an array of CountryPrediction objects.
    var predictions = mlContext.Data
        .CreateEnumerable<CountryPrediction>(predictions_km, false)
        .ToArray();

    // Distinct predicted cluster labels, in ascending order.
    var clusters = predictions
        .Select(p => p.PredictedLabel).Distinct().OrderBy(x => x);

    // Build one scatter series per cluster.
    foreach (var cluster in clusters)
    {
        var scatter = new ScatterSeries { MarkerType = MarkerType.Circle, MarkerStrokeThickness = 2, Title = $"Cluster: {cluster}", RenderInLegend = true };

        // Two of the Location coordinates (indices 2 and 0) give the X and Y of each point.
        var series = predictions
            .Where(p => p.PredictedLabel == cluster)
            .Select(p => new ScatterPoint(p.Location[2], p.Location[0])).ToArray();
        scatter.Points.AddRange(series);
        plot.Series.Add(scatter);
    }

    // Give each cluster its own color.
    plot.DefaultColors = OxyPalettes.HueDistinct(plot.Series.Count).Colors;

    // Save the plot as an .svg file.
    var exporter = new SvgExporter { Width = 1000, Height = 800 };
    using (var fs = new System.IO.FileStream(_salida_plotDataPath, System.IO.FileMode.Create))
    {
        exporter.Export(plot, fs);
    }

    // Save a .csv file with the resulting cluster for each country. Disposing the writer
    // flushes it, so no per-row flush is needed.
    using (var w = new System.IO.StreamWriter(_salida_ResultDataPath))
    {
        w.WriteLine("Country;Cluster");
        foreach (var prediction in predictions)
        {
            w.WriteLine($"{prediction.country};{prediction.PredictedLabel}");
        }
    }
}