Example #1
0
        /// <summary>
        /// Trains a four-cluster KMeans model on the "Utilities" column of the comma-separated
        /// file at <c>_dataPath1</c> and prints the first three coordinates of the first centroid.
        /// </summary>
        public static void TrainOption2()
        {
            var context = new MLContext(seed: 0);

            // The file has a header row; each record is mapped onto UserData.
            IDataView userRows = context.Data.LoadFromTextFile<UserData>(_dataPath1, hasHeader: true, separatorChar: ',');

            var trainerOptions = new KMeansTrainer.Options();
            trainerOptions.NumberOfClusters  = 4;
            trainerOptions.NumberOfThreads   = 1;
            trainerOptions.FeatureColumnName = "Utilities";

            var fittedModel = context.Clustering.Trainers.KMeans(trainerOptions).Fit(userRows);

            // GetClusterCentroids fills the buffer array; the cluster count it also reports is not needed here.
            VBuffer<float>[] centroids = default;
            fittedModel.Model.GetClusterCentroids(ref centroids, out _);

            var leadingCoordinates = string.Join(", ", centroids[0].GetValues().ToArray().Take(3));
            Console.WriteLine("The first 3 coordinates of the first centroid are: " + leadingCoordinates);
        }
Example #2
0
        /// <summary>
        /// KMeans <see cref="ClusteringCatalog"/> extension method.
        /// </summary>
        /// <param name="catalog">The clustering catalog trainer object.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Algorithm advanced settings. Its <c>FeatureColumnName</c> and
        /// <c>ExampleWeightColumnName</c> members are overwritten with the reconciled column names
        /// inside the reconciler callback, so the caller's instance is modified.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the KMeans model parameters that were trained.  Note that this action cannot change the result in any way; it is only a way for the caller to
        /// be informed about what was learnt.</param>
        /// <returns>The predicted output.</returns>
        public static (Vector <float> score, Key <uint> predictedLabel) KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
                                                                               Vector <float> features, Scalar <float> weights,
                                                                               KMeansTrainer.Options options,
                                                                               Action <KMeansModelParameters> onFit = null)
        {
            // Validate the same way the clustersCount overload does: features are mandatory,
            // weights and onFit are optional.
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValue(options, nameof(options));
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.Clustering(
                (env, featuresName, weightsName) =>
            {
                // The reconciler supplies the actual column names; they replace whatever
                // the caller put into the options object.
                options.FeatureColumnName       = featuresName;
                options.ExampleWeightColumnName = weightsName;

                var trainer = new KMeansTrainer(env, options);

                if (onFit != null)
                {
                    // Hand the trained model parameters to the caller's callback after each fit.
                    return(trainer.WithOnFitDelegate(trans => onFit(trans.Model)));
                }

                return(trainer);
            }, features, weights);

            return(rec.Output);
        }
Example #3
0
        /// <summary>
        /// KMeans <see cref="ClusteringCatalog"/> extension method.
        /// </summary>
        /// <param name="catalog">The clustering catalog trainer object.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="clustersCount">The number of clusters to use for KMeans; must be greater than 1.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. The delegate receives
        /// the model parameters that were trained.  It cannot change the result in any way; it only lets the caller
        /// observe what was learnt.</param>
        /// <returns>The predicted output.</returns>
        public static (Vector<float> score, Key<uint> predictedLabel) KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
                                                                             Vector<float> features, Scalar<float> weights = null,
                                                                             int clustersCount = KMeansTrainer.Defaults.NumberOfClusters,
                                                                             Action<KMeansModelParameters> onFit = null)
        {
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(clustersCount > 1, nameof(clustersCount), "If provided, must be greater than 1.");
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new TrainerEstimatorReconciler.Clustering(
                (env, featuresName, weightsName) =>
                {
                    // Build the trainer options from the reconciled column names and the requested cluster count.
                    var trainerOptions = new KMeansTrainer.Options();
                    trainerOptions.FeatureColumnName       = featuresName;
                    trainerOptions.NumberOfClusters        = clustersCount;
                    trainerOptions.ExampleWeightColumnName = weightsName;

                    var kMeans = new KMeansTrainer(env, trainerOptions);

                    // Without a callback the bare trainer is returned.
                    if (onFit == null)
                    {
                        return kMeans;
                    }

                    return kMeans.WithOnFitDelegate(fitted => onFit(fitted.Model));
                },
                features,
                weights);

            return reconciler.Output;
        }
        /// <summary>
        /// Generates the dataset and, unless a trained model file already exists under
        /// <c>Data/trainedModel.zip</c> in the current directory, trains a KMeans clustering model,
        /// evaluates it on the held-out split and saves it as a .zip file.
        /// </summary>
        /// <remarks>
        /// Any exception raised along the way is logged and then rethrown to the caller.
        /// </remarks>
        public void TrainModelIfNotExists()
        {
            try
            {
                GenerateDataset();
                string modelPath = Path.Combine(Environment.CurrentDirectory, "Data", "trainedModel.zip");
                if (File.Exists(modelPath))
                {
                    // Report the location of the model itself, not of the input dataset.
                    Logger.LogInformation($"Trained model found at {modelPath}. Skipping training.");
                    return;
                }

                var result     = GenerateNames();
                var textLoader = MLContext.Data.CreateTextLoader(result.Item1, hasHeader: true, separatorChar: ',');
                var data       = textLoader.Load(InputPath);
                DataOperationsCatalog.TrainTestData trainTestData = MLContext.Data.TrainTestSplit(data);
                var trainingDataView = trainTestData.TrainSet;
                var testingDataView  = trainTestData.TestSet;

                var options = new KMeansTrainer.Options
                {
                    NumberOfClusters          = NumberOfClusters,
                    OptimizationTolerance     = 1e-6f,
                    NumberOfThreads           = 1,
                    MaximumNumberOfIterations = 10,
                    FeatureColumnName         = "Features"
                };

                var dataProcessPipeline = MLContext
                                          .Transforms
                                          .Concatenate("Features", result.Item2)
                                          .Append(MLContext.Clustering.Trainers.KMeans(options));

                // Fit on the training split only, so the test split used below stays unseen.
                var trainedModel = dataProcessPipeline.Fit(trainingDataView);

                IDataView predictions = trainedModel.Transform(testingDataView);
                var       metrics     = MLContext.Clustering.Evaluate(predictions, scoreColumnName: "Score", featureColumnName: "Features");

                Logger.LogInformation($"Test split metrics - average distance: {metrics.AverageDistance}, Davies-Bouldin index: {metrics.DaviesBouldinIndex}");

                // Make sure the target directory exists before writing the model file.
                Directory.CreateDirectory(Path.GetDirectoryName(modelPath));

                // Save/persist the trained model to a .ZIP file
                MLContext.Model.Save(trainedModel, data.Schema, modelPath);

                Logger.LogInformation($"The model was saved to {modelPath}");
            }
            catch (Exception ex)
            {
                Logger.LogError(ex, "Model training operation failed.");
                throw;
            }
        }
Example #5
0
        /// <summary>
        /// Creates a <see cref="KMeansTrainer"/> (KMeans++ clustering) from column names and a cluster count.
        /// </summary>
        /// <param name="catalog">The clustering catalog trainer object.</param>
        /// <param name="featureColumnName">The name of the feature column.</param>
        /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
        /// <param name="numberOfClusters">The number of clusters to use for KMeans.</param>
        /// <returns>A trainer configured with the given column names and cluster count.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[KMeans](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs)]
        /// ]]></format>
        /// </example>
        public static KMeansTrainer KMeans(this ClusteringCatalog.ClusteringTrainers catalog,
                                           string featureColumnName       = DefaultColumnNames.Features,
                                           string exampleWeightColumnName = null,
                                           int numberOfClusters           = KMeansTrainer.Defaults.NumberOfClusters)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            var hostEnvironment = CatalogUtils.GetEnvironment(catalog);

            // Translate the simple arguments into the trainer's options object.
            var trainerOptions = new KMeansTrainer.Options();
            trainerOptions.FeatureColumnName       = featureColumnName;
            trainerOptions.ExampleWeightColumnName = exampleWeightColumnName;
            trainerOptions.NumberOfClusters        = numberOfClusters;

            return new KMeansTrainer(hostEnvironment, trainerOptions);
        }
Example #6
0
        /// <summary>
        /// Sample: trains a two-cluster KMeans model with explicit options on generated data,
        /// prints a few predictions, the evaluation metrics and the leading centroid coordinates.
        /// </summary>
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Generate the list of training data points.
            var trainingPoints = GenerateRandomDataPoints(1000, 0);

            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(trainingPoints);

            // Define trainer options.
            var trainerOptions = new KMeansTrainer.Options();
            trainerOptions.NumberOfClusters      = 2;
            trainerOptions.OptimizationTolerance = 1e-6f;
            trainerOptions.NumberOfThreads       = 1;

            // Define the trainer and train the model.
            var trainer = mlContext.Clustering.Trainers.KMeans(trainerOptions);
            var model   = trainer.Fit(trainingData);

            // Create testing data. Use different random seed to make it different from training data.
            var testPoints = GenerateRandomDataPoints(500, seed: 123);
            var testData   = mlContext.Data.LoadFromEnumerable(testPoints);

            // Run the model on test data set.
            var scoredTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(scoredTestData, reuseRowObject: false).ToList();

            // Print the first 2 and the last 3 predictions. Note that the label is only used as a comparison
            // with the predicted label. It is not used during training.
            foreach (var prediction in predictions.Take(2).Concat(predictions.TakeLast(3)))
            {
                Console.WriteLine($"Label: {prediction.Label}, Prediction: {prediction.PredictedLabel}");
            }

            // Expected output:
            //   Label: 1, Prediction: 1
            //   Label: 1, Prediction: 1
            //   Label: 2, Prediction: 2
            //   Label: 2, Prediction: 2
            //   Label: 2, Prediction: 2

            // Evaluate the overall metrics
            PrintMetrics(mlContext.Clustering.Evaluate(scoredTestData, "Label", "Score", "Features"));

            // Expected output:
            //   Normalized Mutual Information: 0.92
            //   Average Distance: 4.18
            //   Davies Bouldin Index: 2.87

            // Get cluster centroids from KMeansModelParameters; the reported cluster count is not needed here.
            VBuffer<float>[] centroids = default;
            model.Model.GetClusterCentroids(ref centroids, out _);

            var firstCentroidHead = string.Join(", ", centroids[0].GetValues().ToArray().Take(3));
            Console.WriteLine($"The first 3 coordinates of the first centroid are: ({firstCentroidHead})");

            var secondCentroidHead = string.Join(", ", centroids[1].GetValues().ToArray().Take(3));
            Console.WriteLine($"The first 3 coordinates of the second centroid are: ({secondCentroidHead})");

            // Expected output:
            //   The first 3 coordinates of the first centroid are: (0.5840713, 0.5678288, 0.6221277)
            //   The first 3 coordinates of the second centroid are: (0.3705794, 0.4289133, 0.4001645)
        }
Example #7
0
        /// <summary>
        /// Creates a <see cref="KMeansTrainer"/> (KMeans++ clustering) from an advanced options object.
        /// </summary>
        /// <param name="catalog">The clustering catalog trainer object.</param>
        /// <param name="options">Algorithm advanced options.</param>
        /// <returns>A trainer configured with <paramref name="options"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[KMeans](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs)]
        /// ]]></format>
        /// </example>
        public static KMeansTrainer KMeans(this ClusteringCatalog.ClusteringTrainers catalog, KMeansTrainer.Options options)
        {
            // Both arguments are mandatory.
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(options, nameof(options));

            return new KMeansTrainer(CatalogUtils.GetEnvironment(catalog), options);
        }
Example #8
0
        /// <summary>
        /// End-to-end K-Means clustering script: loads the country observations, writes the raw and
        /// transformed datasets to text files, trains a 4-cluster K-Means model on min-max normalized
        /// features, prints the clustering metrics, saves the model, and exports a scatter plot (.svg)
        /// and a country/cluster listing (.csv).
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            //###############################################################
            //PROCESS INITIALIZATION
            //###############################################################

            //Initialize mlContext; a fixed seed is used for reproducibility
            MLContext mlContext = new MLContext(seed: 1);

            //Input data classes:
            //  -Observation class: CountryObservation

            //Load the data
            IDataView originalFullData = mlContext.Data
                                         .LoadFromTextFile <CountryObservation>(
                _DataPath,
                separatorChar: ',',
                hasHeader: true);


            //###############################################################
            //BUILD THE DATASET
            //###############################################################

            IDataView trainingDataView = originalFullData;

            //Save the trainingDataView dataset
            using (var fileStream = File.Create(_salida_trainDataPath))
            {
                mlContext.Data.SaveAsText(trainingDataView, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }


            //###############################################################
            //FEATURE SELECTION
            //###############################################################

            //Keep every schema column except the ones not selected as features
            string[] featureColumnNames = trainingDataView.Schema.AsQueryable()
                                          .Select(column => column.Name)
                                          .Where(name => name != "country")//carries no information
                                          .ToArray();

            //###############################################################
            //MODEL DATA TRANSFORMATION --> pipeline
            //###############################################################

            //Concatenate
            IEstimator <ITransformer> pipeline = mlContext.Transforms.Concatenate("Features",
                                                                                  featureColumnNames)
                                                 //Normalize the Features
                                                 .Append(mlContext.Transforms.NormalizeMinMax(inputColumnName: "Features",
                                                                                              outputColumnName: "FeaturesNormalized"));

            //Save the transformedData dataset
            IDataView transformedData =
                pipeline.Fit(trainingDataView).Transform(trainingDataView);

            using (var fileStream = File.Create(_salida_transformationData))
            {
                mlContext.Data.SaveAsText(transformedData, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }


            //###############################################################
            //TRAINING ALGORITHM SELECTION --> trainingPipeline
            //###############################################################

            //***************************************************************
            //1. K-Means
            //***************************************************************

            //Choose the number of clusters
            int k = 4;

            //K-Means options
            var options = new KMeansTrainer.Options
            {
                FeatureColumnName         = "FeaturesNormalized",
                NumberOfClusters          = k,
                MaximumNumberOfIterations = 5800,
                OptimizationTolerance     = 1e-6f
            };

            //K-Means
            var trainer_km = mlContext.Clustering.Trainers.KMeans(options);

            //Append the algorithm to the data transformation pipeline
            IEstimator <ITransformer> trainingPipeline_km = pipeline.Append(trainer_km);


            //###############################################################
            //MODEL TRAINING
            //###############################################################

            Console.WriteLine($"\n**************************************************************");
            Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo K-Means   ");
            Console.WriteLine($"*-------------------------------------------------------------");
            //Time the fit so the elapsed seconds can be reported below
            var watch_km = System.Diagnostics.Stopwatch.StartNew();
            var model_km = trainingPipeline_km.Fit(trainingDataView);

            watch_km.Stop();
            var elapseds_km = watch_km.ElapsedMilliseconds * 0.001;

            Console.WriteLine($"El entrenamiento K-Means ha tardado: {elapseds_km:#.##} s\n");


            //###############################################################
            //MODEL EVALUATION
            //###############################################################

            //Transform the trainingDataView IDataView
            //NOTE: the metrics below are computed on the same data the model was fitted on
            var predictions_km = model_km.Transform(trainingDataView);

            //Compute the model metrics
            var metrics_km = mlContext.Clustering.Evaluate(predictions_km,
                                                           scoreColumnName: "Score",
                                                           featureColumnName: "FeaturesNormalized");

            //Print the K-Means metrics
            Console.WriteLine($"\n**************************************************************");
            Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo K-Means      ");
            Console.WriteLine($"*-------------------------------------------------------------");
            Console.WriteLine($"*       K-Means Average Distance:  {metrics_km.AverageDistance:#.##}");
            Console.WriteLine($"*       K-Means Davies Bouldin Index:  {metrics_km.DaviesBouldinIndex:#.##}");
            Console.WriteLine($"*       K-Means Normalized Mutual Information:  {metrics_km.NormalizedMutualInformation:#.##}");


            //###############################################################
            //MODEL SELECTION
            //###############################################################

            //Save the model for later consumption
            mlContext.Model.Save(model_km, trainingDataView.Schema, _salida_modelPath);


            //###############################################################
            //MODEL VISUALIZATION
            //###############################################################

            //Prediction classes:
            //  -Prediction class: CountryPrediction

            //Initialize the PlotModel
            var plot = new PlotModel {
                Title = "Clúster Paises", IsLegendVisible = true
            };

            //Transform the dataset with the model
            var predictionsData = model_km.Transform(trainingDataView);

            //Build an array from the IDataView and the prediction class
            var predictions = mlContext.Data
                              .CreateEnumerable <CountryPrediction>(predictionsData, false)
                              .ToArray();

            //Extract the distinct predicted cluster labels, in ascending order
            var clusters = predictions
                           .Select(p => p.PredictedLabel).Distinct().OrderBy(x => x);

            //Build the set of points to plot, one scatter series per cluster
            foreach (var cluster in clusters)
            {
                var scatter = new ScatterSeries {
                    MarkerType            = MarkerType.Circle,
                    MarkerStrokeThickness = 2,
                    Title          = $"Cluster: {cluster}",
                    RenderInLegend = true
                };
                //ScatterPoint array (2 dimensions)
                var series = predictions
                             .Where(p => p.PredictedLabel == cluster)
                             //Pick 2 of the 5 coordinates of our Features
                             .Select(p => new ScatterPoint(p.Location[2], p.Location[0])).ToArray();
                scatter.Points.AddRange(series);

                plot.Series.Add(scatter);
            }

            //Assign a color to each cluster
            plot.DefaultColors = OxyPalettes.HueDistinct(plot.Series.Count).Colors;

            //Save the chart to an .svg file
            var exporter = new SvgExporter {
                Width = 1000, Height = 800
            };

            using (var fs = new System.IO.FileStream(_salida_plotDataPath, System.IO.FileMode.Create))
            {
                exporter.Export(plot, fs);
            }

            //Save a .csv file with the resulting cluster for each country
            using (var w = new System.IO.StreamWriter(_salida_ResultDataPath))
            {
                w.WriteLine($"Country;Cluster");
                w.Flush();
                predictions.ToList().ForEach(prediction =>
                {
                    w.WriteLine($"{prediction.country};{prediction.PredictedLabel}");
                    w.Flush();
                });
            }
        }