/// <summary>
/// Fits the training pipeline to the supplied data, stores the resulting model in
/// <c>TrainedModel</c>, builds <c>PredictionFunction</c> from it, and returns the model.
/// </summary>
/// <param name="trainingData">Data the training pipeline is fitted on.</param>
/// <returns>The fitted model (the same instance stored in <c>TrainedModel</c>).</returns>
public ITransformer Train(IDataView trainingData)
 {
     // Fit first: the prediction function below is derived from the freshly fitted model.
     TrainedModel = _trainingPipeline.Fit(trainingData);

     PredictionFunction = TrainedModel
                          .MakePredictionFunction<DemandObservation, DemandPrediction>(_mlcontext);

     return TrainedModel;
 }
        /// <summary>
        /// Demonstrates the IndicateMissingValues transform: builds three in-memory data points,
        /// adds a "MissingIndicator" column computed from "Features", and prints each row's
        /// label, features and indicator vector to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            var samples = new List<DataPoint>()
            {
                new DataPoint() { Label = 3, Features = new float[3] { 1, 1, 0 } },
                new DataPoint() { Label = 32, Features = new float[3] { 0, float.NaN, 1 } },
                new DataPoint() { Label = float.NaN, Features = new float[3] { -1, float.NaN, -3 } },
            };

            // Convert training data to IDataView, the general data type used in ML.NET.
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // IndicateMissingValues is used to create a boolean containing
            // 'true' where the value in the input column is NaN. This value can be used
            // to replace missing values with other values.
            IEstimator<ITransformer> pipeline = mlContext.Transforms.IndicateMissingValues("MissingIndicator", "Features");

            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
            // This operation doesn't actually evaluate data until we read the data below.
            var transformer     = pipeline.Fit(data);
            var transformedData = transformer.Transform(data);

            // We can extract the newly created column as an IEnumerable of SampleDataTransformed, the class we define below.
            var rowEnumerable = mlContext.Data.CreateEnumerable<SampleDataTransformed>(transformedData, reuseRowObject: false);

            // A small printing utility: slots separated by single spaces and wrapped in brackets.
            // string.Join avoids the trailing space a per-slot append would leave before "]".
            Func<object[], string> vectorPrinter = (object[] vector) => "[" + string.Join(" ", vector) + "]";

            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
            foreach (var row in rowEnumerable)
            {
                string features  = vectorPrinter(row.Features.Cast<object>().ToArray());
                string indicator = vectorPrinter(row.MissingIndicator.Cast<object>().ToArray());

                Console.WriteLine($"Label: {row.Label} Features: {features} MissingIndicator: {indicator}");
            }

            // Expected output:
            //
            // Label: 3 Features: [1 1 0] MissingIndicator: [False False False]
            // Label: 32 Features: [0 NaN 1] MissingIndicator: [False True False]
            // Label: NaN Features: [-1 NaN -3] MissingIndicator: [False True False]
        }
Example #3
0
        /// <summary>
        /// Trains a LightGBM multiclass pipeline on iris rows read through a database loader and
        /// asserts the predicted label for two hand-written samples. Returns immediately on
        /// non-Windows platforms.
        /// </summary>
        public void IrisLightGbm()
        {
            // https://github.com/dotnet/machinelearning/issues/4156
            bool runningOnWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
            if (!runningOnWindows)
            {
                return;
            }

            var context = new MLContext(seed: 1);

            var connection = GetConnectionString(TestDatasets.irisDb.name);
            var query      = $@"SELECT * FROM ""{TestDatasets.irisDb.trainFilename}""";

            // Columns read from the query result, in this order.
            var columns = new DatabaseLoader.Column[]
            {
                new DatabaseLoader.Column() { Name = "Label", Type = DbType.Int32 },
                new DatabaseLoader.Column() { Name = "SepalLength", Type = DbType.Single },
                new DatabaseLoader.Column() { Name = "SepalWidth", Type = DbType.Single },
                new DatabaseLoader.Column() { Name = "PetalLength", Type = DbType.Single },
                new DatabaseLoader.Column() { Name = "PetalWidth", Type = DbType.Single }
            };

            var dbLoader = context.Data.CreateDatabaseLoader(columns);
            var source   = new DatabaseSource(SqlClientFactory.Instance, connection, query);
            var irisRows = dbLoader.Load(source);

            // Pipeline stages: label -> key, feature vector, trainer, predicted key -> value.
            var labelToKey    = context.Transforms.Conversion.MapValueToKey("Label");
            var featureVector = context.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var lightGbm      = context.MulticlassClassification.Trainers.LightGbm();
            var keyToLabel    = context.Transforms.Conversion.MapKeyToValue("PredictedLabel");

            IEstimator<ITransformer> estimator = labelToKey
                                                 .Append(featureVector)
                                                 .Append(lightGbm)
                                                 .Append(keyToLabel);

            var fitted    = estimator.Fit(irisRows);
            var predictor = context.Model.CreatePredictionEngine<IrisData, IrisPrediction>(fitted);

            var firstSample = new IrisData()
            {
                SepalLength = 4.5f,
                SepalWidth  = 5.6f,
                PetalLength = 0.5f,
                PetalWidth  = 0.5f,
            };
            Assert.Equal(0, predictor.Predict(firstSample).PredictedLabel);

            var secondSample = new IrisData()
            {
                SepalLength = 4.9f,
                SepalWidth  = 2.4f,
                PetalLength = 3.3f,
                PetalWidth  = 1.0f,
            };
            Assert.Equal(1, predictor.Predict(secondSample).PredictedLabel);
        }
Example #4
0
        /// <summary>
        /// Runs the image-classification workflow end to end: loads, shuffles and splits the image list,
        /// fits the pre-processing transformers, trains the classifier, evaluates and saves two model
        /// variants ("fromFile" and "inMemory"), and optionally makes a single prediction.
        /// </summary>
        /// <param name="testImagePath">
        /// Path handed to <c>MakeSinglePrediction</c>. When null, empty or whitespace, no single
        /// prediction is made.
        /// </param>
        public void TrainModel(string testImagePath = null)
        {
            #region Notes: Fundamental components

            /*  Main components:
             *      IDataView,
             *      ITransformer,
             *      IEstimator
             */

            //IDataView demoDataView;
            //ITransformer demoITransformer;
            //IEstimator<ITransformer> demoIEstimator;
            #endregion Notes: Fundamental components
            #region Notes: Conventional column names

            /*  Conventional column names:
             *      Input:
             *          Label
             *          Features
             *      Output:
             *          PredictedLabel
             *          Score
             */
            #endregion Notes: Conventional column names
            #region Notes: Usual training process

            /*  Usual training process:
             *      1. Load training/test datasets (IDataView)
             *      2. Build training pipeline (IEstimator)
             *          2.1   Construct preProcessing pipeline (IEstimator) (optional)
             *          2.2   Configure trainer (IEstimator)
             *          2.3   Construct postProcessing pipeline (optional)
             *          2.4   Construct training pipeline (preProcessing pipeline + trainer + postProcessing pipeline)
             *      3. Train model using training dataset (ITransformer)
             *      4. Evaluate model performance
             *          4.1 Make predictions on test data using trained model (IDataView)
             *          4.2 Compute evaluation metrics (metrics statistics)
             *      (optional) Retrain on full dataset (ITransformer)
             *      5. Save model to filesystem
             *      6. Make single prediction
             */
            #endregion Notes: Usual training process

            // Load data, shuffle it, and hold out 20% as the test split.
            IDataView imagesInfo = LoadData(_dataFolder);
            imagesInfo = mlContext.Data.ShuffleRows(imagesInfo);
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(imagesInfo, testFraction: 0.2);

            // Pre processing: read image bytes from the paths, and map labels to keys.
            IEstimator<ITransformer> e_preProcessing_readImageBytes = mlContext.Transforms.LoadRawImageBytes(
                inputColumnName: nameof(ImageFileInputModel.ImagePath),
                outputColumnName: nameof(ImageInputModel.Image),
                imageFolder: _dataFolder);

            IEstimator<ITransformer> e_preProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapValueToKey(
                inputColumnName: nameof(BaseInputModel.Label),
                outputColumnName: "LabelAsKey",
                keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue);


            // Both pre-processing steps are fitted on the full (shuffled) data set.
            ITransformer t_preProcessing_labelKeyMapping = e_preProcessing_labelKeyMapping.Fit(imagesInfo);
            ITransformer t_preProcessing_readImageBytes  = e_preProcessing_readImageBytes.Fit(imagesInfo);
            ITransformer t_preProcessingPipeline         = t_preProcessing_labelKeyMapping.Append(t_preProcessing_readImageBytes);


            // Core Model training pipeline
            // NOTE(review): the held-out test split is also used as the trainer's validation set below,
            // so the later evaluation is not on data the trainer never saw — confirm this is intended.
            IDataView testSetTransformed = t_preProcessingPipeline.Transform(dataSplit.TestSet);
            ImageClassificationTrainer.Options trainerSettings = new ImageClassificationTrainer.Options
            {
                FeatureColumnName = nameof(ImageInputModel.Image),
                LabelColumnName   = "LabelAsKey",
                Arch            = ImageClassificationTrainer.Architecture.ResnetV2101,
                Epoch           = 100,
                BatchSize       = 200,
                LearningRate    = 0.05f,
                MetricsCallback = (m) => Console.WriteLine(m),
                ValidationSet   = testSetTransformed,
                WorkspacePath   = _workspaceFolder
            };

            IEstimator<ITransformer> e_trainer = mlContext.MulticlassClassification.Trainers.ImageClassification(trainerSettings);
            IEstimator<ITransformer> e_postProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapKeyToValue(
                inputColumnName: "PredictedLabel",
                outputColumnName: nameof(PredictionModel.PredictedLabel));

            IEstimator<ITransformer> trainingPipeline = e_trainer.Append(e_postProcessing_labelKeyMapping);

            // Train
            #region Notes: On metadata

            /*
             * Metadata source: https://aka.ms/mlnet-resources/resnet_v2_101_299.meta
             * System.IO.Path.GetTempPath() -  C:\Users\User\AppData\Local\Temp\
             */
            #endregion
            ITransformer trainedModel = Train(trainingPipeline, t_preProcessingPipeline.Transform(dataSplit.TrainSet));

            #region Notes: Model composition
            //var extractPixelsEst = mlContext.Transforms.ExtractPixels();
            //var resizeEst = mlContext.Transforms.ResizeImages();
            //IEstimator<ITransformer> est = mlContext.Model.LoadTensorFlowModel("MODEL_PATH")
            //.ScoreTensorFlowModel(
            //outputColumnNames: new[] { "some-name" },
            //inputColumnNames: new[] { "Features" }, addBatchDimensionInput: true);
            #endregion Notes: Model composition

            // Evaluate/Save FileSystemModel: full pre-processing (label mapping + image loading) + trained model.
            ITransformer fileSystemModel = t_preProcessingPipeline.Append(trainedModel);
            Evaluate(fileSystemModel, dataSplit.TestSet);
            SaveModel(fileSystemModel,
                      new DataViewSchema.Column[] {
                imagesInfo.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
                imagesInfo.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
            },
                      ResolveModelFileName("fromFile"));

            // Evaluate/Save InMemoryModel: image bytes are extracted up front, so the model itself
            // only contains the label mapping + trained model.
            // The evaluation input is built from the TEST split (it was previously taken from the
            // training split, which scored the model on the data it was trained on).
            IDataView testSetImageExtracted = t_preProcessing_readImageBytes.Transform(dataSplit.TestSet);

            ITransformer inMemoryModel = t_preProcessing_labelKeyMapping.Append(trainedModel);
            Evaluate(inMemoryModel, testSetImageExtracted);
            SaveModel(inMemoryModel,
                      new DataViewSchema.Column[] {
                testSetImageExtracted.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
                testSetImageExtracted.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
            },
                      ResolveModelFileName("inMemory"));

            // Try a single prediction
            if (!string.IsNullOrWhiteSpace(testImagePath))
            {
                MakeSinglePrediction(testImagePath);
            }
        }
Example #5
0
        /// <summary>
        /// Fits the given training pipeline to the training data and returns the resulting model.
        /// </summary>
        /// <param name="mlContext">Not used by this method.</param>
        /// <param name="trainingDataView">Data the pipeline is fitted on.</param>
        /// <param name="trainingPipeline">Estimator to fit.</param>
        /// <returns>The transformer produced by fitting the pipeline.</returns>
        private ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView, IEstimator<ITransformer> trainingPipeline)
            => trainingPipeline.Fit(trainingDataView);
 /// <summary>
 /// Fits the supplied pipeline to the training data. Virtual so derived classes can
 /// replace the training step.
 /// </summary>
 /// <param name="trainingData">Data the pipeline is fitted on.</param>
 /// <param name="pipeline">Estimator to fit.</param>
 /// <returns>The transformer produced by fitting the pipeline.</returns>
 protected virtual ITransformer BuildAndTrainModel(IDataView trainingData, IEstimator<ITransformer> pipeline)
 {
     ITransformer trainedModel = pipeline.Fit(trainingData);

     return trainedModel;
 }
Example #7
0
        /// <summary>
        /// Entry point. Loads a saved model from <c>ModelPath</c> when that file exists; otherwise
        /// trains a matrix-factorization pipeline on the training file and saves it. Then evaluates
        /// the model on the test file, prints regression metrics, and prints a recommendation
        /// verdict for one sample user/movie pair.
        /// </summary>
        /// <param name="args">Command-line arguments (not read).</param>
        static void Main(string[] args)
        {
            Helper.PrintLine("创建 MLContext...");
            var context = new MLContext(seed: 0);

            // Test data is loaded up front; it is needed whichever way the model is obtained.
            IDataView testData = context.Data.LoadFromTextFile<MovieRating>(TestingDataPath, hasHeader: true, separatorChar: ',');

            ITransformer model;
            if (!File.Exists(ModelPath))
            {
                // Training data set
                IDataView trainingData = context.Data.LoadFromTextFile<MovieRating>(TrainingDataPath, hasHeader: true, separatorChar: ',');

                // Build the pipeline
                Helper.PrintLine("创建神经网络管道...");
                var factorizationOptions = new MatrixFactorizationTrainer.Options
                {
                    MatrixColumnIndexColumnName = "userIdEncoded",
                    MatrixRowIndexColumnName    = "movieIdEncoded",
                    LabelColumnName             = "Label",
                    NumberOfIterations          = 20,
                    ApproximationRank           = 100
                };

                IEstimator<ITransformer> estimator = context.Transforms.Conversion
                                                     .MapValueToKey(outputColumnName: "userIdEncoded", inputColumnName: "userId")
                                                     .Append(context.Transforms.Conversion.MapValueToKey(outputColumnName: "movieIdEncoded", inputColumnName: "movieId"))
                                                     .Append(context.Recommendation().Trainers.MatrixFactorization(factorizationOptions));

                // Start training
                Helper.PrintSplit();
                Helper.PrintLine("开始训练神经网络...");
                model = estimator.Fit(trainingData);
                Helper.PrintLine("训练神经网络完成");
                Helper.PrintSplit();

                // Persist the trained model together with the training schema
                Helper.PrintLine("导出神经网络模型...");
                context.Model.Save(model, trainingData.Schema, ModelPath);
            }
            else
            {
                // A saved model exists: load it; the stored input schema is not needed here.
                Helper.PrintLine("加载神经网络模型...");
                model = context.Model.Load(ModelPath, out DataViewSchema _);
            }

            // Prediction / evaluation on the test data
            Helper.PrintLine("预测:");
            var scored  = model.Transform(testData);
            var metrics = context.Regression.Evaluate(scored, labelColumnName: "Label", scoreColumnName: "Score");

            Helper.PrintLine($"R^2: {metrics.RSquared:0.##}");
            Helper.PrintLine($"RMS error: {metrics.RootMeanSquaredError:0.##}");

            // Single prediction for a fixed user/movie pair.
            var engine    = context.Model.CreatePredictionEngine<MovieRating, MovieRatingPrediction>(model);
            var testInput = new MovieRating { userId = 6, movieId = 10 };
            var result    = engine.Predict(testInput);

            // Recommend when the score, rounded to one decimal, exceeds 3.5.
            bool recommended = Math.Round(result.Score, 1) > 3.5;
            Helper.PrintLine(recommended
                ? $"Movie {testInput.movieId} is recommended for user {testInput.userId}"
                : $"Movie {testInput.movieId} is not recommended for user {testInput.userId}");

            Helper.Exit(0);
        }
Example #8
0
        /// <summary>
        /// Trains a FastTree binary classifier on the given training file, saves the model to
        /// <c>ModelPath</c>, then evaluates it on the test file and prints the metrics to the console.
        /// </summary>
        /// <param name="trainingFileName">Comma-separated training file without a header row.</param>
        /// <param name="testFileName">Comma-separated test file without a header row.</param>
        /// <remarks>
        /// If either file does not exist, a message is written to the console and the method
        /// returns without training.
        /// </remarks>
        public void Train(string trainingFileName, string testFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName})");

                return;
            }

            if (!File.Exists(testFileName))
            {
                Console.WriteLine($"Failed to find test data file ({testFileName})");

                return;
            }

            var trainingDataView = MlContext.Data.LoadFromTextFile<CarInventory>(trainingFileName, ',', hasHeader: false);

            // Concatenate the CarInventory properties into "Features" (the Label name is passed to
            // ToPropertyList), then mean/variance-normalize them.
            IEstimator<ITransformer> dataProcessPipeline = MlContext.Transforms.Concatenate("Features",
                                                                                             typeof(CarInventory).ToPropertyList<CarInventory>(nameof(CarInventory.Label)))
                                                            .Append(MlContext.Transforms.NormalizeMeanVariance(inputColumnName: "Features",
                                                                                                               outputColumnName: "FeaturesNormalizedByMeanVar"));

            var trainer = MlContext.BinaryClassification.Trainers.FastTree(labelColumnName: nameof(CarInventory.Label),
                                                                           featureColumnName: "FeaturesNormalizedByMeanVar",
                                                                           numberOfLeaves: 2,
                                                                           numberOfTrees: 1000,
                                                                           minimumExampleCountPerLeaf: 1,
                                                                           learningRate: 0.2);

            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // The model is fitted on the whole training file; evaluation uses the separate test file below.
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            MlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);

            // Extend the trained chain with a feature-contribution transformer built from the trained
            // predictor; it is fitted on the pre-processed training data.
            var evaluationPipeline = trainedModel.Append(MlContext.Transforms
                                                         .CalculateFeatureContribution(trainedModel.LastTransformer)
                                                         .Fit(dataProcessPipeline.Fit(trainingDataView).Transform(trainingDataView)));

            var testDataView = MlContext.Data.LoadFromTextFile<CarInventory>(testFileName, ',', hasHeader: false);

            var testSetTransform = evaluationPipeline.Transform(testDataView);

            var modelMetrics = MlContext.BinaryClassification.Evaluate(data: testSetTransform,
                                                                       labelColumnName: nameof(CarInventory.Label),
                                                                       scoreColumnName: "Score");

            Console.WriteLine($"Accuracy: {modelMetrics.Accuracy:P2}");
            Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"Area under Precision recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}");
            Console.WriteLine($"F1Score: {modelMetrics.F1Score:P2}");
            Console.WriteLine($"LogLoss: {modelMetrics.LogLoss:#.##}");
            Console.WriteLine($"LogLossReduction: {modelMetrics.LogLossReduction:#.##}");
            Console.WriteLine($"PositivePrecision: {modelMetrics.PositivePrecision:#.##}");
            Console.WriteLine($"PositiveRecall: {modelMetrics.PositiveRecall:#.##}");
            Console.WriteLine($"NegativePrecision: {modelMetrics.NegativePrecision:#.##}");
            Console.WriteLine($"NegativeRecall: {modelMetrics.NegativeRecall:P2}");
        }
        static void Main(string[] args)
        {
            //###############################################################
            //INICIALIZACIÓN DEL PROCESO
            //###############################################################

            //Inicialización de mlContext; utilización del seed para replicidad
            MLContext mlContext = new MLContext(seed: 1);

            //Definición de las clases de los datos de entrada:
            //  -Clase Observaciones: TransactionObservation

            //Carga de datos
            IDataView originalFullData = mlContext.Data.LoadFromTextFile <TransactionObservation>(
                _DataPath,
                separatorChar: ';',
                hasHeader: true);


            //###############################################################
            //CONSTRUYE EL CONJUNTO DE DATOS (DATASET)
            //###############################################################

            //División del IDataView originalFullData:
            //  -entrenamiento (trainingDataView): 70%
            //  -testeo (testDataView): 20%
            //  -Consumo (ConsumoDataView): 10%

            //Split dataset: train = 0.7 + test_Consumo = 0.3
            double        testFraction = 0.3;
            TrainTestData Split_TrainTestConsumoData = mlContext.Data.TrainTestSplit(originalFullData,
                                                                                     testFraction: testFraction, seed: 1);
            IDataView trainingDataView = Split_TrainTestConsumoData.TrainSet;
            IDataView testConsumoData  = Split_TrainTestConsumoData.TestSet;

            //Split dataset tes_val: test = 0.7 (0.7*0.3 = 0.21) + val = 0.3 (0.3*0.3 = 0.09)
            testFraction = 0.3;
            TrainTestData Split_TestConsumoData = mlContext.Data.TrainTestSplit(testConsumoData,
                                                                                testFraction: testFraction, seed: 1);
            IDataView testDataView    = Split_TestConsumoData.TrainSet;
            IDataView ConsumoDataView = Split_TestConsumoData.TestSet;

            //save train split
            using (var fileStream = File.Create(_salida_trainDataPath))
            {
                mlContext.Data.SaveAsText(trainingDataView, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }

            //save test split
            using (var fileStream = File.Create(_salida_testDataPath))
            {
                mlContext.Data.SaveAsText(testDataView, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }

            //save Consumo split
            using (var fileStream = File.Create(_salida_ConsumoDataPath))
            {
                mlContext.Data.SaveAsText(ConsumoDataView, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }


            //###############################################################
            //SELECCIÓN DE VARIABLES
            //###############################################################

            //Suprimimos del esquema IDataView lo que no seleccionemos como features
            string[] featureColumnNames = trainingDataView.Schema.AsQueryable()
                                          .Select(column => column.Name)
                                          .Where(name => name != "Label" && //atributo de salida
                                                 name != "Time")            //no aporta información
                                          .ToArray();


            //###############################################################
            //TRANFORMACIÓN DE LOS DATOS DEL MODELO --> pipeline
            //###############################################################

            //Concatena
            IEstimator <ITransformer> pipeline = mlContext.Transforms.Concatenate("Features",
                                                                                  featureColumnNames)
                                                 //Surpime del IDataView
                                                 .Append(mlContext.Transforms.DropColumns(new string[] { "Time" }))
                                                 //Normalizado de las Features
                                                 .Append(mlContext.Transforms.NormalizeMeanVariance(inputColumnName: "Features",
                                                                                                    outputColumnName: "FeaturesNormalized"));


            //Guardar dataset transformedData --> Validación Cruzada
            IDataView transformedData =
                pipeline.Fit(trainingDataView).Transform(trainingDataView);

            using (var fileStream = File.Create(_salida_transformationData))
            {
                mlContext.Data.SaveAsText(transformedData, fileStream, separatorChar: ';', headerRow: true,
                                          schema: true);
            }


            //###############################################################
            //SELECCIÓN DE ALGORITMOS DE ENTRENAMIENTO --> trainingPipeline
            //###############################################################

            //***************************************************************
            //1. SVM (Suport Vector Machine)
            //***************************************************************

            var trainer_svm = mlContext.BinaryClassification.Trainers
                              .LinearSvm(labelColumnName: "Label",
                                         featureColumnName: "FeaturesNormalized",
                                         numberOfIterations: 10);

            //Se añade el Algoritmo al pipeline de transformación de datos
            IEstimator <ITransformer> trainingPipeline_svm = pipeline.Append(trainer_svm);


            //***************************************************************
            //2. GBA (Gradient Boosting Algorithm)
            //***************************************************************

            var trainer_boost = mlContext.BinaryClassification.Trainers
                                .FastTree(labelColumnName: "Label",
                                          featureColumnName: "FeaturesNormalized",
                                          numberOfLeaves: 20,
                                          numberOfTrees: 100,
                                          minimumExampleCountPerLeaf: 10,
                                          learningRate: 0.2);

            //Se añade el Algoritmo al pipeline de transformación de datos
            IEstimator <ITransformer> trainingPipeline_boost = pipeline.Append(trainer_boost);


            //###############################################################
            //ENTRENAMIENTO DE LOS MODELOS
            //###############################################################

            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo SVM   ");
            Console.WriteLine($"*-----------------------------------------------------------");
            var watch_svm = System.Diagnostics.Stopwatch.StartNew();
            var model_svm = trainingPipeline_svm.Fit(trainingDataView);

            watch_svm.Stop();
            var elapseds_svm = watch_svm.ElapsedMilliseconds * 0.001;

            Console.WriteLine($"El entrenamiento SVM ha tardado: {elapseds_svm:#.##} s\n");

            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GBA   ");
            Console.WriteLine($"*-----------------------------------------------------------");
            var watch_boost = System.Diagnostics.Stopwatch.StartNew();
            var model_boost = trainingPipeline_boost.Fit(trainingDataView);

            watch_boost.Stop();
            var elapseds_boost = watch_boost.ElapsedMilliseconds * 0.001;

            Console.WriteLine($"El entrenamiento GBA ha tardado: {elapseds_boost:#.##} s\n");


            //###############################################################
            //EVALUACIÓN DE LOS MODELOS
            //###############################################################

            //Transformación del IDataView testDataView a paritr de ambos Modelos
            var predictions_svm   = model_svm.Transform(testDataView);
            var predictions_boost = model_boost.Transform(testDataView);

            //Calculo de las métricas de cada Modelo
            var metrics_svm = mlContext.BinaryClassification
                              //SVM es un Modelo no basado en PROBABILIDAD -> NonCalibrated
                              .EvaluateNonCalibrated(data: predictions_svm, labelColumnName: "Label", scoreColumnName: "Score");
            var metrics_boost = mlContext.BinaryClassification
                                .Evaluate(data: predictions_boost, labelColumnName: "Label", scoreColumnName: "Score");


            //Muestra las métricas SVM
            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo SVM      ");
            Console.WriteLine($"*-----------------------------------------------------------");
            Console.WriteLine($"*       SVM Positive Precision:  {metrics_svm.PositivePrecision:0.##}");
            Console.WriteLine($"*       SVM Positive Recall:  {metrics_svm.PositiveRecall:0.##}");
            Console.WriteLine($"*       SVM Negative Precision:  {metrics_svm.NegativePrecision:0.##}");
            Console.WriteLine($"*       SVM Negative Recall:  {metrics_svm.NegativeRecall:0.##}");
            Console.WriteLine($"*       SVM Accuracy: {metrics_svm.Accuracy:P2}");
            Console.WriteLine($"*       SVM F1Score:  {metrics_svm.F1Score:P2}\n");

            //Muestra las métricas GBA
            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GBA      ");
            Console.WriteLine($"*-----------------------------------------------------------");
            Console.WriteLine($"*       GBA Positive Precision:  {metrics_boost.PositivePrecision:0.##}");
            Console.WriteLine($"*       GBA Positive Recall:  {metrics_boost.PositiveRecall:0.##}");
            Console.WriteLine($"*       GBA Negative Precision:  {metrics_boost.NegativePrecision:0.##}");
            Console.WriteLine($"*       GBA Negative Recall:  {metrics_boost.NegativeRecall:0.##}");
            Console.WriteLine($"*       GBA Accuracy: {metrics_boost.Accuracy:P2}");
            Console.WriteLine($"*       GBA F1Score:  {metrics_boost.F1Score:P2}\n");


            //###############################################################
            //VALIDACIÓN CRUZADA
            //###############################################################

            Console.WriteLine($"\n*****************************************");
            Console.WriteLine($"* Validación Cruzada del Algoritmo SVM   ");
            Console.WriteLine($"*----------------------------------------");
            var watch_CV_SVM  = System.Diagnostics.Stopwatch.StartNew();
            var cvResults_svm = mlContext.BinaryClassification
                                //SVM es un Modelo no basado en PROBABILIDAD -> NonCalibrated
                                .CrossValidateNonCalibrated(
                transformedData,
                trainer_svm,
                numberOfFolds: 10,
                labelColumnName: "Label");

            watch_CV_SVM.Stop();
            var elapseds_CV_SVM = watch_CV_SVM.ElapsedMilliseconds * 0.001;

            Console.WriteLine($"La Validación Cruzada del Algoritmo SVM ha tardado: {elapseds_CV_SVM:#.##} s\n");

            //Vamos a supervisar el resultado de la Validación Cruzada para la métrica: F1 Score
            Double[] F1_models =
                cvResults_svm
                .OrderByDescending(fold => fold.Metrics.F1Score)
                .Select(fold => fold.Metrics.F1Score)
                .ToArray();

            //Calculamos la media del F1 Score
            Double media_F1 = F1_models.Average();

            //Vamos a supervisar el resultado de la Validación Cruzada para la métrica: Accuracy
            Double[] Accu_models =
                cvResults_svm
                .OrderByDescending(fold => fold.Metrics.F1Score)
                .Select(fold => fold.Metrics.Accuracy)
                .ToArray();
            //Calculamos la media del Accuracy
            Double media_Accu = Accu_models.Average();

            //Mostramos métricas y media
            Console.WriteLine($"\n**********************************************************");
            Console.WriteLine($"* Resultado de la Validación Cruzada del Algoritmo SVM     ");
            Console.WriteLine($"*---------------------------------------------------------");
            Console.WriteLine($"|     MODEL_N     | MEDIDA F1 SCORE | MEDIDA ACCURACY |");
            Console.WriteLine($"|     Model_1     |     {F1_models[0]:P2}     |     {Accu_models[0]:P2}     |");
            Console.WriteLine($"|     Model_2     |     {F1_models[1]:P2}     |     {Accu_models[1]:P2}     |");
            Console.WriteLine($"|     Model_3     |     {F1_models[2]:P2}     |     {Accu_models[2]:P2}     |");
            Console.WriteLine($"|     Model_4     |     {F1_models[3]:P2}     |     {Accu_models[3]:P2}     |");
            Console.WriteLine($"|     Model_5     |     {F1_models[4]:P2}     |     {Accu_models[4]:P2}     |");
            Console.WriteLine($"|     Model_6     |     {F1_models[5]:P2}     |     {Accu_models[5]:P2}     |");
            Console.WriteLine($"|     Model_7     |     {F1_models[6]:P2}     |     {Accu_models[6]:P2}     |");
            Console.WriteLine($"|     Model_8     |     {F1_models[7]:P2}     |     {Accu_models[7]:P2}     |");
            Console.WriteLine($"|     Model_9     |     {F1_models[8]:P2}     |     {Accu_models[8]:P2}     |");
            Console.WriteLine($"|     Model_10    |     {F1_models[9]:P2}     |     {Accu_models[9]:P2}     |");
            Console.WriteLine($"La F1 Score media es igual a:  {media_F1:P2}");
            Console.WriteLine($"La Accuracy media es igual a:  {media_Accu:P2}\n");


            //###############################################################
            //SELECCIÓN MODELO
            //###############################################################

            //Tomamos todos los Modelos calculados con la Validación Cruzada
            ITransformer[] models =
                cvResults_svm
                .OrderByDescending(fold => fold.Metrics.F1Score)
                .Select(fold => fold.Model)
                .ToArray();

            //Tomamos el mejor Modelo
            ITransformer topModel = models[0];

            //Guardamos el Modelo para su posterior consumo
            mlContext.Model.Save(model_svm, trainingDataView.Schema, _salida_modelPath);


            //######################################
            //CONSUMO DEL MODELO
            //######################################

            //Definición de las clases de las predicciones:
            //  -Clase Predicciones: TransactionPrediction

            //Definimos CreatePredictionEngine de TransactionObservation --> TransactionPrediction a través de model_svm
            var predictionEngine = mlContext.Model
                                   .CreatePredictionEngine <TransactionObservation, TransactionPrediction>(
                model_svm);

            Console.WriteLine($"\n**********************************");
            Console.WriteLine($"--- Predicción ConsumoDataView ---");
            Console.WriteLine($"----------------------------------");
            mlContext.Data.CreateEnumerable <TransactionObservation>(ConsumoDataView, reuseRowObject: false)
            .Select(ConsumoData => ConsumoData)
            .ToList()
            .ForEach(ConsumoData =>
            {
                //Predict() predicción única instancia
                var prediction = predictionEngine.Predict(ConsumoData);
                Console.WriteLine($"Label: {prediction.Label:.##}");
                Console.WriteLine($"Predicted Label: {prediction.PredictedLabel:#.##}");
                //SVM no está basado en determinar Probabilidad
                //Console.WriteLine("Probability: {prediction.Probability:#.##}");
                Console.WriteLine($"Score: {prediction.Score:.##}");
                Console.WriteLine($"-------------------");
            });
        }
        /// <summary>
        /// Featurizes the text columns of <paramref name="splitTrainSet"/>, then runs a 10-fold
        /// cross-validation of a calibrated binary classifier on the prepared data while timing the work.
        /// </summary>
        /// <remarks>
        /// This is the calibrated variant. For a non-calibrated trainer the return type would use
        /// <c>BinaryClassificationMetrics</c> and the call would be <c>CrossValidateNonCalibrated</c>.
        /// The prepared data view is also stored in <c>transformedDataStage2</c>.
        /// </remarks>
        /// <param name="mlContext">ML.NET context used to create transforms and trainers.</param>
        /// <param name="splitTrainSet">Training portion of the data set.</param>
        /// <returns>One cross-validation result per fold.</returns>
        public static IReadOnlyList<TrainCatalogBase.CrossValidationResult<CalibratedBinaryClassificationMetrics>> BuildAndTrain(MLContext mlContext, IDataView splitTrainSet)
        {
            // Data-preparation stage only (the learning algorithm is applied separately below):
            // featurize both text columns and merge them into a single "Features" vector.
            var abilityFeaturizer = mlContext.Transforms.Text.FeaturizeText(inputColumnName: "UserAbility", outputColumnName: "UserAbilityFeaturized");
            var difficultyThenConcat = mlContext.Transforms.Text.FeaturizeText(inputColumnName: "QuestionDifficulty", outputColumnName: "QuestionDifficultyFeaturized")
                                       .Append(mlContext.Transforms.Concatenate("Features", "UserAbilityFeaturized", "QuestionDifficultyFeaturized"));
            IEstimator<ITransformer> dataPrepEstimator = abilityFeaturizer.Append(difficultyThenConcat);

            // Stopwatch usage reference:
            // https://docs.microsoft.com/en-us/dotnet/api/system.diagnostics.stopwatch?redirectedfrom=MSDN&view=netframework-4.8
            Console.WriteLine("===================== Starting Stopwatch ====================");
            Stopwatch timer = Stopwatch.StartNew();

            // Fit learns the featurization parameters from the training set; Transform applies them.
            var fittedDataPrep = dataPrepEstimator.Fit(splitTrainSet);
            transformedDataStage2 = fittedDataPrep.Transform(splitTrainSet);
            Console.WriteLine(splitTrainSet.Schema.ToString());

            // The trainer currently in use is the Prior trainer, evaluated with the calibrated CrossValidate.
            // Each fold result carries its scored data, its model and its metrics.
            IEstimator<ITransformer> priorTrainer = mlContext.BinaryClassification.Trainers.Prior();
            var foldResults = mlContext.BinaryClassification.CrossValidate(transformedDataStage2, priorTrainer, numberOfFolds: 10);

            Console.WriteLine("===================== Stopwatch Stopped=========================");
            timer.Stop();

            TimeSpan elapsed = timer.Elapsed;
            int hundredths = elapsed.Milliseconds / 10;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
                                               elapsed.Hours, elapsed.Minutes, elapsed.Seconds,
                                               hundredths);
            Console.WriteLine($"Time taken (hh:mm:ss:msms):  {elapsedTime}");

            return foldResults;
        }
Example #11
0
        /// <summary>
        /// End-to-end house price sample: trains an SDCA regression on four in-memory samples,
        /// evaluates it on four test samples, saves the model to "model.zip" and calls MakePrediction.
        /// </summary>
        static void Main()
        {
            var context = new MLContext();

            // 1. Import or create the training data.
            var trainingSamples = new[]
            {
                new HouseData { Size = 1.1F, Price = 1.2F },
                new HouseData { Size = 1.9F, Price = 2.3F },
                new HouseData { Size = 2.8F, Price = 3.0F },
                new HouseData { Size = 3.4F, Price = 3.7F },
            };
            IDataView trainingView = context.Data.LoadFromEnumerable(trainingSamples);

            // 2. Specify the data-preparation and training pipeline.
            var featureAssembler = context.Transforms.Concatenate(
                outputColumnName: "Features",
                inputColumnNames: new[] { "Size" });
            var sdcaTrainer = context.Regression.Trainers.Sdca(
                labelColumnName: "Price",
                featureColumnName: "Features",
                maximumNumberOfIterations: 100);
            IEstimator<ITransformer> trainingPipeline = featureAssembler.Append(sdcaTrainer);

            // 3. Train the model.
            ITransformer trainedModel = trainingPipeline.Fit(trainingView);

            // 4. Test the model.
            var testSamples = new[]
            {
                new HouseData { Size = 1.1F, Price = 0.98F },
                new HouseData { Size = 1.9F, Price = 2.1F },
                new HouseData { Size = 2.8F, Price = 2.9F },
                new HouseData { Size = 3.4F, Price = 3.6F },
            };
            IDataView testView   = context.Data.LoadFromEnumerable(testSamples);
            IDataView scoredView = trainedModel.Transform(testView);

            // Snapshot of the scored rows, kept only for inspection in a debugger.
            var debug = scoredView.Preview();

            var regressionMetrics = context.Regression.Evaluate(
                scoredView,
                labelColumnName: "Price");

            Console.WriteLine($"R^2: {regressionMetrics.RSquared:0.##}");
            Console.WriteLine($"RMS error: {regressionMetrics.RootMeanSquaredError:0.##}");

            // 5. Save the model.
            context.Model.Save(trainedModel, trainingView.Schema, "model.zip");

            // 6. Make a prediction.
            MakePrediction();
        }
        /// <summary>
        /// Trains an SDCA regression model on the file at <paramref name="trainingDataPath"/> and
        /// computes permutation feature importance (PFI) for each input feature.
        /// </summary>
        /// <param name="trainingDataPath">Path of a ';'-separated text file with a header row.</param>
        /// <returns>
        /// One <see cref="FeatureImportance"/> per feature column, in concatenation order, whose
        /// <c>R2Decrease</c> is set to the mean of the RSquared statistic reported by PFI for that index.
        /// </returns>
        public List <FeatureImportance> ComputePermutationMetrics(string trainingDataPath)
        {
            // Feature columns in the order they are concatenated into "Features".
            // The PFI results are matched to these names by index.
            var featureColumns = new[]
            {
                "FixedAcidity",
                "VolatileAcidity",
                "CitricAcid",
                "ResidualSugar",
                "Chlorides",
                "FreeSulfurDioxide",
                "TotalSulfurDioxide",
                "Density",
                "Ph",
                "Sulphates",
                "Alcohol"
            };

            // Preparation: mean-impute FixedAcidity, assemble the feature vector, then normalize it.
            var imputeFixedAcidity = MLContext.Transforms.ReplaceMissingValues(
                outputColumnName: "FixedAcidity",
                replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean);
            IEstimator <ITransformer> preparation = imputeFixedAcidity
                .Append(MLContext.Transforms.Concatenate("Features", featureColumns))
                .Append(MLContext.Transforms.NormalizeMeanVariance("Features"));

            var rawData = MLContext.Data.LoadFromTextFile <FeatureImportanceData>(
                path: trainingDataPath,
                separatorChar: ';',
                hasHeader: true);

            // Cache the data view in memory. For an iterative algorithm such as SDCA this makes a huge difference.
            var cachedData = MLContext.Data.Cache(rawData);

            // Prepare the data for the algorithm.
            var preparedData = preparation.Fit(cachedData).Transform(cachedData);

            // Train the regression model on the prepared data.
            var trainer = MLContext.Regression.Trainers.Sdca();
            var trainedRegressor = trainer.Fit(preparedData);

            // Calculate the PFI metrics.
            // List of evaluation metrics:
            // https://docs.microsoft.com/en-us/dotnet/machine-learning/resources/metrics
            var pfi = MLContext.Regression.PermutationFeatureImportance(
                trainedRegressor,
                preparedData,
                permutationCount: 50);

            var importances = new List <FeatureImportance>();
            foreach (var columnName in featureColumns)
            {
                importances.Add(new FeatureImportance(columnName));
            }

            for (int slot = 0; slot < pfi.Length; slot++)
            {
                importances[slot].R2Decrease = pfi[slot].RSquared.Mean;
            }

            return importances;
        }
        /// <summary>
        /// Appends a three-step SSA forecast of cost of goods, inventory and turnover to the supplied history.
        /// </summary>
        /// <param name="data">
        /// Historical observations. The sequence is enumerated exactly once; its last element
        /// supplies the date from which the forecast rows are dated.
        /// </param>
        /// <returns>
        /// A new list holding every input row followed by one forecasted <see cref="RatioAnalysis"/>
        /// per forecast step, dated 1, 2, ... months after the last input row. Each <c>*Delta</c> is the
        /// distance between the upper confidence bound and the forecasted value.
        /// </returns>
        public IEnumerable <RatioAnalysis> Predict(IEnumerable <RatioAnalysis> data)
        {
            // Materialize once. The input used to be enumerated about ten times (Count, Last, ToList,
            // LoadFromEnumerable), which is wasteful and unsafe for lazy or one-shot sequences.
            var history = data.ToList();
            int observationCount = history.Count;

            var mlContext = new MLContext(seed: 1);

            IEstimator <ITransformer> costofgoodsForcaster = BuildSsaForecaster(mlContext, nameof(RatioAnalysis.CostOfGoods), observationCount);
            IEstimator <ITransformer> inventoryForcaster   = BuildSsaForecaster(mlContext, nameof(RatioAnalysis.Inventory), observationCount);
            IEstimator <ITransformer> turnoverForcaster    = BuildSsaForecaster(mlContext, nameof(RatioAnalysis.Turnover), observationCount);

            // Fit each forecasting model to the same history; one data view serves all three.
            var historyView = mlContext.Data.LoadFromEnumerable(history);
            ITransformer costofgoodsTransformer = costofgoodsForcaster.Fit(historyView);
            ITransformer inventoryTransformer   = inventoryForcaster.Fit(historyView);
            ITransformer turnoverTransformer    = turnoverForcaster.Fit(historyView);

            // Create the forecast engines used for creating predictions.
            TimeSeriesPredictionEngine <RatioAnalysis, RatioAnalysisPrediction> inventoryEngine  = inventoryTransformer.CreateTimeSeriesEngine <RatioAnalysis, RatioAnalysisPrediction>(mlContext);
            TimeSeriesPredictionEngine <RatioAnalysis, RatioAnalysisPrediction> turnoverEngine   = turnoverTransformer.CreateTimeSeriesEngine <RatioAnalysis, RatioAnalysisPrediction>(mlContext);
            TimeSeriesPredictionEngine <RatioAnalysis, RatioAnalysisPrediction> costofgoodEngine = costofgoodsTransformer.CreateTimeSeriesEngine <RatioAnalysis, RatioAnalysisPrediction>(mlContext);

            // Each prediction covers the number of steps given by the `horizon` argument of the estimator.
            var turnoverPrediction  = turnoverEngine.Predict();
            var costPrediction      = costofgoodEngine.Predict();
            var inventoryPrediction = inventoryEngine.Predict();

            var last   = history.Last();
            var retVal = new List <RatioAnalysis>(history);

            // Evaluate the forecast length once instead of on every loop iteration.
            int forecastCount = turnoverPrediction.Forecasted.Count();

            for (int i = 0; i < forecastCount; i++)
            {
                retVal.Add(new RatioAnalysis
                {
                    Date             = last.Date.AddMonths(i + 1),
                    CostOfGoods      = costPrediction.Forecasted[i],
                    CostOfGoodsDelta = costPrediction.ConfidenceUpperBound[i] - costPrediction.Forecasted[i],
                    Inventory        = inventoryPrediction.Forecasted[i],
                    InventoryDelta   = inventoryPrediction.ConfidenceUpperBound[i] - inventoryPrediction.Forecasted[i],
                    Turnover         = turnoverPrediction.Forecasted[i],
                    TurnoverDelta    = turnoverPrediction.ConfidenceUpperBound[i] - turnoverPrediction.Forecasted[i]
                });
            }

            return retVal;
        }

        /// <summary>
        /// Creates the SSA forecasting estimator shared by all three forecasted columns.
        /// </summary>
        /// <param name="mlContext">Context that owns the estimator.</param>
        /// <param name="inputColumnName">Name of the <see cref="RatioAnalysis"/> column being forecasted.</param>
        /// <param name="observationCount">Number of historical data points; used as both series length and train size.</param>
        /// <returns>An estimator that writes its output to the <see cref="RatioAnalysisPrediction"/> columns.</returns>
        private static IEstimator <ITransformer> BuildSsaForecaster(MLContext mlContext, string inputColumnName, int observationCount)
        {
            return mlContext.Forecasting.ForecastBySsa(
                outputColumnName: nameof(RatioAnalysisPrediction.Forecasted),
                inputColumnName: inputColumnName,                                                 // The column being forecasted.
                windowSize: 12,                                                                   // One full 12-month product cycle.
                seriesLength: observationCount,                                                   // Number of data points used when performing a forecast.
                trainSize: observationCount,                                                      // Total number of data points in the input series, from the beginning.
                horizon: 3,                                                                       // Forecast the next 3 months.
                confidenceLevel: 0.75f,                                                           // Likelihood that the real value falls within the interval bounds.
                confidenceLowerBoundColumn: nameof(RatioAnalysisPrediction.ConfidenceLowerBound), // Column receiving the lower bound of each forecasted value.
                confidenceUpperBoundColumn: nameof(RatioAnalysisPrediction.ConfidenceUpperBound)); // Column receiving the upper bound of each forecasted value.
        }
Example #14
0
 /// <summary>
 /// Trains the model by fitting <c>TrainingPipeline</c> to <c>TrainingDataView</c>.
 /// </summary>
 /// <returns>The fitted transformer.</returns>
 private static ITransformer TrainModel()
 {
     ITransformer fittedModel = TrainingPipeline.Fit(TrainingDataView);

     return fittedModel;
 }
Example #15
0
        /// <summary>
        /// Builds a chain of 20 text featurizers (one per column "Column0" .. "Column19", word
        /// bigrams only, no character n-grams) and fits it to <c>_dataset</c>.
        /// </summary>
        /// <returns>The fitted featurization chain.</returns>
        public ITransformer TrainFeaturizeText()
        {
            // Only the first 20 columns are featurized; each column gets its own estimator and options.
            var columnFeaturizers = Enumerable.Range(0, 20)
                .Select(columnIndex => _mlContext.Transforms.Text.FeaturizeText(
                    $"Column{columnIndex}",
                    new TextFeaturizingEstimator.Options()
                    {
                        CharFeatureExtractor = null,
                        WordFeatureExtractor = new WordBagEstimator.Options()
                        {
                            NgramLength        = 2,
                            MaximumNgramsCount = new int[] { 200000 }
                        }
                    }))
                .ToList();

            // Start from the first featurizer and append the remaining ones in column order.
            IEstimator <ITransformer> chain = columnFeaturizers[0];

            for (int position = 1; position < columnFeaturizers.Count; position++)
            {
                chain = chain.Append(columnFeaturizers[position]);
            }

            // BENCHMARK OUTPUT
            // * Summary *
            //
            // BenchmarkDotNet = v0.11.3, OS = Windows 10.0.18363
            // Intel Xeon W - 2133 CPU 3.60GHz, 1 CPU, 12 logical and 6 physical cores
            // .NET Core SDK = 3.0.100
            // [Host]     : .NET Core 2.1.13(CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), 64bit RyuJIT
            // Job - KDKCUJ : .NET Core 2.1.13(CoreCLR 4.6.28008.01, CoreFX 4.6.28008.01), 64bit RyuJIT
            //
            // Arguments =/ p:Configuration = Release  Toolchain = netcoreapp2.1  IterationCount = 1
            // LaunchCount = 3  MaxIterationCount = 20  RunStrategy = ColdStart
            // UnrollFactor = 1  WarmupCount = 1
            //
            //             Method | Mean     | Error    | StdDev    | Extra Metric  | Gen 0 / 1k Op | Gen 1 / 1k Op | Gen 2 / 1k Op | Allocated Memory / Op |
            // ------------------- | --------:| --------:| ---------:| -------------:| -------------:| ------------: | ------------: | --------------------: |
            //  TrainFeaturizeText | 17.00 s  | 6.337 s  | 0.3474 s  | -             | 1949000.0000  | 721000.0000   | 36000.0000    | 315.48 MB             |
            //
            // * Legends *
            //   Mean                 : Arithmetic mean of all measurements
            //   Error                : Half of 99.9 % confidence interval
            //   StdDev               : Standard deviation of all measurements
            //   Extra Metric         : Value of the provided extra metric
            //   Gen 0 / 1k Op        : GC Generation 0 collects per 1k Operations
            //   Gen 1 / 1k Op        : GC Generation 1 collects per 1k Operations
            //   Gen 2 / 1k Op        : GC Generation 2 collects per 1k Operations
            //   Allocated Memory/ Op : Allocated memory per single operation(managed only, inclusive, 1KB = 1024B)
            //   1 s                  : 1 Second(1 sec)
            //
            // * Diagnostic Output - MemoryDiagnoser *
            // ***** BenchmarkRunner: End *****
            //   Run time: 00:01:52(112.92 sec), executed benchmarks: 1
            //
            // * Artifacts cleanup *
            //   Global total time: 00:01:59(119.89 sec), executed benchmarks: 1

            return chain.Fit(_dataset);
        }
Example #16
0
        /// <summary>
        /// Trains the model by fitting <c>TrainingPipeline</c> to <c>TrainingDataView</c>.
        /// </summary>
        /// <returns>The fitted transformer.</returns>
        public static ITransformer TrainModel()
        {
            return TrainingPipeline.Fit(TrainingDataView);
        }
Example #17
0
        /// <summary>
        /// Builds and trains the image classification model: images are loaded, resized and converted
        /// to pixels, scored by the TensorFlow model at <c>_inceptionTensorFlowModel</c>, and the
        /// "softmax2_pre_activation" output is used as features for an L-BFGS maximum-entropy trainer.
        /// The fitted model is then applied to the data at <c>_testTagsTsv</c>; the predictions are passed
        /// to <c>DisplayResults</c> and the log-loss metrics are written to the console.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create transforms, trainers and data loaders.</param>
        /// <returns>The model fitted on the data at <c>_trainTagsTsv</c>.</returns>
        public static ITransformer GenerateModel(MLContext mlContext)
        {
            // NOTE: the <Snippet...> markers below are kept verbatim; they look like documentation
            // tooling anchors, so do not rename or move them without checking the consumer.
            // <SnippetImageTransforms>
            IEstimator <ITransformer> pipeline = mlContext.Transforms.LoadImages(outputColumnName: "input", imageFolder: _imagesFolder, inputColumnName: nameof(ImageData.ImagePath))
                                                 // The image transforms transform the images into the model's expected format.
                                                 .Append(mlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: InceptionSettings.ImageWidth, imageHeight: InceptionSettings.ImageHeight, inputColumnName: "input"))
                                                 .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input", interleavePixelColors: InceptionSettings.ChannelsLast, offsetImage: InceptionSettings.Mean))
                                                 // </SnippetImageTransforms>
                                                 // The ScoreTensorFlowModel transform scores the "input" column with the loaded TensorFlow model
                                                 // and exposes its "softmax2_pre_activation" output as a column.
                                                 // <SnippetScoreTensorFlowModel>
                                                 .Append(mlContext.Model.LoadTensorFlowModel(_inceptionTensorFlowModel).
                                                         ScoreTensorFlowModel(outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }, addBatchDimensionInput: true))
                                                 // </SnippetScoreTensorFlowModel>
                                                 // Map the "Label" values to keys in "LabelKey", which the trainer uses as its label column.
                                                 // <SnippetMapValueToKey>
                                                 .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelKey", inputColumnName: "Label"))
                                                 // </SnippetMapValueToKey>
                                                 // <SnippetAddTrainer>
                                                 .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(labelColumnName: "LabelKey", featureColumnName: "softmax2_pre_activation"))
                                                 // </SnippetAddTrainer>
                                                 // Map the predicted key back to its original label value.
                                                 // NOTE(review): the cache checkpoint is appended after the trainer, as the last step of the
                                                 // chain; confirm this placement is intended rather than before the trainer.
                                                 // <SnippetMapKeyToValue>
                                                 .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabelValue", "PredictedLabel"))
                                                 .AppendCacheCheckpoint(mlContext);
            // </SnippetMapKeyToValue>

            // Load the training data (no header row) from _trainTagsTsv.
            // <SnippetLoadData>
            IDataView trainingData = mlContext.Data.LoadFromTextFile <ImageData>(path:  _trainTagsTsv, hasHeader: false);

            // </SnippetLoadData>

            // Train the model
            Console.WriteLine("=============== Training classification model ===============");
            // Create and train the model
            // <SnippetTrainModel>
            ITransformer model = pipeline.Fit(trainingData);
            // </SnippetTrainModel>

            // Generate predictions from the test data, to be evaluated
            // <SnippetLoadAndTransformTestData>
            IDataView testData    = mlContext.Data.LoadFromTextFile <ImageData>(path: _testTagsTsv, hasHeader: false);
            IDataView predictions = model.Transform(testData);

            // Create an IEnumerable for the predictions for displaying results
            // (the second argument, true, is presumably reuseRowObject; confirm against the API).
            IEnumerable <ImagePrediction> imagePredictionData = mlContext.Data.CreateEnumerable <ImagePrediction>(predictions, true);

            DisplayResults(imagePredictionData);
            // </SnippetLoadAndTransformTestData>

            // Get performance metrics on the model using the test-set predictions computed above
            Console.WriteLine("=============== Classification metrics ===============");

            // <SnippetEvaluate>
            MulticlassClassificationMetrics metrics =
                mlContext.MulticlassClassification.Evaluate(predictions,
                                                            labelColumnName: "LabelKey",
                                                            predictedLabelColumnName: "PredictedLabel");

            // </SnippetEvaluate>

            //<SnippetDisplayMetrics>
            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");
            //</SnippetDisplayMetrics>

            // <SnippetReturnModel>
            return(model);
            // </SnippetReturnModel>
        }
        /// <summary>
        /// Trains two regression models (GAM and FastTree gradient boosting) on the border-crossing
        /// data set, prints their evaluation metrics on a held-out test split and saves the
        /// boosted model to <c>_salida_modelPath</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //###############################################################
            // PROCESS INITIALIZATION
            //###############################################################

            // A fixed seed is passed so that runs are repeatable.
            MLContext mlContext = new MLContext(seed: 1);

            // Load the observations (semicolon-separated text file with a header row),
            // mapped onto the BorderCrossObservation input class.
            IDataView originalFullData =
                mlContext.Data.LoadFromTextFile <BorderCrossObservation>(
                    _DataPath,
                    separatorChar: ';',
                    hasHeader: true);


            //###############################################################
            // BUILD THE DATA SET
            //###############################################################

            // Split the full data into training (80%) and test (20%) sets.
            double testFraction = 0.2;

            TrainTestData Split_trainTestData = mlContext.Data.TrainTestSplit(originalFullData,
                                                                              testFraction: testFraction, seed: 1);

            IDataView trainingDataView = Split_trainTestData.TrainSet;
            IDataView testDataView     = Split_trainTestData.TestSet;

            // Dump the training split to disk so it can be inspected (csv-like text).
            using (var fileStream = File.Create(_salida_trainDataPath))
            {
                mlContext.Data.SaveAsText(trainingDataView, fileStream, separatorChar: ';', headerRow: true, schema: true);
            }

            // Dump the test split to disk so it can be inspected (csv-like text).
            using (var fileStream = File.Create(_salida_testDataPath))
            {
                mlContext.Data.SaveAsText(testDataView, fileStream, separatorChar: ';', headerRow: true, schema: true);
            }


            //###############################################################
            // FEATURE SELECTION
            //###############################################################

            // Schema columns that must NOT be fed to the model directly.
            string[] excludedColumnNames =
            {
                "Label",     // output attribute
                "Port_Name", // only a single value exists
                "State",     // single value
                "Port_Code", // single value
                "Border",    // single value
                "Longitud",  // single value
                "Latitud",   // single value
                "Mes",       // replaced by its one-hot encoding
                "Year",      // replaced by its one-hot encoding
                "Measure"    // replaced by its one-hot encoding
            };

            // Every remaining schema column is used as a feature.
            var listfeatureColumnNames = trainingDataView.Schema
                                         .Select(column => column.Name)
                                         .Where(name => !excludedColumnNames.Contains(name))
                                         .ToList();

            // Add the one-hot encoded versions of the columns excluded above.
            listfeatureColumnNames.Add("MesInd");
            listfeatureColumnNames.Add("YearInd");
            listfeatureColumnNames.Add("MeasureInd");

            // Concatenate expects an array of column names.
            string[] featureColumnNames = listfeatureColumnNames.ToArray();


            //###############################################################
            // DATA TRANSFORMATION --> pipeline
            //###############################################################

            IEstimator <ITransformer> pipeline = mlContext.Transforms.Categorical.OneHotEncoding(
                outputColumnName: "MesInd", inputColumnName: "Mes")
                                                 // One-hot encode the year
                                                 .Append(mlContext.Transforms.Categorical.OneHotEncoding(
                                                             outputColumnName: "YearInd", inputColumnName: "Year"))
                                                 // One-hot encode the measure
                                                 .Append(mlContext.Transforms.Categorical.OneHotEncoding(
                                                             outputColumnName: "MeasureInd", inputColumnName: "Measure"))
                                                 // Assemble the feature vector
                                                 .Append(mlContext.Transforms.Concatenate(
                                                             "Features", featureColumnNames))
                                                 // Remove the raw categorical columns from the data view
                                                 .Append(mlContext.Transforms.DropColumns(
                                                             new string[] { "Mes", "Year", "Measure" }))
                                                 // Normalize the output attribute into a separate column
                                                 .Append(mlContext.Transforms.NormalizeMeanVariance(
                                                             inputColumnName: "Label", outputColumnName: "LabelNormalized"));

            // Save the transformed training data for inspection.
            IDataView transformedData =
                pipeline.Fit(trainingDataView).Transform(trainingDataView);

            using (var fileStream = File.Create(_salida_transformationData))
            {
                mlContext.Data.SaveAsText(transformedData, fileStream, separatorChar: ';', headerRow: true, schema: true);
            }


            //###############################################################
            // TRAINING ALGORITHMS --> trainingPipeline
            //###############################################################

            //***************************************************************
            // 1. GAM (Generalized Additive Models)
            //***************************************************************

            var trainer_gam = mlContext.Regression.Trainers
                              .Gam(labelColumnName: "LabelNormalized",
                                   featureColumnName: "Features",
                                   learningRate: 0.02,
                                   numberOfIterations: 2100);

            // Append the trainer to the data transformation pipeline.
            IEstimator <ITransformer> trainingPipeline_gam = pipeline.Append(trainer_gam);


            //***************************************************************
            // 2. GBA (Gradient Boosting Algorithm)
            //***************************************************************

            var trainer_boost = mlContext.Regression.Trainers
                                .FastTree(labelColumnName: "LabelNormalized",
                                          featureColumnName: "Features",
                                          numberOfLeaves: 20,
                                          numberOfTrees: 100,
                                          minimumExampleCountPerLeaf: 10,
                                          learningRate: 0.2);

            // Append the trainer to the data transformation pipeline.
            IEstimator <ITransformer> trainingPipeline_boost = pipeline.Append(trainer_boost);


            //###############################################################
            // MODEL TRAINING
            //###############################################################

            // NOTE: numeric values below use "0.##" rather than "#.##" so that values
            // below 1 keep their leading zero and an exact 0 is not printed as an empty string.

            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GAM   ");
            Console.WriteLine($"*-----------------------------------------------------------");
            var watch_gam = System.Diagnostics.Stopwatch.StartNew();
            var model_gam = trainingPipeline_gam.Fit(trainingDataView);

            watch_gam.Stop();
            double elapseds_gam = watch_gam.Elapsed.TotalSeconds;

            Console.WriteLine($"El entrenamiento GAM ha tardado: {elapseds_gam:0.##} s\n");

            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Entrenamiento del Modelo calculado con el Algoritmo GBA   ");
            Console.WriteLine($"*-----------------------------------------------------------");
            var watch_boost = System.Diagnostics.Stopwatch.StartNew();
            var model_boost = trainingPipeline_boost.Fit(trainingDataView);

            watch_boost.Stop();
            double elapseds_boost = watch_boost.Elapsed.TotalSeconds;

            Console.WriteLine($"El entrenamiento GBA ha tardado: {elapseds_boost:0.##} s\n");


            //###############################################################
            // MODEL EVALUATION
            //###############################################################

            // Score the test split with both models.
            var predictions_gam   = model_gam.Transform(testDataView);
            var predictions_boost = model_boost.Transform(testDataView);

            // Compute the regression metrics of each model against the normalized label.
            var metrics_gam = mlContext.Regression
                              .Evaluate(data: predictions_gam, labelColumnName: "LabelNormalized", scoreColumnName: "Score");
            var metrics_boost = mlContext.Regression
                                .Evaluate(data: predictions_boost, labelColumnName: "LabelNormalized", scoreColumnName: "Score");

            // Print the GAM metrics.
            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GAM      ");
            Console.WriteLine($"*-----------------------------------------------------------");
            Console.WriteLine($"*       GAM RSquared Score:      {metrics_gam.RSquared:0.##}");
            Console.WriteLine($"*       GAM Root Mean Squared Error Score:      {metrics_gam.RootMeanSquaredError:0.##}");
            Console.WriteLine($"*       GAM MAE Score:      {metrics_gam.MeanAbsoluteError:0.##}");
            Console.WriteLine($"*       GAM MSE Score:      {metrics_gam.MeanSquaredError:0.##}\n");

            // Print the GBA metrics.
            Console.WriteLine($"\n************************************************************");
            Console.WriteLine($"* Métricas para el Modelo calculado con el Algoritmo GBA      ");
            Console.WriteLine($"*-----------------------------------------------------------");
            Console.WriteLine($"*       GBA RSquared Score:      {metrics_boost.RSquared:0.##}");
            Console.WriteLine($"*       GBA Root Mean Squared Error Score:      {metrics_boost.RootMeanSquaredError:0.##}");
            Console.WriteLine($"*       GBA MAE Score:      {metrics_boost.MeanAbsoluteError:0.##}");
            Console.WriteLine($"*       GBA MSE Score:      {metrics_boost.MeanSquaredError:0.##}\n");


            //###############################################################
            // MODEL SELECTION
            //###############################################################

            // Persist the model for later consumption.
            // NOTE(review): the boosted model is saved unconditionally; the metrics printed
            // above are not compared in code.
            mlContext.Model.Save(model_boost, trainingDataView.Schema, _salida_modelPath);
        }
Example #19
0
        //------------------------------------------------------------------------------------------------------------------

        /// <summary>
        /// Builds one Poisson regression model per training data set (four in total), saves each
        /// model to disk and stores the scores each model predicts for <c>inputModelData</c>
        /// in the corresponding output array.
        /// </summary>
        /// <param name="arrPredict1">Receives the scores of the model trained on the first data set.</param>
        /// <param name="arrPredict2">Receives the scores of the model trained on the second data set.</param>
        /// <param name="arrPredict3">Receives the scores of the model trained on the third data set.</param>
        /// <param name="arrPredict4">Receives the scores of the model trained on the fourth data set.</param>
        public static void BuildPredictModel(ref float[] arrPredict1, ref float[] arrPredict2, ref float[] arrPredict3, ref float[] arrPredict4)
        {
            // Each data set uses its own context, training file and model file; the models are
            // built one after another, and each result is assigned before the next model starts.
            arrPredict1 = TrainSaveAndPredict(mlContext1, TRAIN_DATA_FILEPATH_1, MODEL_FILEPATH_1);
            arrPredict2 = TrainSaveAndPredict(mlContext2, TRAIN_DATA_FILEPATH_2, MODEL_FILEPATH_2);
            arrPredict3 = TrainSaveAndPredict(mlContext3, TRAIN_DATA_FILEPATH_3, MODEL_FILEPATH_3);
            arrPredict4 = TrainSaveAndPredict(mlContext4, TRAIN_DATA_FILEPATH_4, MODEL_FILEPATH_4);
        }

        /// <summary>
        /// Trains a Poisson regression model on one training file, saves it and returns the
        /// scores it predicts for <c>inputModelData</c>.
        /// </summary>
        /// <param name="mlContext">ML.NET context used for every operation of this model.</param>
        /// <param name="trainDataPath">Path of the comma-separated training file (with header row).</param>
        /// <param name="modelPath">Model file path; resolved through <c>GetAbsolutePath</c> before saving.</param>
        /// <returns>The values of the "Score" column produced for <c>inputModelData</c>.</returns>
        private static float[] TrainSaveAndPredict(MLContext mlContext, string trainDataPath, string modelPath)
        {
            // Prepare the data for the model.
            IDataView trainingDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                path: trainDataPath,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            IEstimator <ITransformer> dataProcessPipeline = mlContext.Transforms.Concatenate("Features", new[] { "id" })
                                                            .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                                                            .AppendCacheCheckpoint(mlContext);

            // Choose the training algorithm.
            var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(new LbfgsPoissonRegressionTrainer.Options()
            {
                DenseOptimizer = true, LabelColumnName = "name", FeatureColumnName = "Features"
            });
            IEstimator <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            // Train the model.
            // (For console diagnostics the pipeline can additionally be cross-validated with
            // mlContext.Regression.CrossValidate(..., labelColumnName: "name").)
            ITransformer model = trainingPipeline.Fit(trainingDataView);

            // Save the model.
            mlContext.Model.Save(model, trainingDataView.Schema, GetAbsolutePath(modelPath));

            // Produce the predictions for all input rows at once.
            IDataView inputPredict = mlContext.Data.LoadFromEnumerable <ModelInput>(inputModelData);
            IDataView predictions  = model.Transform(inputPredict);

            return predictions.GetColumn <float>("Score").ToArray();
        }
Example #20
0
 /// <summary>
 /// Fits the supplied estimator pipeline to the training data.
 /// </summary>
 /// <param name="trainingDataView">Data set the pipeline is fitted on.</param>
 /// <param name="pipeLine">Estimator pipeline to fit.</param>
 /// <returns>The transformer returned by fitting <paramref name="pipeLine"/>.</returns>
 private static ITransformer Train(IDataView trainingDataView, IEstimator <ITransformer> pipeLine)
 {
     ITransformer fittedModel = pipeLine.Fit(trainingDataView);

     return fittedModel;
 }
Example #21
0
 /// <summary>
 /// Fits the training pipeline held by this instance and stores the result in
 /// <c>TrainedModel</c>.
 /// </summary>
 /// <param name="trainingData">Data set the pipeline is fitted on.</param>
 /// <returns>The value of <c>TrainedModel</c> after fitting.</returns>
 public ITransformer Train(IDataView trainingData)
 {
     // Keep the fitted model on the instance before handing it back.
     TrainedModel = _trainingPipeline.Fit(trainingData);

     return TrainedModel;
 }
Example #22
0
 /// <summary>
 /// Fits a training pipeline to a data view.
 /// </summary>
 /// <param name="trainingPipeline">Estimator pipeline to fit.</param>
 /// <param name="trainData">Data view the pipeline is fitted on.</param>
 /// <returns>The transformer produced by fitting <paramref name="trainingPipeline"/>.</returns>
 public static ITransformer TrainModel(IEstimator <ITransformer> trainingPipeline, IDataView trainData)
     => trainingPipeline.Fit(trainData);
Example #23
0
        /// <summary>
        /// Fits the given pipeline to the training data and saves the resulting model,
        /// together with the training data schema, to <c>_modelPath</c>.
        /// </summary>
        /// <param name="trainingDataView">Data set the pipeline is fitted on.</param>
        /// <param name="pipeline">Estimator pipeline to fit.</param>
        public static void TrainAndSaveModel(IDataView trainingDataView, IEstimator <ITransformer> pipeline)
        {
            var fittedModel = pipeline.Fit(trainingDataView);
            var inputSchema = trainingDataView.Schema;

            _mlContext.Model.Save(fittedModel, inputSchema, _modelPath);
        }
Example #24
0
        /// <summary>
        /// Trains a binary attrition classifier on "./data/attrition.csv", evaluates it on a
        /// held-out split and prints the permutation feature importance of every feature slot,
        /// ordered by the absolute mean change in area under the ROC curve.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var mlContext = new MLContext();

            IDataView attritionData = mlContext.Data.LoadFromTextFile <Employee>(path: "./data/attrition.csv", hasHeader: true, separatorChar: ',');

            var split     = mlContext.Data.TrainTestSplit(attritionData, testFraction: 0.2);
            var trainData = split.TrainSet;
            var testData  = split.TestSet;

            // Columns whose type prints as "Single" (except the label) are used as features unchanged.
            string[] numFeatures = attritionData.Schema.AsEnumerable()
                                   .Where(column => (column.Name != nameof(Employee.Attrition)) && (column.Type.ToString() == "Single"))
                                   .Select(column => column.Name)
                                   .ToArray();

            // Single source of truth for the categorical columns: both the one-hot encoder
            // configuration and the feature name list are derived from this array, so the
            // two can no longer drift apart.
            string[] categoricalFields =
            {
                nameof(Employee.BusinessTravel),
                nameof(Employee.Department),
                nameof(Employee.EducationField),
                nameof(Employee.MaritalStatus),
                nameof(Employee.JobLevel),
                nameof(Employee.JobRole),
                nameof(Employee.OverTime)
            };

            string[] oheFieldNames = categoricalFields
                                     .Select(field => "OHE-" + field)
                                     .ToArray();

            // Feature order: one-hot encoded columns first, then the numeric columns.
            string[] allFeatureNames = oheFieldNames.Concat(numFeatures).ToArray();

            IEstimator <ITransformer> featurizePipeline = mlContext.Transforms.Categorical.OneHotEncoding(
                categoricalFields
                .Select(field => new InputOutputColumnPair("OHE-" + field, field))
                .ToArray(),
                OneHotEncodingEstimator.OutputKind.Indicator);

            featurizePipeline = featurizePipeline.Append(mlContext.Transforms.Concatenate("Features", allFeatureNames))
                                .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

            ConsoleHelper.ConsoleWriteHeader("=============== Begin to train the model ===============");

            var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: nameof(Employee.Attrition),
                featureColumnName: "Features");

            /* ----- Tried with other trainers below and compared the outcome ------ */
            // var trainer = mlContext.BinaryClassification.Trainers.LightGbm(labelColumnName: nameof(Employee.Attrition), featureColumnName: "Features");
            // var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: nameof(Employee.Attrition), featureColumnName: "Features");
            // var trainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: nameof(Employee.Attrition), featureColumnName: "Features");
            // var trainer = mlContext.BinaryClassification.Trainers.SgdCalibrated(labelColumnName: nameof(Employee.Attrition), featureColumnName: "Features");
            /* ------------------------------------------------------------------- */

            var trainPipeline = featurizePipeline.Append(trainer);
            var trainedModel  = trainPipeline.Fit(trainData);

            ConsoleHelper.ConsoleWriteHeader("=============== Trained model successfully ===============");

            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");

            var testDataPredictions = trainedModel.Transform(testData);
            var evaluateMetrics     = mlContext.BinaryClassification.Evaluate(data: testDataPredictions,
                                                                              labelColumnName: nameof(Employee.Attrition),
                                                                              scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainedModel.ToString(), evaluateMetrics);

            Console.WriteLine("===== Permutation Test =====");

            // The permutation importance is computed on the featurized TRAINING split
            // (the featurizer is fitted again here, separately from the trained model).
            var permuteTestData    = featurizePipeline.Fit(trainData).Transform(trainData);
            var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(
                predictionTransformer: trainedModel.LastTransformer,
                data: permuteTestData,
                labelColumnName: nameof(Employee.Attrition),
                permutationCount: 50);

            // Maps every slot index of the "Features" vector back to the name of the column
            // it came from: a column with slot names contributes one entry per slot, any
            // other column contributes a single entry.
            var mapFields = new List <string>();

            foreach (string featureName in allFeatureNames)
            {
                var featureColumn = permuteTestData.Schema[featureName];

                if (featureColumn.HasSlotNames())
                {
                    var slotNames = new VBuffer <ReadOnlyMemory <char> >();
                    featureColumn.GetSlotNames(ref slotNames);
                    mapFields.AddRange(Enumerable.Repeat(featureName, slotNames.Length));
                }
                else
                {
                    mapFields.Add(featureName);
                }
            }

            // Now let's look at which features are most important to the model
            // overall. Get the feature indices sorted by their impact on AUC.
            var sortedIndices = permutationMetrics
                                .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
                                .OrderByDescending(feature => Math.Abs(feature.AreaUnderRocCurve.Mean));

            foreach (var feature in sortedIndices)
            {
                Console.WriteLine($"{mapFields[feature.index],-20}|\t{Math.Abs(feature.AreaUnderRocCurve.Mean):F6}");
            }
        }
Example #25
0
        // </SnippetDeclareGlobalVariables>

        /// <summary>
        /// Builds an ML.NET pipeline that tokenizes review text, maps the words to integer ids
        /// through a lookup file, resizes the id vector to a fixed length and scores it with a
        /// TensorFlow model loaded from <c>_modelPath</c>; then calls <c>PredictSentiment</c>
        /// with the resulting model.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // <SnippetCreateMLContext>
            MLContext mlContext = new MLContext();
            // </SnippetCreateMLContext>

            // Dictionary to encode words as integers.
            // Loaded from a comma-separated file: column 0 holds the word, column 1 its integer id.
            // <SnippetCreateLookupMap>
            var lookupMap = mlContext.Data.LoadFromTextFile(Path.Combine(_modelPath, "imdb_word_index.csv"),
                                                            columns: new[]
            {
                new TextLoader.Column("Words", DataKind.String, 0),
                new TextLoader.Column("Ids", DataKind.Int32, 1),
            },
                                                            separatorChar: ','
                                                            );
            // </SnippetCreateLookupMap>

            // The model expects the input feature vector to be a fixed length vector.
            // This action resizes the variable length array generated by the lookup map
            // to a fixed length vector. If there are fewer than 600 words in the sentence,
            // the remaining indices will be filled with zeros. If there are more than
            // 600 words in the sentence, then the array is truncated at 600.
            // NOTE(review): the length actually applied is FeatureLength, which is declared
            // outside this method; the figure 600 above assumes that is its value - confirm.
            // <SnippetResizeFeatures>
            Action <VariableLength, FixedLength> ResizeFeaturesAction = (s, f) =>
            {
                var features = s.VariableLengthFeatures;
                Array.Resize(ref features, FeatureLength);
                f.Features = features;
            };
            // </SnippetResizeFeatures>

            // Load the TensorFlow model.
            // <SnippetLoadTensorFlowModel>
            TensorFlowModel tensorFlowModel = mlContext.Model.LoadTensorFlowModel(_modelPath);
            // </SnippetLoadTensorFlowModel>

            // Print the element type and first dimension of the model's input ("Features")
            // and output ("Prediction/Softmax") nodes.
            // <SnippetGetModelSchema>
            DataViewSchema schema = tensorFlowModel.GetModelSchema();

            Console.WriteLine(" =============== TensorFlow Model Schema =============== ");
            var featuresType = (VectorDataViewType)schema["Features"].Type;

            Console.WriteLine($"Name: Features, Type: {featuresType.ItemType.RawType}, Size: ({featuresType.Dimensions[0]})");
            var predictionType = (VectorDataViewType)schema["Prediction/Softmax"].Type;

            Console.WriteLine($"Name: Prediction/Softmax, Type: {predictionType.ItemType.RawType}, Size: ({predictionType.Dimensions[0]})");

            // </SnippetGetModelSchema>

            // <SnippetTokenizeIntoWords>
            IEstimator <ITransformer> pipeline =
                // Split the text into individual words
                mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "ReviewText")
                // </SnippetTokenizeIntoWords>

                // <SnippetMapValue>
                // Map each word to an integer value. The array of integers makes up the input features.
                .Append(mlContext.Transforms.Conversion.MapValue("VariableLengthFeatures", lookupMap,
                                                                 lookupMap.Schema["Words"], lookupMap.Schema["Ids"], "TokenizedWords"))
                // </SnippetMapValue>

                // <SnippetCustomMapping>
                // Resize variable length vector to fixed length vector.
                .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
                // </SnippetCustomMapping>

                // <SnippetScoreTensorFlowModel>
                // Passes the data to TensorFlow for scoring
                .Append(tensorFlowModel.ScoreTensorFlowModel("Prediction/Softmax", "Features"))
                // </SnippetScoreTensorFlowModel>

                // <SnippetCopyColumns>
                // Retrieves the 'Prediction' from TensorFlow and copies to a column
                .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"));
            // </SnippetCopyColumns>

            // <SnippetCreateModel>
            // Create an executable model from the estimator pipeline
            // The pipeline is fitted on an empty list of MovieReview, so presumably only the
            // input schema matters for this Fit call - TODO confirm.
            IDataView    dataView = mlContext.Data.LoadFromEnumerable(new List <MovieReview>());
            ITransformer model    = pipeline.Fit(dataView);

            // </SnippetCreateModel>

            // <SnippetCallPredictSentiment>
            PredictSentiment(mlContext, model);
            // </SnippetCallPredictSentiment>
        }
Example #26
0
        // ===========================================================================================================


        /// <summary>
        /// Assembles the training pipeline for the requested classification mode, fits it on
        /// <c>_trainingDataView</c> and stores the fitted model in <c>_mlModel</c>.
        /// </summary>
        /// <param name="classificationMode">
        /// Selects the trainer: one-versus-all over an averaged perceptron, or LightGBM.
        /// </param>
        /// <remarks>
        /// Returns immediately when <c>ErrorHasOccured</c> is already set. Any exception raised while
        /// building or fitting (including the <see cref="ArgumentOutOfRangeException"/> thrown for an
        /// unsupported mode) is caught: its message is written to the debug output and copied to
        /// <c>FailureInformation</c>, and <c>ErrorHasOccured</c> is set.
        /// </remarks>
        public void BuildTrainingPipelineAndModel(ClassificationMode classificationMode)
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                // The fully assembled estimator for the selected mode; it is fitted once, after the switch.
                IEstimator<ITransformer> selectedPipeline;

                switch (classificationMode)
                {
                case ClassificationMode.OneVersusAll:     // the mode this project was first built with
                    // Pre-processing: key-encode the label, gather the feature columns into "Features",
                    // min-max normalize them, then append a cache checkpoint.
                    var ovaFeaturization =
                        _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                        .Append(_mlContext.Transforms.Concatenate(
                                    outputColumnName: "Features",
                                    inputColumnNames: FeatureNames.ToArray()))
                        .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                        .AppendCacheCheckpoint(_mlContext);

                    // Trainer: one-versus-all around an averaged perceptron, followed by mapping the
                    // predicted key back to the original label value.
                    var ovaTrainer =
                        _mlContext.MulticlassClassification.Trainers.OneVersusAll(
                            _mlContext.BinaryClassification.Trainers.AveragedPerceptron(
                                labelColumnName: "Label",
                                numberOfIterations: 10,
                                featureColumnName: "Features"),
                            labelColumnName: "Label")
                        .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                    selectedPipeline = ovaFeaturization.Append(ovaTrainer);
                    break;

                case ClassificationMode.LightGbm:
                    // Pre-processing: key-encode the label and gather the feature columns into "Features"
                    // (no normalization or cache checkpoint in this mode).
                    var gbmFeaturization =
                        _mlContext.Transforms.Conversion.MapValueToKey("Label", "Label")
                        .Append(_mlContext.Transforms.Concatenate(
                                    outputColumnName: "Features",
                                    inputColumnNames: FeatureNames.ToArray()));

                    // Trainer: LightGBM, followed by mapping the predicted key back to the original label value.
                    var gbmTrainer =
                        _mlContext.MulticlassClassification.Trainers.LightGbm(labelColumnName: "Label", featureColumnName: "Features")
                        .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

                    selectedPipeline = gbmFeaturization.Append(gbmTrainer);
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(classificationMode), classificationMode, null);
                }

                // Train the model.
                _mlModel = selectedPipeline.Fit(_trainingDataView);

                // TODO: cross-validate on the single training set (there is no separate evaluation set) to get
                // accuracy metrics, e.g. MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline,
                // numberOfFolds: 5, labelColumnName: "Label"), then print the averaged fold metrics via
                // PrintMulticlassClassificationFoldsAverageMetrics(crossValidationResults).
            }
            catch (Exception ex)
            {
                // Record the failure instead of propagating it; callers inspect ErrorHasOccured.
                Debug.WriteLine(ex.Message);
                ErrorHasOccured    = true;
                FailureInformation = ex.Message;
            }
        }
Example #27
0
 /// <summary>
 /// Fits <paramref name="est"/> on <paramref name="data"/> and returns that same data
 /// passed through the fitted transformer.
 /// </summary>
 /// <param name="est">Estimator to fit.</param>
 /// <param name="data">Data used both for fitting and as the input of the transform.</param>
 /// <returns>The result of transforming <paramref name="data"/> with the fitted transformer.</returns>
 public static IDataView FitAndTransform(this IEstimator<ITransformer> est, IDataView data)
 {
     ITransformer fitted = est.Fit(data);
     return fitted.Transform(data);
 }
Example #28
0
 /// <summary>
 /// Fits <paramref name="trainingPipeline"/> on <paramref name="trainingData"/>.
 /// </summary>
 /// <param name="trainingData">Data the pipeline is fitted on.</param>
 /// <param name="trainingPipeline">Estimator pipeline to fit.</param>
 /// <returns>The transformer produced by the fit.</returns>
 static ITransformer TrainModel(IDataView trainingData, IEstimator<ITransformer> trainingPipeline)
     => trainingPipeline.Fit(trainingData);
        /// <summary>
        /// Shows how to mix the statically typed and dynamically typed pipeline APIs: a static
        /// pre-processing pipeline is converted to a dynamic one to append a one-versus-all trainer,
        /// then either asserted back into a static pipeline or completed and trained dynamically.
        /// </summary>
        /// <param name="dataPath">Path of the comma-separated text file handed to the loader.</param>
        private void MixMatch(string dataPath)
        {
            // The context exposes the catalogs of operations used throughout this method.
            var mlContext = new MLContext();

            // Describe the layout of the text file: four float feature columns followed by a text label.
            // The separator is given explicitly because the dataset uses commas, while the default is tab.
            var irisLoader = mlContext.Data.CreateTextLoader(
                c => (
                    // The four features of the Iris dataset.
                    SepalLength: c.LoadFloat(0),
                    SepalWidth: c.LoadFloat(1),
                    PetalLength: c.LoadFloat(2),
                    PetalWidth: c.LoadFloat(3),
                    // Label: kind of iris.
                    Label: c.LoadText(4)),
                separator: ',');

            // Load the data through the loader defined above.
            var irisData = irisLoader.Load(dataPath);

            // Statically typed pre-processing: turn the string label into a key and concatenate the four
            // features into a single 'Features' column.
            var staticPrep = irisLoader.MakeNewEstimator()
                .Append(row => (
                    Label: row.Label.ToKey(),
                    Features: row.SepalLength.ConcatWith(row.SepalWidth, row.PetalLength, row.PetalWidth)));

            // At the time of writing there is no static pipeline for OVA (one-versus-all), so switch to the
            // dynamic view of the pipeline before appending that learner.
            IEstimator<ITransformer> prepAsDynamic = staticPrep.AsDynamic;

            // Binary trainer that the OVA learner wraps.
            var perceptron = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");

            // Dynamic pipeline = pre-processing + OVA learner.
            IEstimator<ITransformer> ovaPipe =
                prepAsDynamic.Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(perceptron));

            // From here there are two options: keep going dynamically (shown further down), or return to the
            // static world by asserting the input and output shapes of the dynamic pipeline.
            var backToStatic = ovaPipe.AssertStatic(
                mlContext,
                // Input shape: identical to what the loader produces (four float scalars and a text label).
                input => (
                    SepalLength: input.R4.Scalar,
                    SepalWidth: input.R4.Scalar,
                    PetalLength: input.R4.Scalar,
                    PetalWidth: input.R4.Scalar,
                    Label: input.Text.Scalar),
                // Output shape (only the relevant subset): the scores as a float vector and the predicted
                // label as a uint-backed key with text values, since the original labels are text.
                output => (
                    Score: output.R4.Vector,
                    PredictedLabel: output.KeyU4.TextValues.Scalar))
                // Convert the predicted label from key back to the original string value.
                .Append(row => row.PredictedLabel.ToValue());

            // Option 1: train in a statically typed way.
            var staticModel = backToStatic.Fit(irisData);

            // Option 2: stay dynamic, append the key-to-value mapping explicitly and train on the dynamic
            // view of the data.
            IEstimator<ITransformer> fullyDynamicPipe =
                ovaPipe.Append(new KeyToValueMappingEstimator(mlContext, "PredictedLabel"));
            var dynamicModel = fullyDynamicPipe.Fit(irisData.AsDynamic);

            // Now 'dynamicModel' and 'staticModel.AsDynamic' are equivalent.
        }
Example #30
0
        /// <summary>
        /// Trains a FastTree binary classifier on the training file, saves the fitted model to
        /// <c>ModelPath</c>, then evaluates it on the test file and writes the metrics to the debug output.
        /// </summary>
        /// <param name="trainingFileName">Comma-separated, header-less file loaded as <c>CarInventory</c> rows for training.</param>
        /// <param name="testFileName">Comma-separated, header-less file loaded as <c>CarInventory</c> rows for evaluation.</param>
        /// <remarks>
        /// If either file does not exist, a message is written to the debug output and the method returns
        /// without training or saving anything.
        /// </remarks>
        public void Train(string trainingFileName, string testFileName)
        {
            System.Diagnostics.Debug.WriteLine("Reached Train Method");

            // Bail out early when the training file is missing.
            if (!File.Exists(trainingFileName))
            {
                System.Diagnostics.Debug.WriteLine($"Failed to find training data file ({trainingFileName})");

                return;
            }

            // Bail out early when the test file is missing.
            if (!File.Exists(testFileName))
            {
                System.Diagnostics.Debug.WriteLine($"Failed to find test data file ({testFileName})");

                return;
            }

            // Load the training file into an IDataView (ready for processing).
            var trainingDataView = MlContext.Data.LoadFromTextFile<CarInventory>(trainingFileName, ',', hasHeader: false);

            // Concatenate the CarInventory properties into "Features" and normalize them by mean/variance.
            // NOTE(review): ToPropertyList presumably lists every property except the one named (Label) - confirm.
            IEstimator<ITransformer> dataProcessPipeline = MlContext.Transforms
                                                           .Concatenate("Features", typeof(CarInventory)
                                                                        .ToPropertyList<CarInventory>(nameof(CarInventory.Label)))
                                                           .Append(MlContext.Transforms.NormalizeMeanVariance(inputColumnName: "Features", outputColumnName: "FeaturesNormalizedByMeanVar"));

            // Create the trainer, using the CarInventory label and the normalized feature column.
            var trainer = MlContext.BinaryClassification.Trainers.FastTree(
                labelColumnName: nameof(CarInventory.Label),
                featureColumnName: "FeaturesNormalizedByMeanVar",
                numberOfLeaves: 2,
                numberOfTrees: 800,
                minimumExampleCountPerLeaf: 1,
                learningRate: 0.2);

            // Append the trainer to the pipeline.
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // Fit the whole pipeline on the training data, then save the fitted model.
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            MlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);

            // Build the evaluation chain: the trained model followed by a feature-contribution calculator.
            // The calculator is fitted against data transformed by the already-trained model, which contains
            // the normalized feature column, so the data-processing pipeline does not have to be fitted a
            // second time over the training data.
            var featureContribution = MlContext.Transforms
                                      .CalculateFeatureContribution(trainedModel.LastTransformer)
                                      .Fit(trainedModel.Transform(trainingDataView));

            var evaluationPipeline = trainedModel.Append(featureContribution);

            // Score the test set and compute the binary-classification metrics.
            var testDataView = MlContext.Data.LoadFromTextFile<CarInventory>(testFileName, ',', hasHeader: false);

            var testSetTransform = evaluationPipeline.Transform(testDataView);

            var modelMetrics = MlContext.BinaryClassification.Evaluate(data: testSetTransform,
                                                                       labelColumnName: nameof(CarInventory.Label),
                                                                       scoreColumnName: "Score");

            System.Diagnostics.Debug.WriteLine($"Accuracy: {modelMetrics.Accuracy:P2}");
            System.Diagnostics.Debug.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}");
            System.Diagnostics.Debug.WriteLine($"Area under Precision recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}");
            System.Diagnostics.Debug.WriteLine($"F1Score: {modelMetrics.F1Score:P2}");
            System.Diagnostics.Debug.WriteLine($"LogLoss: {modelMetrics.LogLoss:#.##}");
            System.Diagnostics.Debug.WriteLine($"LogLossReduction: {modelMetrics.LogLossReduction:#.##}");
            System.Diagnostics.Debug.WriteLine($"PositivePrecision: {modelMetrics.PositivePrecision:#.##}");
            System.Diagnostics.Debug.WriteLine($"PositiveRecall: {modelMetrics.PositiveRecall:#.##}");
            System.Diagnostics.Debug.WriteLine($"NegativePrecision: {modelMetrics.NegativePrecision:#.##}");
            System.Diagnostics.Debug.WriteLine($"NegativeRecall: {modelMetrics.NegativeRecall:P2}");
        }