Example #1
        static void TrainAndSave()
        {
            MLContext mlContext = new MLContext();

            mlContext.Log += MlContext_Log;

            // Prepare the data
            var fulldata      = mlContext.Data.LoadFromTextFile <FigureData>(path: DataPath, hasHeader: true, separatorChar: ',');
            var trainTestData = mlContext.Data.TrainTestSplit(fulldata, testFraction: 0.2);
            var trainData     = trainTestData.TrainSet;
            var testData      = trainTestData.TestSet;

            // Train
            IEstimator <ITransformer> dataProcessPipeline = mlContext.Transforms.Concatenate("Features", new[] { "Height", "Weight" })
                                                            .Append(mlContext.Transforms.NormalizeMeanVariance(inputColumnName: "Features", outputColumnName: "FeaturesNormalizedByMeanVar"));
            IEstimator <ITransformer> trainer          = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Result", featureColumnName: "FeaturesNormalizedByMeanVar");
            IEstimator <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);
            ITransformer model = trainingPipeline.Fit(trainData);

            // Evaluate
            var predictions = model.Transform(testData);
            var metrics     = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "Result");

            PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // Save the model
            mlContext.Model.Save(model, trainData.Schema, ModelPath);
            Console.WriteLine($"Model file saved to :{ModelPath}");
        }
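        // Hypothetical consumption sketch (not part of the original sample): reload the model
        // saved by TrainAndSave() and score a single record. FigurePrediction and its property
        // names are assumptions; PredictedLabel/Probability/Score are the default output
        // columns of ML.NET binary classifiers.
        class FigurePrediction
        {
            public bool  PredictedLabel { get; set; }
            public float Probability    { get; set; }
            public float Score          { get; set; }
        }

        static void LoadAndPredict()
        {
            MLContext mlContext = new MLContext();

            // Load the model persisted by TrainAndSave()
            ITransformer model = mlContext.Model.Load(ModelPath, out DataViewSchema inputSchema);

            // Prediction engine for single, in-memory predictions
            var engine = mlContext.Model.CreatePredictionEngine<FigureData, FigurePrediction>(model);

            // Illustrative feature values only
            var result = engine.Predict(new FigureData { Height = 170, Weight = 65 });

            Console.WriteLine($"PredictedLabel: {result.PredictedLabel}, Probability: {result.Probability}");
        }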
        static void Main()
        {
            MLContext mLContext = new MLContext(1);

            var trainingDataPath = Utility.GetAbsolutePath(typeof(Program).Assembly.Location, "../../../SysptomTraining.txt");
            var trainingDataView = mLContext.Data.LoadFromTextFile <DiseasesSymptomTraining>(trainingDataPath, hasHeader: false, separatorChar: '|');

            var dataProcessPipeline = mLContext.Transforms.Conversion.MapValueToKey(outputColumnName: "Label", inputColumnName: nameof(DiseasesSymptomTraining.Name))
                                      .Append(mLContext.Transforms.Text.FeaturizeText(outputColumnName: "Syptom", inputColumnName: nameof(DiseasesSymptomTraining.Syptom)))
                                      .Append(mLContext.Transforms.Concatenate(outputColumnName: "Features", "Syptom"))
                                      .AppendCacheCheckpoint(mLContext);

            IEstimator <ITransformer> trainer = mLContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features");

            var trainingPipeline = dataProcessPipeline.Append(trainer).Append(mLContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValidationResults = mLContext.MulticlassClassification.CrossValidate(data: trainingDataView, estimator: trainingPipeline, numberOfFolds: 6, labelColumnName: "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            Console.WriteLine("=============== Saving the model to a file ===============");
            mLContext.Model.Save(trainedModel, trainingDataView.Schema, "../../../DiseasesSysptomModel.zip");

            ConsoleHelper.ConsoleWriteHeader("Training process finalized");

            ConsoleHelper.ConsolePressAnyKey();
        }
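A minimal sketch of the data classes the snippet above relies on; the LoadColumn indices and the prediction class are illustrative guesses based on the '|'-separated, headerless training file and are not taken from the original source.
        // Assumed shapes, for illustration only (attributes require Microsoft.ML.Data).
        public class DiseasesSymptomTraining
        {
            [LoadColumn(0)] public string Name   { get; set; }   // disease name, mapped to "Label"
            [LoadColumn(1)] public string Syptom { get; set; }   // symptom text, featurized into "Features"
        }

        public class DiseasesSymptomPrediction
        {
            [ColumnName("PredictedLabel")] public string Name { get; set; }
        }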
Example #3
 public static void DisplayPipeline(IEstimator <ITransformer> pipeline)
 {
     if (pipeline == null)
     {
         Console.WriteLine("Task 2 \"Create pipeline\" not completed.\n");
     }
     else
     {
         Console.WriteLine("Pipeline: " + pipeline.ToString() + "\n");
     }
 }
        public static void BuildAndTrainModel()
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var           mlContext      = new MLContext(seed: 1);
            var           dataView       = mlContext.Data.LoadFromEnumerable(LoadCorpus());
            TrainTestData trainTestSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            IDataView     trainingData   = trainTestSplit.TrainSet;
            IDataView     testData       = trainTestSplit.TestSet;

            var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(CorrectionData.GlueWithPrevious))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(PdfFeatures.FirstChars)))
                                      .Append(mlContext.Transforms.Conversion.ConvertType(new[]
            {
                new InputOutputColumnPair(nameof(PdfFeatures.PrevLastIsAlpha)),
                new InputOutputColumnPair(nameof(PdfFeatures.PrevLastIsDigit)),
                new InputOutputColumnPair(nameof(PdfFeatures.PrevLastIsLower)),
                new InputOutputColumnPair(nameof(PdfFeatures.PrevLastIsPunct)),
            }, DataKind.Single))
                                      .Append(mlContext.Transforms.Concatenate("Features", nameof(PdfFeatures.ThisLen), nameof(PdfFeatures.MeanLen), nameof(PdfFeatures.PrevLen),
                                                                               nameof(PdfFeatures.FirstChars), nameof(PdfFeatures.PrevLastIsAlpha), nameof(PdfFeatures.PrevLastIsDigit),
                                                                               nameof(PdfFeatures.PrevLastIsLower), nameof(PdfFeatures.PrevLastIsPunct)))
                                      .AppendCacheCheckpoint(mlContext);

            ConsoleHelper.PeekDataViewInConsole(mlContext, trainingData, dataProcessPipeline, 2);

            IEstimator <ITransformer> trainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression();

            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // Train the model fitting to the DataSet
            ITransformer trainedModel = trainingPipeline.Fit(trainingData);

            // Evaluate the model and show accuracy stats
            var predictions = trainedModel.Transform(testData);
            var metrics     = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "Label", scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // Save/persist the trained model to a .ZIP file
            Directory.CreateDirectory(ModelDir);
            mlContext.Model.Save(trainedModel, trainingData.Schema, ModelFileName);
            Console.WriteLine($"Model has been written into '{ModelFileName}'");
        }
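A hedged re-load/scoring sketch for the model saved above; it assumes LoadCorpus() yields the same row type used for training (the row class itself is not shown in this snippet).
        public static void ReloadAndPreview()
        {
            var mlContext = new MLContext(seed: 1);

            // Reload the model persisted by BuildAndTrainModel()
            ITransformer model = mlContext.Model.Load(ModelFileName, out DataViewSchema inputSchema);

            // Score the corpus again and peek at the first few output rows
            // (PredictedLabel/Probability/Score are ML.NET's default binary-classification outputs)
            IDataView data   = mlContext.Data.LoadFromEnumerable(LoadCorpus());
            IDataView scored = model.Transform(data);

            var preview = scored.Preview(maxRows: 3);
            foreach (var row in preview.RowView)
                Console.WriteLine(string.Join(", ", row.Values));
        }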
Example #5
        public static ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView)
        {
            // Data process configuration with pipeline data transformations
            IEstimator <ITransformer> dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding(new[] { new OneHotEncodingEstimator.ColumnOptions("vendor_id", "vendor_id"), new OneHotEncodingEstimator.ColumnOptions("payment_type", "payment_type") })
                                                            .Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features, new[] { "vendor_id", "payment_type", "rate_code", "passenger_count", "trip_time_in_secs", "trip_distance" }));

            // Set the training algorithm
            IEstimator <ITransformer> trainer = mlContext.Regression.Trainers.LightGbm(new Options()
            {
                NumBoostRound = 200, LearningRate = 0.02864992f, NumLeaves = 57, MinDataPerLeaf = 1, UseSoftmax = false, UseCat = false, UseMissing = true, MinDataPerGroup = 100, MaxCatThreshold = 16, CatSmooth = 20, CatL2 = 10, LabelColumn = "fare_amount", FeatureColumn = "Features"
            });
            IEstimator <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            Console.WriteLine("=============== Training " + trainer.ToString() + " model ===============");

            ITransformer model = trainingPipeline.Fit(trainingDataView);

            Console.WriteLine("=============== End of training process ===============");
            return(model);
        }
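A possible follow-up for the regression model returned by TrainModel(), written against the current ML.NET 1.x API (the sample above targets an older, pre-1.0 API surface); it assumes a held-out testDataView with the same schema as the training data.
        public static void EvaluateModel(MLContext mlContext, ITransformer model, IDataView testDataView)
        {
            // Score the held-out data and compute standard regression metrics
            IDataView predictions = model.Transform(testDataView);
            var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "fare_amount", scoreColumnName: "Score");

            Console.WriteLine($"R^2:  {metrics.RSquared:0.###}");
            Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError:0.###}");
            Console.WriteLine($"MAE:  {metrics.MeanAbsoluteError:0.###}");
        }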
Example #6
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            var trainingDataView = mlContext.Data.ReadFromTextFile <GitHubIssue>(DataSetLocation, hasHeader: true, separatorChar: '\t');

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: DefaultColumnNames.Label, inputColumnName: nameof(GitHubIssue.Area))
                                      .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "TitleFeaturized", inputColumnName: nameof(GitHubIssue.Title)))
                                      .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "DescriptionFeaturized", inputColumnName: nameof(GitHubIssue.Description)))
                                      .Append(mlContext.Transforms.Concatenate(outputColumnName: DefaultColumnNames.Features, "TitleFeaturized", "DescriptionFeaturized"))
                                                                         // Cache the DataView so estimators that iterate over the data several times read from the in-memory cache instead of the file, which can improve performance
                                      .AppendCacheCheckpoint(mlContext); // In this sample the cache only improves training time when using OVA (not SDCA), since OVA runs multiple iterations over the same data

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator <ITransformer> trainer = null;

            switch (selectedStrategy)
            {
            case MyTrainerStrategy.SdcaMultiClassTrainer:
                trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label,
                                                                                                     DefaultColumnNames.Features);
                break;

            case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
            {
                // Create a binary classification trainer.
                var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(DefaultColumnNames.Label,
                                                                                                                 DefaultColumnNames.Features,
                                                                                                                 numIterations: 10);
                // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                // In this strategy, a binary classification algorithm is used to train one classifier for each class,
                // which distinguishes that class from all other classes. Prediction is then performed by running these binary classifiers,
                // and choosing the prediction with the highest confidence score.
                trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer);

                break;
            }

            default:
                break;
            }

            //Set the trainer/algorithm and map label to value (original readable state)
            var trainingPipeline = dataProcessPipeline.Append(trainer)
                                   .Append(mlContext.Transforms.Conversion.MapKeyToValue(DefaultColumnNames.PredictedLabel));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics

            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");

            //Measure cross-validation time
            var watchCrossValTime = System.Diagnostics.Stopwatch.StartNew();

            var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(data: trainingDataView, estimator: trainingPipeline, numFolds: 6, labelColumn: DefaultColumnNames.Label);

            //Stop measuring time
            watchCrossValTime.Stop();
            long elapsedCrossValMs = watchCrossValTime.ElapsedMilliseconds;

            Console.WriteLine($"Time Cross-Validating: {elapsedCrossValMs} ms");

            //(CDLTLL-Pending-TODO)
            //
            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");

            //Measure training time
            var watch = System.Diagnostics.Stopwatch.StartNew();

            var trainedModel = trainingPipeline.Fit(trainingDataView);

            //Stop measuring time
            watch.Stop();
            long elapsedTrainingMs = watch.ElapsedMilliseconds;

            Console.WriteLine($"Time Training the model: {elapsedTrainingMs} ms");

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            // Create prediction engine related to the loaded trained model
            var predEngine = trainedModel.CreatePredictionEngine <GitHubIssue, GitHubIssuePrediction>(mlContext);
            //Score
            var prediction = predEngine.Predict(issue);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            //

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                mlContext.Model.Save(trainedModel, fs);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects 
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 1);

            // STEP 1: Common data loading configuration
            var trainingDataView = mlContext.Data.LoadFromTextFile<GitHubIssue>(DataSetLocation, hasHeader: true, separatorChar:'\t', allowSparse: false);
             
            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "Label",inputColumnName:nameof(GitHubIssue.Area))
                            .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "TitleFeaturized",inputColumnName:nameof(GitHubIssue.Title)))
                            .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "DescriptionFeaturized", inputColumnName: nameof(GitHubIssue.Description)))
                            .Append(mlContext.Transforms.Concatenate(outputColumnName:"Features", "TitleFeaturized", "DescriptionFeaturized"))
                            .AppendCacheCheckpoint(mlContext);  
                            // Use in-memory cache for small/medium datasets to lower training time. 
                            // Do NOT use it (remove .AppendCacheCheckpoint()) when handling very large datasets.

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features" 
            Common.ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator<ITransformer> trainer = null; 
            switch(selectedStrategy)
            {
                case MyTrainerStrategy.SdcaMultiClassTrainer:                 
                     trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("Label", "Features");
                     break;
                case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
                {
                    // Create a binary classification trainer.
                    var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features",numberOfIterations: 10);
                    // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                    // In this strategy, a binary classification algorithm is used to train one classifier for each class,
                    // which distinguishes that class from all other classes. Prediction is then performed by running these binary classifiers,
                    // and choosing the prediction with the highest confidence score.
                    trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer);
                        
                    break;
                }
                default:
                    break;
            }

            //Set the trainer/algorithm and map label to value (original readable state)
            var trainingPipeline = dataProcessPipeline.Append(trainer)
                    .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics

            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValidationResults= mlContext.MulticlassClassification.CrossValidate(data:trainingDataView, estimator:trainingPipeline, numberOfFolds: 6, labelColumnName:"Label");
                    
            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
            GitHubIssue issue = new GitHubIssue() { ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.." };
            // Create prediction engine related to the loaded trained model
            var predEngine = mlContext.Model.CreatePredictionEngine<GitHubIssue, GitHubIssuePrediction>(trainedModel);
            //Score
            var prediction = predEngine.Predict(issue);
            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            //

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            mlContext.Model.Save(trainedModel, trainingDataView.Schema, ModelPath);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
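A sketch of the GitHubIssue input and prediction classes the two methods above rely on; the LoadColumn indices mirror the column order configured by the TextLoader in Example #8 (ID, Area, Title, Description), while the prediction class shape is an assumption based on ML.NET's default multiclass output columns.
        public class GitHubIssue
        {
            [LoadColumn(0)] public string ID          { get; set; }
            [LoadColumn(1)] public string Area        { get; set; }   // the label to predict
            [LoadColumn(2)] public string Title       { get; set; }
            [LoadColumn(3)] public string Description { get; set; }
        }

        public class GitHubIssuePrediction
        {
            [ColumnName("PredictedLabel")] public string Area { get; set; }
            public float[] Score { get; set; }
        }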
Example #8
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            TextLoader textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("ID", DataKind.Text, 0),
                    new TextLoader.Column("Area", DataKind.Text, 1),
                    new TextLoader.Column("Title", DataKind.Text, 2),
                    new TextLoader.Column("Description", DataKind.Text, 3),
                }
            });

            var trainingDataView = textLoader.Read(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("Area", "Label")
                                      .Append(mlContext.Transforms.Text.FeaturizeText("Title", "TitleFeaturized"))
                                      .Append(mlContext.Transforms.Text.FeaturizeText("Description", "DescriptionFeaturized"))
                                      .Append(mlContext.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"));

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator <ITransformer> trainer = null;

            switch (selectedStrategy)
            {
            case MyTrainerStrategy.SdcaMultiClassTrainer:
                trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label,
                                                                                                     DefaultColumnNames.Features);
                break;

            case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
            {
                // Create a binary classification trainer.
                var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(DefaultColumnNames.Label,
                                                                                                                 DefaultColumnNames.Features,
                                                                                                                 numIterations: 10);
                // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                // In this strategy, a binary classification algorithm is used to train one classifier for each class,
                // which distinguishes that class from all other classes. Prediction is then performed by running these binary classifiers,
                // and choosing the prediction with the highest confidence score.
                trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer);

                break;
            }

            default:
                break;
            }

            //Set the trainer/algorithm and map label to value (original readable state)
            var trainingPipeline = dataProcessPipeline.Append(trainer)
                                   .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");

            var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainingDataView, trainingPipeline, numFolds: 6, labelColumn: "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            // Create prediction engine related to the loaded trained model
            var predFunction = trainedModel.MakePredictionFunction <GitHubIssue, GitHubIssuePrediction>(mlContext);
            //Score
            var prediction = predFunction.Predict(issue);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            //

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                mlContext.Model.Save(trainedModel, fs);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
Example #9
        private static void TrainModel(string dataFile, string modelFile)
        {
            // Create MLContext to be shared across the model creation workflow objects
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Loading the data
            Console.WriteLine($"Step 1: Loading the data ({dataFile})");
            var textLoader = mlContext.Data.TextReader(
                new TextLoader.Arguments
            {
                Separator    = ",",
                HasHeader    = true,
                AllowQuoting = true,
                AllowSparse  = true,
                Column       = new[]
                {
                    new TextLoader.Column("Id", DataKind.Text, 0),
                    new TextLoader.Column("Category", DataKind.Text, 1),
                    new TextLoader.Column("Content", DataKind.Text, 2),
                }
            });
            var trainingDataView = textLoader.Read(dataFile);

            // STEP 2: Common data process configuration with pipeline data transformations
            Console.WriteLine("Step 2: Map raw input data columns to ML.NET data");
            var dataProcessPipeline = mlContext.Transforms.Categorical.MapValueToKey("Category", DefaultColumnNames.Label)
                                      .Append(mlContext.Transforms.Text.FeaturizeText("Content", DefaultColumnNames.Features));

            // (OPTIONAL) Peek data (few records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            // DataViewToConsole<JokeModel>(mlContext, trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            Console.WriteLine("Step 3: Create and configure the selected training algorithm (trainer)");
            IEstimator <ITransformer> trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent();

            // Alternative training
            //// var averagedPerceptionBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
            ////     DefaultColumnNames.Label,
            ////     DefaultColumnNames.Features,
            ////     numIterations: 10);
            //// trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer);

            // Set the trainer/algorithm and map label to value (original readable state)
            var trainingPipeline = dataProcessPipeline.Append(trainer).Append(
                mlContext.Transforms.Conversion.MapKeyToValue(DefaultColumnNames.PredictedLabel));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("Step 4: Cross-Validate with single dataset (alternatively we can divide it 80-20)");
            var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(
                trainingDataView,
                trainingPipeline,
                numFolds: 10,
                labelColumn: "Label");

            PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("Step 5: Train the model fitting to the DataSet");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine($"Step 6: Save the model to a file ({modelFile})");
            using (var fs = new FileStream(modelFile, FileMode.Create, FileAccess.Write, FileShare.Write))
            {
                mlContext.Model.Save(trainedModel, fs);
            }
        }
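A hypothetical consumption sketch for the model file written above, expressed with the current ML.NET 1.x API (the sample itself targets an older API surface and would need the 1.x Model.Save overload, as used in Example #6, for this Load call to apply); the JokeModel and JokePrediction shapes are assumptions derived from the loader columns (Id, Category, Content).
        public class JokeModel
        {
            [LoadColumn(0)] public string Id       { get; set; }
            [LoadColumn(1)] public string Category { get; set; }
            [LoadColumn(2)] public string Content  { get; set; }
        }

        public class JokePrediction
        {
            [ColumnName("PredictedLabel")] public string Category { get; set; }
        }

        private static void PredictCategory(string modelFile, string content)
        {
            var mlContext = new MLContext();

            // Load the persisted model and create a single-prediction engine
            ITransformer model = mlContext.Model.Load(modelFile, out _);
            var engine = mlContext.Model.CreatePredictionEngine<JokeModel, JokePrediction>(model);

            var prediction = engine.Predict(new JokeModel { Content = content });
            Console.WriteLine($"Predicted category: {prediction.Category}");
        }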