Ejemplo n.º 1
0
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            DataLoader dataLoader       = new DataLoader(mlContext);
            var        trainingDataView = dataLoader.GetDataView(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Set the selected training algorithm into the modelBuilder
            var modelBuilder = new Common.ModelBuilder <GitHubIssue, GitHubIssuePrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features");

            modelBuilder.AddTrainer(trainer);
            modelBuilder.AddEstimator(new KeyToValueEstimator(mlContext, "PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValResults = modelBuilder.CrossValidateAndEvaluateMulticlassClassificationModel(trainingDataView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics("SdcaMultiClassTrainer", crossValResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            // (OPTIONAL) Try/test a single prediction by loding the model from the file, first.
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "Entity Framework crashes", Description = "When connecting to the database, EF is crashing"
            };
            var modelScorer = new ModelScorer <GitHubIssue, GitHubIssuePrediction>(mlContext);

            modelScorer.LoadModelFromZipFile(ModelPath);
            var prediction = modelScorer.PredictSingle(issue);

            Console.WriteLine($"=============== Single Prediction - Result: {prediction.Area} ===============");
            //

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
Ejemplo n.º 2
0
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            var textLoader       = GitHubLabelerTextLoaderFactory.CreateTextLoader(mlContext);
            var trainingDataView = textLoader.Read(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = GitHubLabelerDataProcessPipelineFactory.CreateDataProcessPipeline(mlContext);

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole <GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);
            //Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator <ITransformer> trainer = null;

            switch (selectedStrategy)
            {
            case MyTrainerStrategy.SdcaMultiClassTrainer:
                trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label,
                                                                                                     DefaultColumnNames.Features);
                break;

            case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
            {
                // Create a binary classification trainer.
                var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(DefaultColumnNames.Label,
                                                                                                                 DefaultColumnNames.Features,
                                                                                                                 numIterations: 10);
                // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                // In this strategy, a binary classification algorithm is used to train one classifier for each class, "
                // which distinguishes that class from all other classes. Prediction is then performed by running these binary classifiers, "
                // and choosing the prediction with the highest confidence score.
                trainer = new Ova(mlContext, averagedPerceptronBinaryTrainer);
                break;
            }

            default:
                break;
            }

            //Set the trainer/algorithm
            var modelBuilder = new Common.ModelBuilder <GitHubIssue, GitHubIssuePrediction>(mlContext, dataProcessPipeline);

            modelBuilder.AddTrainer(trainer);
            modelBuilder.AddEstimator(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValResults = modelBuilder.CrossValidateAndEvaluateMulticlassClassificationModel(trainingDataView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (Before saving the model)
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            var modelScorer = new ModelScorer <GitHubIssue, GitHubIssuePrediction>(mlContext, modelBuilder.TrainedModel);
            var prediction  = modelScorer.PredictSingle(issue);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");
            //

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
Ejemplo n.º 3
0
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            var textLoader       = IrisTextLoaderFactory.CreateTextLoader(mlContext);
            var trainingDataView = textLoader.Read(TrainDataPath);
            var testDataView     = textLoader.Read(TestDataPath);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Concatenate("Features", "SepalLength",
                                                                       "SepalWidth",
                                                                       "PetalLength",
                                                                       "PetalWidth");

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            var modelBuilder = new Common.ModelBuilder <IrisData, IrisPrediction>(mlContext, dataProcessPipeline);
            // We apply our selected Trainer
            var trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Label", featureColumn: "Features");

            modelBuilder.AddTrainer(trainer);

            // STEP 4: Train the model fitting to the DataSet
            //The pipeline is trained on the dataset that has been loaded and transformed.
            Console.WriteLine("=============== Training the model ===============");
            var model = modelBuilder.Train(trainingDataView);

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var metrics = modelBuilder.EvaluateMultiClassClassificationModel(testDataView, "Label");

            Common.ConsoleHelper.PrintMultiClassClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            return(model);


            // // STEP 1: Common data loading configuration
            // var textLoader = IrisTextLoaderFactory.CreateTextLoader(mlContext);
            // var trainingDataView = textLoader.Read(TrainDataPath);
            // // STEP 2: Common data process configuration with pipeline data transformations
            // var dataProcessPipeline = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
            // // Note that the label is text, so it needs to be converted to key.
            // .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
            // // Use the multi-class SDCA model to predict the label using features.
            //.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent())
            // // Apply the inverse conversion from 'PredictedLabel' column back to string value.
            // .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "PredictedLabel")));
            // ;
            // // STEP 3: Set the training algorithm, then create and config the modelBuilder
            // var modelBuilder = new ModelBuilder<IrisData, IrisPrediction>(mlContext, dataProcessPipeline);
            // // We apply our selected Trainer
            // var trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Label", featureColumn: "Features");
            // modelBuilder.AddTrainer(trainer);
            // // STEP 4: Train the model fitting to the DataSet
            // //The pipeline is trained on the dataset that has been loaded and transformed.
            // Console.WriteLine("=============== Training the model ===============");
            // var model= modelBuilder.Train(trainingDataView);


            // // STEP 5: Evaluate the model and show accuracy stats
            // //Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            // //var metrics = modelBuilder.EvaluateMultiClassClassificationModel(testDataView, "Label");
            // //Common.ConsoleHelper.PrintMultiClassClassificationMetrics(trainer.ToString(), metrics);

            // //// STEP 6: Save/persist the trained model to a .ZIP file
            // Console.WriteLine("=============== Saving the model to a file ===============");
            // modelBuilder.SaveModelAsFile(ModelPath);
            // return model;
        }