コード例 #1
0
        /// <summary>
        /// Scores one hard-coded GitHub issue through the <c>_modelScorer</c> member and
        /// writes the predicted area to the console.
        /// </summary>
        public void TestPredictionForSingleIssue()
        {
            // Fixed sample issue: the only input this method ever scores.
            var sampleIssue = new GitHubIssue
            {
                ID          = "Any-ID",
                Title       = "Entity Framework crashes",
                Description = "When connecting to the database, EF is crashing"
            };

            // Run the prediction and keep only the value that gets printed.
            var predictedArea = _modelScorer.PredictSingle(sampleIssue).Area;

            Console.WriteLine($"=============== Single Prediction - Result: {predictedArea} ===============");
        }
コード例 #2
0
        /// <summary>
        /// Console entry point: loads the data set from <c>DataPath</c>, trains and evaluates a
        /// K-Means clustering model with three clusters, saves it to <c>ModelPath</c>, then reloads
        /// the saved model to predict the cluster of one hard-coded sample.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // Create the MLContext to share across components for deterministic results.
            MLContext mlContext = new MLContext(seed: 1);  // Seed set to any number so you have a deterministic environment

            // STEP 1: Common data loading, then an 80/20 train/test split.
            DataLoader dataLoader = new DataLoader(mlContext);
            var        fullData   = dataLoader.GetDataView(DataPath);

            (IDataView trainingDataView, IDataView testingDataView) = mlContext.Clustering.TrainTestSplit(fullData, testFraction: 0.2);

            // STEP 2: Process data transformations in pipeline
            var dataProcessor       = new DataProcessor(mlContext);
            var dataProcessPipeline = dataProcessor.DataProcessPipeline;

            // (Optional) Peek data in training DataView after applying the ProcessPipeline's transformations
            Common.ConsoleHelper.PeekDataViewInConsole<IrisData>(mlContext, trainingDataView, dataProcessPipeline, 10);
            Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 10);

            // STEP 3: Create and train the model
            var modelBuilder = new ModelBuilder<IrisData, IrisPrediction>(mlContext, dataProcessPipeline);
            var trainer      = mlContext.Clustering.Trainers.KMeans(features: "Features", clustersCount: 3);

            modelBuilder.AddTrainer(trainer);

            // The value returned by Train is not needed here: evaluation and saving go through modelBuilder.
            modelBuilder.Train(trainingDataView);

            // STEP 4: Evaluate the model against the held-out test split
            var metrics = modelBuilder.EvaluateClusteringModel(testingDataView);

            Common.ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

            // STEP 5: Save/persist the model as a .ZIP file
            modelBuilder.SaveModelAsFile(ModelPath);

            Console.WriteLine("=============== End of training process ===============");

            Console.WriteLine("=============== Predict a cluster for a single case (Single Iris data sample) ===============");

            // Test with one hard-coded sample.
            // NOTE(review): the message printed below labels this sample as "setosa", yet the petal width
            // here is larger than the sepal length - confirm the values are assigned to the intended properties.
            var sampleIrisData = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            // Load the model that was just saved and use it to score the sample.
            var modelScorer = new ModelScorer<IrisData, IrisPrediction>(mlContext);

            modelScorer.LoadModelFromZipFile(ModelPath);

            var prediction = modelScorer.PredictSingle(sampleIrisData);

            Console.WriteLine($"Cluster assigned for setosa flowers:{prediction.SelectedClusterId}");

            Console.WriteLine("=============== End of process, hit any key to finish ===============");
            Console.ReadKey();
        }
コード例 #3
0
        /// <summary>
        /// Builds, cross-validates, trains and saves a multiclass classification model for GitHub issues,
        /// then reloads the saved file to run one sample prediction and print its result.
        /// </summary>
        /// <param name="DataSetLocation">Location of the data set handed to <c>DataLoader.GetDataView</c>.</param>
        /// <param name="ModelPath">File path the trained model is saved to and then loaded back from.</param>
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath)
        {
            // A single MLContext with a fixed seed is shared by every component created below,
            // so repeated runs are repeatable.
            MLContext context = new MLContext(seed: 0);

            // STEP 1: load the data set into a data view.
            var issuesView = new DataLoader(context).GetDataView(DataSetLocation);

            // STEP 2: obtain the shared data-transformation pipeline.
            var transformPipeline = new DataProcessor(context).DataProcessPipeline;

            // (OPTIONAL) Show 2 records of the training data after the pipeline's transformations.
            Common.ConsoleHelper.PeekDataViewInConsole<GitHubIssue>(context, issuesView, transformPipeline, 2);

            // STEP 3: plug the training algorithm into the builder, followed by the estimator
            // applied to the "PredictedLabel" column.
            var builder = new Common.ModelBuilder<GitHubIssue, GitHubIssuePrediction>(context, transformPipeline);

            builder.AddTrainer(
                context.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features"));
            builder.AddEstimator(new KeyToValueEstimator(context, "PredictedLabel"));

            // STEP 4: there is only one data set, so metrics come from a 6-fold cross-validation of it.
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var foldMetrics = builder.CrossValidateAndEvaluateMulticlassClassificationModel(issuesView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics("SdcaMultiClassTrainer", foldMetrics);

            // STEP 5: fit the model on the data set.
            Console.WriteLine("=============== Training the model ===============");
            builder.Train(issuesView);

            // STEP 6: persist the trained model to a .ZIP file.
            Console.WriteLine("=============== Saving the model to a file ===============");
            builder.SaveModelAsFile(ModelPath);

            // (OPTIONAL) Smoke-test: load the model back from the file and score one hard-coded issue.
            var sampleIssue = new GitHubIssue
            {
                ID          = "Any-ID",
                Title       = "Entity Framework crashes",
                Description = "When connecting to the database, EF is crashing"
            };

            var scorer = new ModelScorer<GitHubIssue, GitHubIssuePrediction>(context);
            scorer.LoadModelFromZipFile(ModelPath);

            var predictedArea = scorer.PredictSingle(sampleIssue).Area;

            Console.WriteLine($"=============== Single Prediction - Result: {predictedArea} ===============");

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }
コード例 #4
0
        /// <summary>
        /// Scores up to <paramref name="numberOfPredictions"/> observations read from the test data file
        /// and prints each predicted count next to the observed count.
        /// </summary>
        /// <param name="mlContext">Not used by this method.</param>
        /// <param name="modelName">Not used by this method.</param>
        /// <param name="testDataLocation">Location passed to <c>ReadSampleDataFromCsvFile</c>.</param>
        /// <param name="modelScorer">Scorer used to predict each observation.</param>
        /// <param name="numberOfPredictions">Maximum number of observations to score and print.</param>
        public static void VisualizeSomePredictions(MLContext mlContext,
                                                    string modelName,
                                                    string testDataLocation,
                                                    ModelScorer <DemandObservation, DemandPrediction> modelScorer,
                                                    int numberOfPredictions)
        {
            // Read the sample observations to compare predictions against.
            var testData = ReadSampleDataFromCsvFile(testDataLocation, numberOfPredictions);

            // Enumerate instead of indexing 0..numberOfPredictions-1: if the data set holds fewer rows
            // than requested, the loop simply ends rather than indexing past the last element.
            int remaining = numberOfPredictions;

            foreach (var observation in testData)
            {
                if (remaining <= 0)
                {
                    break;
                }

                remaining--;

                var prediction = modelScorer.PredictSingle(observation);

                Common.ConsoleHelper.PrintRegressionPredictionVersusObserved(prediction.PredictedCount.ToString(),
                                                                             observation.Count.ToString());
            }
        }
コード例 #5
0
        /// <summary>
        /// Builds, cross-validates and trains a multiclass classification model for GitHub issues using the
        /// trainer chosen by <paramref name="selectedStrategy"/>, runs one sample prediction with the
        /// just-trained model, and saves the model to a file.
        /// </summary>
        /// <param name="DataSetLocation">Location of the data set read by the text loader.</param>
        /// <param name="ModelPath">File path the trained model is saved to.</param>
        /// <param name="selectedStrategy">Which trainer to create for the model.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="selectedStrategy"/> is not one of the strategies handled by this method.
        /// </exception>
        public static void BuildAndTrainModel(string DataSetLocation, string ModelPath, MyTrainerStrategy selectedStrategy)
        {
            // Create MLContext to be shared across the model creation workflow objects.
            // Set a random seed for repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            var textLoader       = GitHubLabelerTextLoaderFactory.CreateTextLoader(mlContext);
            var trainingDataView = textLoader.Read(DataSetLocation);

            // STEP 2: Common data process configuration with pipeline data transformations
            var dataProcessPipeline = GitHubLabelerDataProcessPipelineFactory.CreateDataProcessPipeline(mlContext);

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            Common.ConsoleHelper.PeekDataViewInConsole<GitHubIssue>(mlContext, trainingDataView, dataProcessPipeline, 2);

            // STEP 3: Create the selected training algorithm/trainer
            IEstimator<ITransformer> trainer;

            switch (selectedStrategy)
            {
            case MyTrainerStrategy.SdcaMultiClassTrainer:
                trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label,
                                                                                                     DefaultColumnNames.Features);
                break;

            case MyTrainerStrategy.OVAAveragedPerceptronTrainer:
            {
                // Create a binary classification trainer.
                var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(DefaultColumnNames.Label,
                                                                                                                 DefaultColumnNames.Features,
                                                                                                                 numIterations: 10);
                // Compose an OVA (One-Versus-All) trainer with the BinaryTrainer.
                // In this strategy, a binary classification algorithm is used to train one classifier for each class,
                // which distinguishes that class from all other classes. Prediction is then performed by running these
                // binary classifiers and choosing the prediction with the highest confidence score.
                trainer = new Ova(mlContext, averagedPerceptronBinaryTrainer);
                break;
            }

            default:
                // Fail fast with a descriptive error: without a trainer, AddTrainer would receive null
                // and the trainer.ToString() call further down would throw a NullReferenceException.
                throw new ArgumentOutOfRangeException(nameof(selectedStrategy), selectedStrategy, "Unsupported trainer strategy.");
            }

            // Set the trainer/algorithm
            var modelBuilder = new Common.ModelBuilder<GitHubIssue, GitHubIssuePrediction>(mlContext, dataProcessPipeline);

            modelBuilder.AddTrainer(trainer);
            modelBuilder.AddEstimator(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // STEP 4: Cross-Validate with single dataset (since we don't have two datasets, one for training and for evaluate)
            // in order to evaluate and get the model's accuracy metrics
            Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
            var crossValResults = modelBuilder.CrossValidateAndEvaluateMulticlassClassificationModel(trainingDataView, 6, "Label");

            ConsoleHelper.PrintMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValResults);

            // STEP 5: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            modelBuilder.Train(trainingDataView);

            // (OPTIONAL) Try/test a single prediction with the "just-trained model" (before saving the model)
            GitHubIssue issue = new GitHubIssue()
            {
                ID = "Any-ID", Title = "WebSockets communication is slow in my machine", Description = "The WebSockets communication used under the covers by SignalR looks like is going slow in my development machine.."
            };
            var modelScorer = new ModelScorer<GitHubIssue, GitHubIssuePrediction>(mlContext, modelBuilder.TrainedModel);
            var prediction  = modelScorer.PredictSingle(issue);

            Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.Area} ===============");

            // STEP 6: Save/persist the trained model to a .ZIP file
            Console.WriteLine("=============== Saving the model to a file ===============");
            modelBuilder.SaveModelAsFile(ModelPath);

            Common.ConsoleHelper.ConsoleWriteHeader("Training process finalized");
        }