/// <summary>
/// Scores one input with the prediction engine and writes to the console the first
/// three entries returned by <c>getBestThreePredictions</c>, followed by the label
/// of the raw prediction.
/// </summary>
/// <param name="intentData">Input to score; its <c>text</c> is echoed in the header line.</param>
public void testPredictionForSingleIssue(IntentData intentData)
{
    // Score once; the ranking helper works from this same prediction object.
    var result = this._predEngine.Predict(intentData);
    var ranked = this.getBestThreePredictions(result);

    Console.WriteLine($"==== Displaying prediction of Issue with Title = {intentData.text} ====");

    // One prefix per printed rank; entries are indexed lazily so output is
    // emitted rank by rank, exactly as three separate statements would do.
    string[] prefixes = { "1st Label: ", "2nd Label: ", "3rd Label: " };
    for (var rank = 0; rank < prefixes.Length; rank++)
    {
        Console.WriteLine($"{prefixes[rank]}{ranked[rank].PredictedLabel} with score: {ranked[rank].Score}");
    }

    Console.WriteLine($"=============== Single Prediction - Result: {result.PredictedLabel} ===============");
}
/// <summary>
/// Interactive loop: reads lines from standard input and prints the prediction for
/// each one until the user types <c>exit</c> or the input stream ends.
/// </summary>
private static void testSingleLabelPrediction()
{
    // Created on first use and then reused for every subsequent line, instead of
    // being rebuilt per input. NOTE(review): presumably the Intenter constructor
    // loads the model from modelPath, which is the expensive part -- confirm.
    Intenter intenter = null;

    string input;

    // Console.ReadLine() returns null once the input stream is exhausted
    // (redirected/closed stdin). Treat that as a stop condition; comparing only
    // against "exit" would otherwise loop forever on null.
    while ((input = Console.ReadLine()) != null && input != "exit")
    {
        if (intenter == null)
        {
            intenter = new Intenter(modelPath: modelPath, new MLContext());
        }

        var intent = new IntentData { text = input, label = string.Empty };
        intenter.testPredictionForSingleIssue(intent);
    }
}
/// <summary>
/// Builds the text-classification pipeline, prints cross-validation metrics, trains
/// the model on the training file, runs one sample prediction and saves the trained
/// model to <paramref name="modelPath"/>.
/// </summary>
/// <param name="dataSetLocation">Path of the tab-separated training file (first row is a header).</param>
/// <param name="testSetLocation">Path of the tab-separated file (first row is a header) used for the cross-validation step.</param>
/// <param name="modelPath">Destination file the trained model is saved to.</param>
/// <param name="selectedStrategy">Selects which multiclass trainer is appended to the pipeline.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="selectedStrategy"/> is not one of the strategies handled here.
/// </exception>
private static void buildAndTrainModel(string dataSetLocation, string testSetLocation, string modelPath, MyTrainerStrategy selectedStrategy)
{
    // Create the MLContext shared across the model creation workflow objects.
    // A fixed seed gives repeatable/deterministic results across multiple trainings.
    var mlContext = new MLContext(seed: 1);

    // STEP 1: Common data loading configuration
    var trainingDataView = mlContext.Data.LoadFromTextFile<IntentData>(dataSetLocation, hasHeader: true, separatorChar: '\t', allowSparse: false);
    var testingDataView = mlContext.Data.LoadFromTextFile<IntentData>(testSetLocation, hasHeader: true, separatorChar: '\t', allowSparse: false);

    // STEP 2: Common data process configuration with pipeline data transformations
    // Use the in-memory cache checkpoint for small/medium datasets to lower training time.
    // Do NOT use it (remove .AppendCacheCheckpoint()) when handling very large datasets.
    var dataProcessPipeline = mlContext.Transforms.Conversion
        .MapValueToKey(outputColumnName: "label", inputColumnName: nameof(IntentData.label))
        .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "text", inputColumnName: nameof(IntentData.text)))
        .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", "text"))
        .AppendCacheCheckpoint(mlContext);

    // (OPTIONAL) Peek data (2 records) in the training DataView after applying the
    // process pipeline's transformations into "Features".
    ConsoleHelper.peekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 2);

    // STEP 3: Create the selected training algorithm/trainer
    IEstimator<ITransformer> trainer;
    switch (selectedStrategy)
    {
        case MyTrainerStrategy.SdcaMultiClassTrainer:
            trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("label", "Features");
            break;

        case MyTrainerStrategy.OvaAveragedPerceptronTrainer:
        {
            // Create a binary classification trainer.
            var averagedPerceptronBinaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("label", "Features", numberOfIterations: 10);

            // Compose an OVA (One-Versus-All) trainer with the binary trainer.
            // In this strategy, a binary classification algorithm is used to train one
            // classifier for each class, which distinguishes that class from all other
            // classes. Prediction is then performed by running these binary classifiers
            // and choosing the prediction with the highest confidence score.
            trainer = mlContext.MulticlassClassification.Trainers.OneVersusAll(averagedPerceptronBinaryTrainer, labelColumnName: "label");
            break;
        }

        default:
            // Fail with a clear message instead of passing a null trainer into the
            // pipeline, where it would surface as an unrelated null-argument error.
            throw new ArgumentOutOfRangeException(nameof(selectedStrategy), selectedStrategy, "Unsupported trainer strategy.");
    }

    // Set the trainer/algorithm and map the predicted key back to its original readable value.
    var trainingPipeline = dataProcessPipeline
        .Append(trainer)
        .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    // STEP 4: Cross-validate to get the model's accuracy metrics.
    // NOTE(review): the folds are drawn from the test-set data view, not the training
    // data view -- confirm this is intended rather than evaluating the trained model
    // on the test set.
    Console.WriteLine("=============== Cross-validating to get model's accuracy metrics ===============");
    var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(data: testingDataView, estimator: trainingPipeline, numberOfFolds: 6, labelColumnName: "label");
    ConsoleHelper.printMulticlassClassificationFoldsAverageMetrics(trainer.ToString(), crossValidationResults);

    // STEP 5: Train the model fitting to the training data set
    Console.WriteLine("=============== Training the model ===============");
    var trainedModel = trainingPipeline.Fit(trainingDataView);

    // (OPTIONAL) Try a single prediction with the just-trained model, before saving it.
    var issue = new IntentData { text = "food" };

    // The prediction engine is disposable; release it as soon as the sample
    // prediction has been printed.
    using (var predEngine = mlContext.Model.CreatePredictionEngine<IntentData, IntentPrediction>(trainedModel))
    {
        var prediction = predEngine.Predict(issue);
        Console.WriteLine($"=============== Single Prediction just-trained-model - Result: {prediction.PredictedLabel} ===============");
    }

    // STEP 6: Save/persist the trained model to a .ZIP file
    Console.WriteLine("=============== Saving the model to a file ===============");
    mlContext.Model.Save(trainedModel, trainingDataView.Schema, modelPath);

    ConsoleHelper.consoleWriteHeader("Training process finalized");
}