public RankingMetrics EvaluateMetrics(IDataView data, string labelColumn, string groupIdColumn)
{
    var rankingEvalOptions = new RankingEvaluatorOptions
    {
        DcgTruncationLevel = Math.Max(10, 2 * (int)_dcgTruncationLevel)
    };

    return _mlContext.Ranking.Evaluate(data, rankingEvalOptions, labelColumn, groupIdColumn);
}
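// A minimal sketch of an enclosing class that EvaluateMetrics above could live in. The class
// name, constructor, and field names (_mlContext, _dcgTruncationLevel) are not shown in the
// original snippet and are assumptions; only the RankingEvaluatorOptions usage mirrors it.
public class RankingModelEvaluator
{
    private readonly MLContext _mlContext;          // assumed: shared MLContext instance
    private readonly uint _dcgTruncationLevel;      // assumed: configured truncation level

    public RankingModelEvaluator(MLContext mlContext, uint dcgTruncationLevel = 10)
    {
        _mlContext = mlContext;
        _dcgTruncationLevel = dcgTruncationLevel;
    }

    // EvaluateMetrics (shown above) would be a member of this class.
}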
public void TrainAndEvaluateRankingWithOptions()
{
    var mlContext = new MLContext(seed: 1);
    int[] tlevels = { 50, 150, 100 };
    var options = new RankingEvaluatorOptions();

    foreach (int i in tlevels)
    {
        options.DcgTruncationLevel = i;
        var scoredData = GetScoredDataForRankingEvaluation(mlContext);
        var metrics = mlContext.Ranking.Evaluate(scoredData, options,
            labelColumnName: "Label", rowGroupColumnName: "GroupId");
        Common.AssertMetrics(metrics);
    }
}
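// A hedged sketch of what a GetScoredDataForRankingEvaluation-style helper could look like; the
// real helper used by the test above is not shown. This version loads a small ranking dataset,
// hashes the group column into a key, fits a FastTree ranking model, and returns the scored
// output so that Evaluate has Label, GroupId, and Score columns to work with. The file name,
// column layout, and trainer choice are assumptions.
private static IDataView GetScoredDataForRankingEvaluation(MLContext mlContext)
{
    var data = mlContext.Data.LoadFromTextFile(
        "ranking-sample.tsv",                       // assumed sample file
        new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("GroupId", DataKind.String, 1),
            new TextLoader.Column("Features", DataKind.Single, 2, 10),
        },
        separatorChar: '\t',
        hasHeader: true);

    // Hash GroupId into a key type, then train a ranker with default column names.
    var pipeline = mlContext.Transforms.Conversion.Hash("GroupId")
        .Append(mlContext.Ranking.Trainers.FastTree());

    var model = pipeline.Fit(data);
    return model.Transform(data);
}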
static void Main(string[] args)
{
    var context = new MLContext();

    var data = context.Data.LoadFromTextFile<RankingData>("./ranking.tsv", separatorChar: '\t');

    var split = context.Data.TrainTestSplit(data, testFraction: 0.2);
    var secondSplit = context.Data.TrainTestSplit(split.TestSet);
    //var validation = secondSplit.TrainSet;
    var sampleInput = secondSplit.TestSet;

    var rankingPipeline = context.Transforms.Conversion.MapValueToKey("Label")
        .Append(context.Transforms.Conversion.Hash("GroupId", "GroupId"))
        .Append(context.Ranking.Trainers.LightGbm());

    var model = rankingPipeline.Fit(split.TrainSet);
    var predictions = model.Transform(split.TestSet);

    var options = new RankingEvaluatorOptions { DcgTruncationLevel = 5 };
    var metrics = context.Ranking.Evaluate(predictions, options);
    var ndcg = metrics.NormalizedDiscountedCumulativeGains.Average();

    Console.WriteLine($"nDCG - {ndcg}");
    Console.Write(Environment.NewLine);

    var batchPredictions = model.Transform(sampleInput);
    var newPredictions = context.Data.CreateEnumerable<RankingPrediction>(batchPredictions, reuseRowObject: false);

    Console.WriteLine("Scores:");
    foreach (var prediction in newPredictions)
    {
        Console.WriteLine($"{prediction.RelevanceScore}");
    }
    //Console.WriteLine($"Relevance - {prediction.Score}");
}
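// A minimal sketch of the input/output classes referenced by the Main method above, which are
// not shown in the original snippet. The column indices, feature count, and property types are
// assumptions; LoadColumn indices would need to match the actual layout of ranking.tsv. The
// score column is mapped to RelevanceScore via ColumnName("Score"), matching how Main reads it.
using Microsoft.ML.Data;

public class RankingData
{
    [LoadColumn(0)]
    public float Label { get; set; }

    [LoadColumn(1)]
    public string GroupId { get; set; }

    [LoadColumn(2, 11)]          // assumed: 10 feature columns
    [VectorType(10)]
    public float[] Features { get; set; }
}

public class RankingPrediction
{
    [ColumnName("Score")]
    public float RelevanceScore { get; set; }
}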
/// <summary>
/// Evaluates scored ranking data.
/// </summary>
/// <param name="data">The scored data.</param>
/// <param name="options">Options to control the evaluation result.</param>
/// <param name="labelColumnName">The name of the label column in <paramref name="data"/>.</param>
/// <param name="rowGroupColumnName">The name of the groupId column in <paramref name="data"/>.</param>
/// <param name="scoreColumnName">The name of the score column in <paramref name="data"/>.</param>
/// <returns>The evaluation results for these outputs.</returns>
public RankingMetrics Evaluate(IDataView data, RankingEvaluatorOptions options,
    string labelColumnName = DefaultColumnNames.Label,
    string rowGroupColumnName = DefaultColumnNames.GroupId,
    string scoreColumnName = DefaultColumnNames.Score)
{
    Environment.CheckValue(data, nameof(data));
    Environment.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
    Environment.CheckNonEmpty(scoreColumnName, nameof(scoreColumnName));
    Environment.CheckNonEmpty(rowGroupColumnName, nameof(rowGroupColumnName));

    var eval = new RankingEvaluator(Environment, options ?? new RankingEvaluatorOptions());
    return eval.Evaluate(data, labelColumnName, rowGroupColumnName, scoreColumnName);
}
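// A hedged usage sketch of the Evaluate overload documented above: score a held-out set with a
// trained ranking model and evaluate at a custom DCG truncation level. The variable names
// (mlContext, model, testData) are assumptions; only the Evaluate call follows the signature above.
var scoredTestData = model.Transform(testData);
var evaluatorOptions = new RankingEvaluatorOptions { DcgTruncationLevel = 7 };

RankingMetrics rankingMetrics = mlContext.Ranking.Evaluate(
    scoredTestData,
    evaluatorOptions,
    labelColumnName: "Label",
    rowGroupColumnName: "GroupId",
    scoreColumnName: "Score");

// DCG/NDCG are reported per truncation position; index 0 corresponds to @1.
Console.WriteLine($"NDCG@1: {rankingMetrics.NormalizedDiscountedCumulativeGains[0]:F4}");
Console.WriteLine($"DCG@1:  {rankingMetrics.DiscountedCumulativeGains[0]:F4}");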
private static (IDataView, IDataView) BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Download and load the data
    GetData(InputPath, OutputPath, TrainDatasetPath, TrainDatasetUrl,
        TestDatasetUrl, TestDatasetPath, ValidationDatasetUrl, ValidationDatasetPath);

    //ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDatasetPath, labelColumnIndex: 0,
    //    separatorChar: '\t', hasHeader: true, groupColumns: false, allowSparse: true);

    var textLoaderOptions = new TextLoader.Options
    {
        Separators = new[] { '\t' },
        HasHeader = true,
        Columns = new[]
        {
            new TextLoader.Column("Label", DataKind.Single, 0),
            new TextLoader.Column("GroupId", DataKind.Int32, 1),
            new TextLoader.Column("Features", DataKind.Single, 2, 133),
        }
    };

    TextLoader textLoader = mlContext.Data.CreateTextLoader(textLoaderOptions);
    IDataView trainDataView = textLoader.Load(TrainDatasetPath);
    IDataView validationDataView = textLoader.Load(ValidationDatasetPath);
    IDataView testDataView = textLoader.Load(TestDatasetPath);

    // STEP 2: Display first few rows of training data
    ConsoleHelper.ShowDataViewInConsole(mlContext, trainDataView);

    // STEP 3: Initialize our user-defined progress handler that AutoML will
    // invoke after each model it produces and evaluates.
    var progressHandler = new RankingExperimentProgressHandler();

    // STEP 4: Run AutoML ranking experiment
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine($"Running AutoML ranking experiment for {ExperimentTime} seconds...");

    var experimentSettings = new RankingExperimentSettings
    {
        MaxExperimentTimeInSeconds = ExperimentTime,
        OptimizingMetric = RankingMetric.Ndcg,
        OptimizationMetricTruncationLevel = 10
    };

    ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
        .CreateRankingExperiment(experimentSettings)
        .Execute(
            trainData: trainDataView,
            validationData: validationDataView,
            progressHandler: progressHandler);

    // Print top models found by AutoML
    Console.WriteLine("\n===== Evaluating model's NDCG (on validation data) =====");
    PrintTopModels(experimentResult, experimentSettings.OptimizationMetricTruncationLevel);

    var rankingEvaluatorOptions = new RankingEvaluatorOptions
    {
        DcgTruncationLevel = Math.Min(10, (int)experimentSettings.OptimizationMetricTruncationLevel * 2)
    };

    Console.WriteLine("\n===== Evaluating model's NDCG (on test data) =====");
    IDataView predictions = experimentResult.BestRun.Model.Transform(testDataView);
    var metrics = mlContext.Ranking.Evaluate(predictions, rankingEvaluatorOptions);
    ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metrics,
        experimentSettings.OptimizationMetricTruncationLevel);

    // STEP 5: Refit the model and get final metrics.
    // Re-fit the best pipeline on train and validation data, to produce
    // a model that is trained on as much data as is available while
    // still having test data for the final estimate of how well the
    // model will do in production.
Console.WriteLine("\n===== Refitting on train+valid and evaluating model's NDCG (on test data) ====="); var trainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath)); var refitModel = experimentResult.BestRun.Estimator.Fit(trainPlusValidationDataView); IDataView predictionsRefitOnTrainPlusValidation = refitModel.Transform(testDataView); var metricsRefitOnTrainPlusValidation = mlContext.Ranking.Evaluate(predictionsRefitOnTrainPlusValidation, rankingEvaluatorOptions); ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metricsRefitOnTrainPlusValidation, experimentSettings.OptimizationMetricTruncationLevel); // STEP 6: Refit the model with all available data // Re-fit best pipeline again on train, validation, and test data, to // produce a model that is trained on as much data as is available. // This is the final model that can be deployed to production. // No metrics are printed since we no longer have an independent // scoring dataset. Console.WriteLine("\n===== Refitting on train+valid+test to get the final model to launch to production ====="); var trainPlusValidationPlusTestDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath, TestDatasetPath)); var refitModelOnTrainValidTest = experimentResult.BestRun.Estimator.Fit(trainPlusValidationPlusTestDataView); // STEP 7: Save/persist the trained model to a .ZIP file mlContext.Model.Save(refitModelOnTrainValidTest, trainDataView.Schema, ModelPath); Console.WriteLine("The model is saved to {0}", ModelPath); return(predictionsRefitOnTrainPlusValidation, testDataView); }