public RankingMetrics EvaluateMetrics(IDataView data, string labelColumn, string groupIdColumn)
        {
            var rankingEvalOptions = new RankingEvaluatorOptions
            {
                DcgTruncationLevel = Math.Max(10, 2 * (int)_dcgTruncationLevel)
            };

            return _mlContext.Ranking.Evaluate(data, rankingEvalOptions, labelColumn, groupIdColumn);
        }
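        // NOTE: EvaluateMetrics above relies on two members of the enclosing class that are
        // not shown in this snippet; plausible (assumed, not original) definitions would be:
        //     private readonly MLContext _mlContext = new MLContext(seed: 1);
        //     private readonly uint _dcgTruncationLevel = 10;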
        public void TrainAndEvaluateRankingWithOptions()
        {
            var mlContext = new MLContext(seed: 1);

            int[] tlevels = { 50, 150, 100 };
            var   options = new RankingEvaluatorOptions();

            foreach (int i in tlevels)
            {
                options.DcgTruncationLevel = i;
                var scoredData = GetScoredDataForRankingEvaluation(mlContext);
                var metrics    = mlContext.Ranking.Evaluate(scoredData, options, labelColumnName: "Label", rowGroupColumnName: "GroupId");
                Common.AssertMetrics(metrics);
            }
        }
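The GetScoredDataForRankingEvaluation helper called above is not part of this snippet. Below is a minimal sketch of what such a helper could look like, assuming an in-memory dataset with a float relevance Label, a key-typed GroupId, and a precomputed Score column; the row class, group sizes, and random values are illustrative assumptions, not the original helper.

        private class ScoredRankingRow
        {
            public float Label { get; set; }

            [KeyType(100)]
            public uint GroupId { get; set; }

            public float Score { get; set; }
        }

        // Requires: using System; using System.Linq; using Microsoft.ML; using Microsoft.ML.Data;
        private static IDataView GetScoredDataForRankingEvaluation(MLContext mlContext)
        {
            // 50 query groups of 10 documents each, with random relevance labels and scores,
            // so the ranking evaluator has grouped, scored rows to work with.
            var random = new Random(1);
            var rows = Enumerable.Range(0, 500).Select(i => new ScoredRankingRow
            {
                Label   = random.Next(0, 5),
                GroupId = (uint)(i / 10) + 1,   // key values start at 1; 0 means "missing"
                Score   = (float)random.NextDouble()
            });
            return mlContext.Data.LoadFromEnumerable(rows);
        }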
Example #3
        static void Main(string[] args)
        {
            var context = new MLContext();

            var data = context.Data.LoadFromTextFile<RankingData>("./ranking.tsv", separatorChar: '\t');

            var split = context.Data.TrainTestSplit(data, testFraction: 0.2);

            var secondSplit = context.Data.TrainTestSplit(split.TestSet);

            //var validation = secondSplit.TrainSet;

            var sampleInput = secondSplit.TestSet;

            var rankingPipeline = context.Transforms.Conversion.MapValueToKey("Label")
                                  .Append(context.Transforms.Conversion.Hash("GroupId", "GroupId"))
                                  .Append(context.Ranking.Trainers.LightGbm());

            var model = rankingPipeline.Fit(split.TrainSet);

            var predictions = model.Transform(split.TestSet);

            var options = new RankingEvaluatorOptions
            {
                DcgTruncationLevel = 5
            };

            var metrics = context.Ranking.Evaluate(predictions, options);

            var ndcg = metrics.NormalizedDiscountedCumulativeGains.Average();

            Console.WriteLine($"nDGC - {ndcg}");
            Console.Write(Environment.NewLine);

            var batchPredictions = model.Transform(sampleInput);

            var newPredictions = context.Data.CreateEnumerable<RankingPrediction>(batchPredictions, reuseRowObject: false);

            Console.WriteLine("Scores:");
            foreach (var prediction in newPredictions)
            {
                Console.WriteLine($"{prediction.RelevanceScore}");
            }

            //Console.WriteLine($"Relevance - {prediction.Score}");
        }
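The RankingData and RankingPrediction classes used above are defined elsewhere in the original example. Here is a rough sketch of the shape they need for this pipeline, assuming ranking.tsv holds a relevance label, a group identifier, and eight feature columns; the column indices and feature count are placeholders, not the original definitions.

        public class RankingData
        {
            [LoadColumn(0)]
            public float Label { get; set; }

            [LoadColumn(1)]
            public string GroupId { get; set; }

            // Eight numeric feature columns packed into a single vector column.
            [LoadColumn(2, 9), VectorType(8)]
            public float[] Features { get; set; }
        }

        public class RankingPrediction
        {
            // The ranking trainer writes its output to the "Score" column;
            // this maps it onto the RelevanceScore property read in the loop above.
            [ColumnName("Score")]
            public float RelevanceScore { get; set; }
        }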
Example #4
        /// <summary>
        /// Evaluates scored ranking data.
        /// </summary>
        /// <param name="data">The scored data.</param>
        /// <param name="options">Options to control the evaluation result.</param>
        /// <param name="labelColumnName">The name of the label column in <paramref name="data"/>.</param>
        /// <param name="rowGroupColumnName">The name of the groupId column in <paramref name="data"/>.</param>
        /// <param name="scoreColumnName">The name of the score column in <paramref name="data"/>.</param>
        /// <returns>The evaluation results for these calibrated outputs.</returns>
        public RankingMetrics Evaluate(IDataView data,
                                       RankingEvaluatorOptions options,
                                       string labelColumnName    = DefaultColumnNames.Label,
                                       string rowGroupColumnName = DefaultColumnNames.GroupId,
                                       string scoreColumnName    = DefaultColumnNames.Score)
        {
            Environment.CheckValue(data, nameof(data));
            Environment.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
            Environment.CheckNonEmpty(scoreColumnName, nameof(scoreColumnName));
            Environment.CheckNonEmpty(rowGroupColumnName, nameof(rowGroupColumnName));

            var eval = new RankingEvaluator(Environment, options ?? new RankingEvaluatorOptions());

            return eval.Evaluate(data, labelColumnName, rowGroupColumnName, scoreColumnName);
        }
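A typical call site for this overload looks like the snippet below, shown against an already scored IDataView; mlContext and scoredData are assumed to exist, and the column names are the ML.NET defaults.

            var options = new RankingEvaluatorOptions { DcgTruncationLevel = 10 };

            RankingMetrics metrics = mlContext.Ranking.Evaluate(
                scoredData,
                options,
                labelColumnName: "Label",
                rowGroupColumnName: "GroupId",
                scoreColumnName: "Score");

            // Each metrics list holds one entry per truncation level, so with
            // DcgTruncationLevel = 10 this prints NDCG@1 through NDCG@10.
            Console.WriteLine(string.Join(", ", metrics.NormalizedDiscountedCumulativeGains));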
        private static (IDataView, IDataView) BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Download and load the data
            GetData(InputPath, OutputPath, TrainDatasetPath, TrainDatasetUrl, TestDatasetUrl, TestDatasetPath,
                    ValidationDatasetUrl, ValidationDatasetPath);

            //ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDatasetPath, labelColumnIndex: 0,
            //    separatorChar: '\t', hasHeader: true, groupColumns: false, allowSparse: true);

            var textLoaderOptions = new TextLoader.Options
            {
                Separators = new[] { '\t' },
                HasHeader  = true,
                Columns    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Single, 0),
                    new TextLoader.Column("GroupId", DataKind.Int32, 1),
                    new TextLoader.Column("Features", DataKind.Single, 2, 133),
                }
            };

            TextLoader textLoader         = mlContext.Data.CreateTextLoader(textLoaderOptions);
            IDataView  trainDataView      = textLoader.Load(TrainDatasetPath);
            IDataView  validationDataView = textLoader.Load(ValidationDatasetPath);
            IDataView  testDataView       = textLoader.Load(TestDatasetPath);

            // STEP 2: Display first few rows of training data
            ConsoleHelper.ShowDataViewInConsole(mlContext, trainDataView);

            // STEP 3: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates (a sketch of such a
            // handler appears after this method).
            var progressHandler = new RankingExperimentProgressHandler();

            // STEP 4: Run AutoML ranking experiment
            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Console.WriteLine($"Running AutoML ranking experiment for {ExperimentTime} seconds...");

            var experimentSettings = new RankingExperimentSettings
            {
                MaxExperimentTimeInSeconds        = ExperimentTime,
                OptimizingMetric                  = RankingMetric.Ndcg,
                OptimizationMetricTruncationLevel = 10
            };

            ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
                .CreateRankingExperiment(experimentSettings)
                .Execute(
                    trainData: trainDataView,
                    validationData: validationDataView,
                    progressHandler: progressHandler);

            // Print top models found by AutoML
            Console.WriteLine("\n===== Evaluating model's NDCG (on validation data) =====");
            PrintTopModels(experimentResult, experimentSettings.OptimizationMetricTruncationLevel);

            var rankingEvaluatorOptions = new RankingEvaluatorOptions
            {
                DcgTruncationLevel = Math.Min(10, (int)experimentSettings.OptimizationMetricTruncationLevel * 2)
            };

            Console.WriteLine("\n===== Evaluating model's NDCG (on test data) =====");
            IDataView predictions = experimentResult.BestRun.Model.Transform(testDataView);
            var       metrics     = mlContext.Ranking.Evaluate(predictions, rankingEvaluatorOptions);

            ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metrics, experimentSettings.OptimizationMetricTruncationLevel);

            // STEP 5: Refit the model and get final metrics
            // Re-fit best pipeline on train and validation data, to produce
            // a model that is trained on as much data as is available while
            // still having test data for the final estimate of how well the
            // model will do in production.
            Console.WriteLine("\n===== Refitting on train+valid and evaluating model's NDCG (on test data) =====");
            var       trainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath));
            var       refitModel = experimentResult.BestRun.Estimator.Fit(trainPlusValidationDataView);
            IDataView predictionsRefitOnTrainPlusValidation = refitModel.Transform(testDataView);
            var       metricsRefitOnTrainPlusValidation     = mlContext.Ranking.Evaluate(predictionsRefitOnTrainPlusValidation, rankingEvaluatorOptions);

            ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metricsRefitOnTrainPlusValidation, experimentSettings.OptimizationMetricTruncationLevel);

            // STEP 6: Refit the model with all available data
            // Re-fit best pipeline again on train, validation, and test data, to
            // produce a model that is trained on as much data as is available.
            // This is the final model that can be deployed to production.
            // No metrics are printed since we no longer have an independent
            // scoring dataset.
            Console.WriteLine("\n===== Refitting on train+valid+test to get the final model to launch to production =====");
            var trainPlusValidationPlusTestDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath, TestDatasetPath));
            var refitModelOnTrainValidTest          = experimentResult.BestRun.Estimator.Fit(trainPlusValidationPlusTestDataView);

            // STEP 7: Save/persist the trained model to a .ZIP file
            mlContext.Model.Save(refitModelOnTrainValidTest, trainDataView.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return (predictionsRefitOnTrainPlusValidation, testDataView);
        }
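The RankingExperimentProgressHandler constructed in STEP 3 is defined elsewhere in the sample. The following is a minimal sketch of such a handler, assuming it only logs each AutoML iteration to the console; the sample's actual handler and its ConsoleHelper output may differ.

        // Requires: using System; using System.Linq; using Microsoft.ML.AutoML; using Microsoft.ML.Data;
        public class RankingExperimentProgressHandler : IProgress<RunDetail<RankingMetrics>>
        {
            private int _iteration;

            // AutoML calls Report once for every model it trains and evaluates.
            public void Report(RunDetail<RankingMetrics> run)
            {
                _iteration++;

                if (run.Exception != null)
                {
                    Console.WriteLine($"Iteration {_iteration}: {run.TrainerName} failed: {run.Exception.Message}");
                    return;
                }

                // The last NDCG entry corresponds to the deepest truncation level evaluated.
                var ndcg = run.ValidationMetrics?.NormalizedDiscountedCumulativeGains.LastOrDefault();
                Console.WriteLine($"Iteration {_iteration}: {run.TrainerName,-35} NDCG = {ndcg:F4} ({run.RuntimeInSeconds:F1}s)");
            }
        }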