Exemplo n.º 1
0
        /// <summary>
        /// Runs an AutoML ranking experiment through four <c>Execute</c> overloads and, for each
        /// result, checks that at least one run was produced, that the best run's validation
        /// metrics clear fixed thresholds, and that the best model's output schema has the
        /// expected column names.
        /// </summary>
        public void AutoFitRankingTest()
        {
            string labelColumnName = "Label";
            string scoreColumnName = "Score";
            string groupIdColumnName = "GroupId";
            string featuresColumnVectorNameA = "FeatureVectorA";
            string featuresColumnVectorNameB = "FeatureVectorB";
            var mlContext = new MLContext(1);

            // STEP 1: Load data. The first 500 rows are held out and passed to Execute as the
            // second data view; the remaining rows are used for training.
            var reader = new TextLoader(
                mlContext,
                GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
            var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
            var testDataView = mlContext.Data.TakeRows(trainDataView, 500);

            trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

            // STEP 2: Run AutoML experiment (the argument 5 is presumably the time budget in
            // seconds -- confirm against the CreateRankingExperiment overload).
            var experiment = mlContext.Auto().CreateRankingExperiment(5);

            ExperimentResult<RankingMetrics>[] experimentResults =
            {
                // Column names passed directly.
                experiment.Execute(trainDataView, labelColumnName, groupIdColumnName),
                // No column names passed; this overload has to resolve the columns itself.
                experiment.Execute(trainDataView, testDataView),
                // Explicit label and group-id columns.
                experiment.Execute(
                    trainDataView,
                    testDataView,
                    new ColumnInformation()
                    {
                        LabelColumnName = labelColumnName,
                        GroupIdColumnName = groupIdColumnName,
                    }),
                // Same as above, with the group-id column also used as the sampling key.
                experiment.Execute(
                    trainDataView,
                    testDataView,
                    new ColumnInformation()
                    {
                        LabelColumnName = labelColumnName,
                        GroupIdColumnName = groupIdColumnName,
                        SamplingKeyColumnName = groupIdColumnName
                    })
            };

            // Expected output column names, indexed by schema column index. The group-id name
            // appears twice on purpose (kept from the original expectation).
            var expectedOutputNames = new string[]
            {
                labelColumnName, groupIdColumnName, groupIdColumnName,
                featuresColumnVectorNameA, featuresColumnVectorNameB,
                "Features", scoreColumnName
            };

            foreach (ExperimentResult<RankingMetrics> experimentResult in experimentResults)
            {
                // Verify that runs exist before touching BestRun so an empty experiment fails
                // with a clear assertion.
                Assert.True(experimentResult.RunDetails.Any());

                RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
                Assert.NotNull(bestRun.ValidationMetrics);
                Assert.True(bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains.Last() > 0.4);
                Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 20);

                var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
                foreach (var col in outputSchema)
                {
                    // Assert.Equal reports both names on failure, unlike Assert.True(a == b).
                    Assert.Equal(expectedOutputNames[col.Index], col.Name);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs a cross-validated AutoML ranking experiment (3 folds) through two
        /// <c>Execute</c> overloads and checks, for every per-fold result of each best run,
        /// that the validation metrics clear fixed thresholds.
        /// </summary>
        public void AutoFitRankingCVTest()
        {
            string labelColumnName = "Label";
            string groupIdColumnName = "GroupIdCustom";
            string featuresColumnVectorNameA = "FeatureVectorA";
            string featuresColumnVectorNameB = "FeatureVectorB";
            uint numFolds = 3;

            var mlContext = new MLContext(1);
            var reader = new TextLoader(
                mlContext,
                GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
            var trainDataView = reader.Load(DatasetUtil.GetMLSRDataset());

            // Take less than 1500 rows of data to satisfy CrossValSummaryRunner's
            // limit.
            trainDataView = mlContext.Data.TakeRows(trainDataView, 1499);

            var experiment = mlContext.Auto().CreateRankingExperiment(5);

            CrossValidationExperimentResult<RankingMetrics>[] experimentResults =
            {
                // Columns supplied through ColumnInformation.
                experiment.Execute(
                    trainDataView,
                    numFolds,
                    new ColumnInformation()
                    {
                        LabelColumnName = labelColumnName,
                        GroupIdColumnName = groupIdColumnName
                    }),
                // Columns supplied as plain names.
                experiment.Execute(trainDataView, numFolds, labelColumnName, groupIdColumnName)
            };

            foreach (CrossValidationExperimentResult<RankingMetrics> experimentResult in experimentResults)
            {
                // Verify that runs exist before touching BestRun so an empty experiment fails
                // with a clear assertion.
                Assert.True(experimentResult.RunDetails.Any());

                CrossValidationRunDetail<RankingMetrics> bestRun = experimentResult.BestRun;

                // foreach disposes the enumerator, which the hand-written
                // GetEnumerator/MoveNext loop did not.
                foreach (var foldResult in bestRun.Results)
                {
                    Assert.True(foldResult.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max() > 0.31);
                    Assert.True(foldResult.ValidationMetrics.DiscountedCumulativeGains.Max() > 15);
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs an AutoML ranking experiment configured through
        /// <see cref="RankingExperimentSettings"/> with a custom group-id column name, then
        /// checks that runs were produced, that the best score across all runs clears fixed
        /// thresholds, and that the best model's output schema has the expected column names.
        /// </summary>
        public void AutoFitRankingTest()
        {
            string labelColumnName = "Label";
            string scoreColumnName = "Score";
            string groupIdColumnName = "CustomGroupId";
            string featuresColumnVectorNameA = "FeatureVectorA";
            string featuresColumnVectorNameB = "FeatureVectorB";
            var mlContext = new MLContext(1);

            // STEP 1: Load data. The first 500 rows are held out and passed to Execute as the
            // second data view; the remaining rows are used for training.
            var reader = new TextLoader(
                mlContext,
                GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
            var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
            var testDataView = mlContext.Data.TakeRows(trainDataView, 500);

            trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

            // STEP 2: Run AutoML experiment. The settings reuse groupIdColumnName rather than
            // repeating the "CustomGroupId" literal, so the two cannot drift apart.
            var experimentSettings = new RankingExperimentSettings()
            {
                GroupIdColumnName = groupIdColumnName,
                MaxExperimentTimeInSeconds = 5
            };
            var columnInformation = new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName
            };

            ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
                .CreateRankingExperiment(experimentSettings)
                .Execute(trainDataView, testDataView, columnInformation);

            // Verify that runs exist before touching BestRun so an empty experiment fails with
            // a clear assertion.
            Assert.True(experimentResult.RunDetails.Any());

            RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
            Assert.NotNull(bestRun.ValidationMetrics);

            // The best score across all runs must clear the threshold. The comparison sits
            // outside Max(...); the original placed it inside the selector and so took the
            // maximum of a sequence of booleans, which gives the same verdict but reads as a typo.
            Assert.True(experimentResult.RunDetails.Max(run => run.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max()) > .5);
            Assert.True(experimentResult.RunDetails.Max(run => run.ValidationMetrics.DiscountedCumulativeGains.Max()) > 34);

            // Expected output column names, indexed by schema column index. The group-id name
            // appears twice on purpose (kept from the original expectation).
            var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
            var expectedOutputNames = new string[]
            {
                labelColumnName, groupIdColumnName, groupIdColumnName,
                featuresColumnVectorNameA, featuresColumnVectorNameB,
                "Features", scoreColumnName
            };

            foreach (var col in outputSchema)
            {
                // Assert.Equal reports both names on failure, unlike Assert.True(a == b).
                Assert.Equal(expectedOutputNames[col.Index], col.Name);
            }
        }