/// <summary>
/// Runs a short AutoML ranking experiment through four different
/// <c>Execute</c> overloads and checks, for each result, that at least one
/// run completed, that the best run's NDCG/DCG clear minimum thresholds, and
/// that the best model's output schema has exactly the expected columns.
/// </summary>
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "GroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1);

    // STEP 1: Load data. The first 500 rows become the test set and the
    // remainder is used for training.
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run AutoML experiment once per Execute overload under test.
    var experiment = mlContext.Auto()
        .CreateRankingExperiment(5);

    ExperimentResult<RankingMetrics>[] experimentResults =
    {
        experiment.Execute(trainDataView, labelColumnName, groupIdColumnName),
        experiment.Execute(trainDataView, testDataView),
        experiment.Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName,
            }),
        experiment.Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName,
                SamplingKeyColumnName = groupIdColumnName
            })
    };

    // The expected schema is the same for every overload, so build it once.
    // The group id column name is listed twice on purpose: the output schema
    // is expected to contain two columns with that name.
    var expectedOutputNames = new string[]
    {
        labelColumnName,
        groupIdColumnName,
        groupIdColumnName,
        featuresColumnVectorNameA,
        featuresColumnVectorNameB,
        "Features",
        scoreColumnName
    };

    for (int i = 0; i < experimentResults.Length; i++)
    {
        // Verify that runs exist before inspecting the best one.
        Assert.True(experimentResults[i].RunDetails.Any(),
            $"Execute overload #{i} produced no runs.");

        RunDetail<RankingMetrics> bestRun = experimentResults[i].BestRun;
        Assert.NotNull(bestRun.ValidationMetrics);
        Assert.True(bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains.Last() > 0.4);
        Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 20);

        var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);

        // Check the column count first: otherwise a schema with missing
        // columns would pass the per-column loop below, and one with extra
        // columns would fail with an unhelpful IndexOutOfRangeException.
        Assert.True(outputSchema.Count == expectedOutputNames.Length,
            $"Execute overload #{i}: expected {expectedOutputNames.Length} output columns but found {outputSchema.Count}.");

        foreach (var col in outputSchema)
        {
            Assert.True(col.Name == expectedOutputNames[col.Index],
                $"Execute overload #{i}: column {col.Index} is '{col.Name}', expected '{expectedOutputNames[col.Index]}'.");
        }
    }
}
/// <summary>
/// Runs a short AutoML ranking experiment with cross-validation through two
/// <c>Execute</c> overloads (column information object vs. explicit column
/// names) and checks that every fold result of the best run clears minimum
/// NDCG/DCG thresholds.
/// </summary>
public void AutoFitRankingCVTest()
{
    string labelColumnName = "Label";
    string groupIdColumnName = "GroupIdCustom";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    uint numFolds = 3;

    var mlContext = new MLContext(1);
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(DatasetUtil.GetMLSRDataset());

    // Take less than 1500 rows of data to satisfy CrossValSummaryRunner's
    // limit.
    trainDataView = mlContext.Data.TakeRows(trainDataView, 1499);

    var experiment = mlContext.Auto()
        .CreateRankingExperiment(5);

    CrossValidationExperimentResult<RankingMetrics>[] experimentResults =
    {
        experiment.Execute(trainDataView, numFolds,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName
            }),
        experiment.Execute(trainDataView, numFolds, labelColumnName, groupIdColumnName)
    };

    for (int i = 0; i < experimentResults.Length; i++)
    {
        // Verify that runs exist before inspecting the best one.
        Assert.True(experimentResults[i].RunDetails.Any(),
            $"Execute overload #{i} produced no runs.");

        CrossValidationRunDetail<RankingMetrics> bestRun = experimentResults[i].BestRun;

        // Materialize once so the emptiness check and the loop see the same
        // sequence; an empty result set must fail rather than skip every
        // metric assertion below.
        var foldResults = bestRun.Results.ToList();
        Assert.True(foldResults.Count > 0,
            $"Execute overload #{i}: best run has no cross-validation results.");

        // foreach disposes the enumerator, unlike a manual MoveNext() loop.
        foreach (var foldResult in foldResults)
        {
            Assert.True(foldResult.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max() > 0.31);
            Assert.True(foldResult.ValidationMetrics.DiscountedCumulativeGains.Max() > 15);
        }
    }
}
/// <summary>
/// Runs a short AutoML ranking experiment configured through
/// <c>RankingExperimentSettings</c> with a custom group id column name, then
/// checks that at least one run completed, that the best NDCG/DCG across all
/// runs clear minimum thresholds, and that the best model's output schema
/// has exactly the expected columns.
/// </summary>
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "CustomGroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1);

    // STEP 1: Load data. The first 500 rows become the test set and the
    // remainder is used for training.
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run AutoML experiment. The settings reuse groupIdColumnName so
    // the loader, the settings and the column information cannot drift apart.
    ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
        .CreateRankingExperiment(new RankingExperimentSettings()
        {
            GroupIdColumnName = groupIdColumnName,
            MaxExperimentTimeInSeconds = 5
        })
        .Execute(trainDataView, testDataView,
            new ColumnInformation()
            {
                LabelColumnName = labelColumnName,
                GroupIdColumnName = groupIdColumnName
            });

    // Verify that runs exist before inspecting the best one.
    Assert.True(experimentResult.RunDetails.Any(), "The experiment produced no runs.");

    RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
    Assert.NotNull(bestRun.ValidationMetrics);

    // Take the maximum over metric values and compare afterwards. The
    // comparison used to sit inside the selector, which made Max() operate on
    // booleans and only worked because true sorts above false.
    Assert.True(experimentResult.RunDetails.Max(run => run.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max()) > .5);
    Assert.True(experimentResult.RunDetails.Max(run => run.ValidationMetrics.DiscountedCumulativeGains.Max()) > 34);

    var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);

    // The group id column name is listed twice on purpose: the output schema
    // is expected to contain two columns with that name.
    var expectedOutputNames = new string[]
    {
        labelColumnName,
        groupIdColumnName,
        groupIdColumnName,
        featuresColumnVectorNameA,
        featuresColumnVectorNameB,
        "Features",
        scoreColumnName
    };

    // Check the column count first: otherwise a schema with missing columns
    // would pass the per-column loop below, and one with extra columns would
    // fail with an unhelpful IndexOutOfRangeException.
    Assert.True(outputSchema.Count == expectedOutputNames.Length,
        $"Expected {expectedOutputNames.Length} output columns but found {outputSchema.Count}.");

    foreach (var col in outputSchema)
    {
        Assert.True(col.Name == expectedOutputNames[col.Index],
            $"Column {col.Index} is '{col.Name}', expected '{expectedOutputNames[col.Index]}'.");
    }
}