public void IncorrectLabelColumnThrows()
{
    // Column inference must reject a label column name that does not exist in the dataset.
    var adultDataPath = DatasetUtil.GetUciAdultDataset();
    var mlContext = new MLContext(1);

    // The lambda converts to Action implicitly; the explicit wrapper is unnecessary.
    Assert.Throws<ArgumentException>(
        () => mlContext.Auto().InferColumns(adultDataPath, "Junk", groupColumns: false));
}
// Trains an image-classification multiclass experiment on the flowers dataset with an
// explicit 80/20 train/test split, then checks best-run accuracy (with a known
// platform-specific tolerance) and the scored output's predicted-label column type.
public void AutoFitImageClassificationTrainTest()
{
    var context = new MLContext(seed: 1);
    var datasetPath = DatasetUtil.GetFlowersDataset();
    var columnInference = context.Auto().InferColumns(datasetPath, "Label");
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = context.Data.ShuffleRows(textLoader.Load(datasetPath), seed: 1);
    var originalColumnNames = trainData.Schema.Select(c => c.Name);
    TrainTestData trainTestData = context.Data.TrainTestSplit(trainData, testFraction: 0.2, seed: 1);
    // Keep only the originally loaded columns in each split.
    IDataView trainDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TrainSet, originalColumnNames);
    IDataView testDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TestSet, originalColumnNames);
    var result = context.Auto()
        .CreateMulticlassClassificationExperiment(0)
        .Execute(trainDataset, testDataset, columnInference.ColumnInformation);
    //Known issue, where on Ubuntu there is degradation in accuracy.
    if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)))
    {
        Assert.Equal(0.778, result.BestRun.ValidationMetrics.MicroAccuracy, 3);
    }
    else
    {
        Assert.Equal(1, result.BestRun.ValidationMetrics.MicroAccuracy, 3);
    }
    // The predicted label column of the scored data should be text (the original label strings).
    var scoredData = result.BestRun.Model.Transform(trainData);
    Assert.Equal(TextDataViewType.Instance, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
}
public void GetNextPipeline()
{
    // Round-trip a suggested pipeline through JSON and verify it still trains and scores.
    var context = new MLContext(1);
    var adultData = DatasetUtil.GetUciAdultDataView();
    var columnInfos = DatasetColumnInfoUtil.GetDatasetColumnInfo(context, adultData,
        new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ask the suggester for a first pipeline (empty history).
    var suggested = PipelineSuggester.GetNextPipeline(context, new List<PipelineScore>(), columnInfos, TaskKind.BinaryClassification);

    // Serialize, then deserialize the pipeline.
    var json = JsonConvert.SerializeObject(suggested);
    Console.WriteLine(json);
    var roundTripped = JsonConvert.DeserializeObject<Pipeline>(json);

    // Run the round-tripped pipeline end to end and score it.
    var estimator = roundTripped.ToEstimator(context);
    var scored = estimator.Fit(adultData).Transform(adultData);
    var accuracy = context.BinaryClassification.EvaluateNonCalibrated(scored).Accuracy;
    var pipelineScore = new PipelineScore(roundTripped, accuracy, true);
    Assert.NotNull(pipelineScore);
}
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
    // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which
    // can increase the run time of unit tests, and may not produce multiple runs.
    // Fix: the original guard used RunDetails.Select(r => r.Exception == null).Count(),
    // which counts ALL runs regardless of the predicate (Select does not filter);
    // Count(predicate) counts only the successfully completed runs, as intended.
    if (experiment.RunDetails.Count(r => r.Exception == null) > 1 && experiment.RunDetails.Last().Exception != null)
    {
        Assert.True(experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"),
            "Training process was not successfully canceled after maximum experiment time was reached.");
        // Ensure that the best found model can still run after maximum experiment time was reached.
        IDataView predictions = experiment.BestRun.Model.Transform(trainData);
    }
}
public void AutoFeaturizer_iris_test()
{
    // The auto featurizer over the Iris data view should serialize to the approved pipeline snapshot.
    var mlContext = new MLContext(1);
    var irisData = DatasetUtil.GetIrisDataView();

    var featurizerPipeline = mlContext.Auto().Featurizer(irisData, excludeColumns: new[] { "Label" });

    Approvals.Verify(JsonSerializer.Serialize(featurizerPipeline, _jsonSerializerOptions));
}
public void AutoFeaturizer_uci_adult_test()
{
    // The auto featurizer over UCI Adult, with a custom output column name, should match the approved snapshot.
    var mlContext = new MLContext(1);
    var adultData = DatasetUtil.GetUciAdultDataView();

    var featurizerPipeline = mlContext.Auto().Featurizer(adultData, outputColumnName: "OutputFeature", excludeColumns: new[] { "Label" });

    Approvals.Verify(JsonSerializer.Serialize(featurizerPipeline, _jsonSerializerOptions));
}
public void IdentifyLabelColumnThroughIndexWithHeader()
{
    // When the label is given by ordinal index and the file has a header,
    // the inferred label should take its name from the header row.
    var inference = new MLContext().Auto().InferColumns(DatasetUtil.DownloadUciAdultDataset(), 14, hasHeader: true);

    Assert.True(inference.TextLoaderOptions.HasHeader);
    var labelColumn = inference.TextLoaderOptions.Columns
        .First(c => c.Source[0].Min == 14 && c.Source[0].Max == 14);
    Assert.Equal("hours-per-week", labelColumn.Name);
    Assert.Equal("hours-per-week", inference.ColumnInformation.LabelColumnName);
}
// Ranking experiment over the MSLR dataset using several Execute overloads; each result
// must produce runs, valid NDCG/DCG metrics, and the expected output schema layout.
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "GroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1);

    // STEP 1: Load data
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run AutoML experiment through each Execute overload.
    var experiment = mlContext.Auto()
        .CreateRankingExperiment(5);
    ExperimentResult<RankingMetrics>[] experimentResults =
    {
        experiment.Execute(trainDataView, labelColumnName, groupIdColumnName),
        experiment.Execute(trainDataView, testDataView),
        experiment.Execute(trainDataView, testDataView, new ColumnInformation()
        {
            LabelColumnName = labelColumnName,
            GroupIdColumnName = groupIdColumnName,
        }),
        experiment.Execute(trainDataView, testDataView, new ColumnInformation()
        {
            LabelColumnName = labelColumnName,
            GroupIdColumnName = groupIdColumnName,
            SamplingKeyColumnName = groupIdColumnName
        })
    };

    for (int i = 0; i < experimentResults.Length; i++)
    {
        RunDetail<RankingMetrics> bestRun = experimentResults[i].BestRun;
        // Any() states the intent more directly than Count() > 0 and stops at the first element.
        Assert.True(experimentResults[i].RunDetails.Any());
        Assert.NotNull(bestRun.ValidationMetrics);
        Assert.True(bestRun.ValidationMetrics.NormalizedDiscountedCumulativeGains.Last() > 0.4);
        Assert.True(bestRun.ValidationMetrics.DiscountedCumulativeGains.Last() > 20);
        var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
        // NOTE(review): the group id name is expected twice in the output schema; this mirrors
        // the original assertion — confirm against the ranking pipeline's actual output.
        var expectedOutputNames = new string[] { labelColumnName, groupIdColumnName, groupIdColumnName,
            featuresColumnVectorNameA, featuresColumnVectorNameB, "Features", scoreColumnName };
        foreach (var col in outputSchema)
        {
            // Assert.Equal reports expected vs. actual on failure, unlike Assert.True(a == b).
            Assert.Equal(expectedOutputNames[col.Index], col.Name);
        }
    }
}
public void IdentifyLabelColumnThroughIndexWithoutHeader()
{
    // With no header row, a label identified by index falls back to the default "Label" name.
    var inference = new MLContext().Auto().InferColumns(DatasetUtil.DownloadIrisDataset(), DatasetUtil.IrisDatasetLabelColIndex);

    Assert.False(inference.TextLoaderOptions.HasHeader);
    var labelColumn = inference.TextLoaderOptions.Columns
        .First(c => c.Source[0].Min == DatasetUtil.IrisDatasetLabelColIndex &&
                    c.Source[0].Max == DatasetUtil.IrisDatasetLabelColIndex);
    Assert.Equal(DefaultColumnNames.Label, labelColumn.Name);
    Assert.Equal(DefaultColumnNames.Label, inference.ColumnInformation.LabelColumnName);
}
// Verifies that when the maximum experiment time elapses, the in-flight training run is
// canceled (possibly as an AggregateException from several threads) and the best model
// found so far still works.
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
    // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run slower than normal, which
    // can increase the run time of unit tests, and may not produce multiple runs.
    // NOTE(review): Select(r => r.Exception == null).Count() counts ALL runs (Select does not
    // filter) — likely intended Count(r => r.Exception == null); confirm before changing.
    if (experiment.RunDetails.Select(r => r.Exception == null).Count() > 1 && experiment.RunDetails.Last().Exception != null)
    {
        var expectedExceptionMessage = "Operation was canceled";
        var lastException = experiment.RunDetails.Last().Exception;
        var containsMessage = lastException.Message.Contains(expectedExceptionMessage);
        if (lastException is AggregateException lastAggregateException)
        {
            // Sometimes multiple threads might throw the same "Operation was cancelled"
            // exception and all of them are grouped inside an AggregateException
            // Must check that all exceptions are the expected one.
            containsMessage = true;
            foreach (var ex in lastAggregateException.Flatten().InnerExceptions)
            {
                if (!ex.Message.Contains(expectedExceptionMessage))
                {
                    containsMessage = false;
                }
            }
        }
        Assert.True(containsMessage, $"Did not obtain '{expectedExceptionMessage}' error."
            + $"Obtained unexpected error of type {lastException.GetType()} with message: {lastException.Message}");

        // Ensure that the best found model can still run after maximum experiment time was reached.
        IDataView predictions = experiment.BestRun.Model.Transform(trainData);
    }
}
// Regression experiment run under the given culture to exercise locale-sensitive string
// parsing in the sweeper; experiment time is extended for non-en-US cultures so enough
// iterations run to hit the internationalization code paths.
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so more experiment time is needed for this to
        // occur.
        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);
        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);
        // First 20 rows become the validation set; the remainder is used for training.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);
        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // Fix: the original Max(i => i.ValidationMetrics.RSquared > 0.9) used Max over booleans
        // to emulate Any() (relying on true > false) and threw NullReferenceException for failed
        // runs whose ValidationMetrics is null. Any() with null-conditional access expresses the
        // intent and tolerates failed runs (a lifted comparison against null yields false).
        Assert.True(result.RunDetails.Any(i => i?.ValidationMetrics?.RSquared > 0.9));

        // Ensure experimentTime allows enough iterations to fully test the internationalization code
        // If the below assertion fails, increase the experiment time so the number of iterations is met
        Assert.True(culture == "en-US" || result.RunDetails.Count() >= 75,
            $"RunDetails.Count() = {result.RunDetails.Count()}, below 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
// Regression experiment capped by MaxModels rather than wall-clock time; run under the
// given culture to exercise locale-sensitive string parsing in the sweeper.
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so setting the internal maxModels parameter.
        int maxModels = culture == "en-US" ? 1 : 75;

        var experimentSettings = new RegressionExperimentSettings { MaxModels = maxModels };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);
        // First 20 rows become the validation set; the remainder is used for training.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);
        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData, new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });
        // Null-conditional access tolerates failed runs whose ValidationMetrics is null.
        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.99);
        // Test the internal maxModels parameter
        Assert.True(culture == "en-US" || result.RunDetails.Count() == 75, $"RunDetails.Count() = {result.RunDetails.Count()}, is not 75");
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
public void AutoFit_UCI_Adult_CrossValidation_10_Test()
{
    // 10-fold cross-validation on UCI Adult: every fold of the best run should clear 0.70 accuracy.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var adultData = loader.Load(dataPath);

    var experimentResult = context.Auto()
        .CreateBinaryClassificationExperiment(1)
        .Execute(adultData, 10, DatasetUtil.UciAdultLabel);

    // Checking the minimum fold accuracy covers all folds at once.
    var worstFoldAccuracy = experimentResult.BestRun.Results.Select(x => x.ValidationMetrics.Accuracy).Min();
    Assert.True(worstFoldAccuracy > 0.70);
    Assert.NotNull(experimentResult.BestRun.Estimator);
    Assert.NotNull(experimentResult.BestRun.TrainerName);
}
public void UnGroupReturnsMoreColumnsThanGroup()
{
    var adultDataPath = DatasetUtil.DownloadUciAdultDataset();
    var mlContext = new MLContext();

    // Without grouping, every inferred column must map to exactly one source slot.
    var ungrouped = mlContext.Auto().InferColumns(adultDataPath, DatasetUtil.UciAdultLabel, groupColumns: false);
    foreach (var column in ungrouped.TextLoaderOptions.Columns)
    {
        // De Morgan of the original Assert.False(Length > 1 || Min != Max).
        var isSingleSlot = column.Source.Length <= 1 && column.Source[0].Min == column.Source[0].Max;
        Assert.True(isSingleSlot);
    }

    // Grouping collapses adjacent columns, so it must yield fewer loader columns.
    var grouped = mlContext.Auto().InferColumns(adultDataPath, DatasetUtil.UciAdultLabel, groupColumns: true);
    Assert.True(grouped.TextLoaderOptions.Columns.Count() < ungrouped.TextLoaderOptions.Columns.Count());
}
public void InferColumnsColumnInfoParam()
{
    // InferColumns should honor a ColumnInformation argument naming the label column.
    var labelName = DatasetUtil.MlNetGeneratedRegressionLabel;
    var columnInfo = new ColumnInformation() { LabelColumnName = labelName };
    var inference = new MLContext().Auto().InferColumns(DatasetUtil.DownloadMlNetGeneratedRegressionDataset(), columnInfo);

    var labelColumn = inference.TextLoaderOptions.Columns.First(c => c.Name == labelName);
    Assert.Equal(DataKind.Single, labelColumn.DataKind);
    Assert.Equal(labelName, inference.ColumnInformation.LabelColumnName);
    // All remaining numeric columns should be grouped into the single default Features column.
    Assert.Single(inference.ColumnInformation.NumericColumnNames);
    Assert.Equal(DefaultColumnNames.Features, inference.ColumnInformation.NumericColumnNames.First());
    Assert.Null(inference.ColumnInformation.ExampleWeightColumnName);
}
public void AutoFit_UCI_Adult_Train_Test_Split_Test()
{
    // Short binary-classification experiment over an explicit train/test split.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);
    var adultData = loader.Load(dataPath);
    var split = context.Data.TrainTestSplit(adultData);

    var experimentResult = context.Auto()
        .CreateBinaryClassificationExperiment(1)
        .Execute(split.TrainSet, split.TestSet, DatasetUtil.UciAdultLabel);

    Assert.True(experimentResult.BestRun.ValidationMetrics.Accuracy > 0.70);
    Assert.NotNull(experimentResult.BestRun.Estimator);
    Assert.NotNull(experimentResult.BestRun.Model);
    Assert.NotNull(experimentResult.BestRun.TrainerName);
}
// Drives the pipeline suggester with mocked (random) scores and verifies its two-stage
// behavior: stage 1 tries every allowed trainer once, stage 2 explores only the top 3
// trainers from stage 1.
public void GetNextPipelineMock()
{
    var context = new MLContext(1);
    var uciAdult = DatasetUtil.GetUciAdultDataView();
    var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(context, uciAdult, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Get next pipeline loop
    var history = new List<PipelineScore>();
    var task = TaskKind.BinaryClassification;
    var maxIterations = 60;
    for (var i = 0; i < maxIterations; i++)
    {
        // Get next pipeline
        var pipeline = PipelineSuggester.GetNextPipeline(context, history, columns, task, ((IChannelProvider)context).Start("AutoMLTest"));
        if (pipeline == null)
        {
            break;
        }
        // Score each suggested pipeline with a random value instead of training it.
        var result = new PipelineScore(pipeline, AutoMlUtils.Random.Value.NextDouble(), true);
        history.Add(result);
    }
    // The suggester must keep producing pipelines for all requested iterations.
    Assert.Equal(maxIterations, history.Count);

    // Get all 'Stage 1' and 'Stage 2' runs from Pipeline Suggester
    var allAvailableTrainers = RecipeInference.AllowedTrainers(context, task, new ColumnInformation(), null);
    var stage1Runs = history.Take(allAvailableTrainers.Count());
    var stage2Runs = history.Skip(allAvailableTrainers.Count());

    // Get the trainer names from top 3 Stage 1 runs
    var topStage1Runs = stage1Runs.OrderByDescending(r => r.Score).Take(3);
    var topStage1TrainerNames = topStage1Runs.Select(r => r.Pipeline.Nodes.Last().Name);

    // Get unique trainer names from Stage 2 runs
    var stage2TrainerNames = stage2Runs.Select(r => r.Pipeline.Nodes.Last().Name).Distinct();

    // Assert that are only 3 unique trainers used in stage 2
    Assert.Equal(3, stage2TrainerNames.Count());
    // Assert that all trainers in stage 2 were the top trainers from stage 1
    Assert.False(topStage1TrainerNames.Except(stage2TrainerNames).Any());
}
// Ranking experiment configured through RankingExperimentSettings with a custom group id
// column; checks metrics across all runs and the expected output schema layout.
public void AutoFitRankingTest()
{
    string labelColumnName = "Label";
    string scoreColumnName = "Score";
    string groupIdColumnName = "CustomGroupId";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    var mlContext = new MLContext(1);

    // STEP 1: Load data
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(new MultiFileSource(DatasetUtil.GetMLSRDataset()));
    var testDataView = mlContext.Data.TakeRows(trainDataView, 500);
    trainDataView = mlContext.Data.SkipRows(trainDataView, 500);

    // STEP 2: Run AutoML experiment
    ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
        .CreateRankingExperiment(new RankingExperimentSettings() { GroupIdColumnName = "CustomGroupId", MaxExperimentTimeInSeconds = 5 })
        .Execute(trainDataView, testDataView,
            new ColumnInformation() { LabelColumnName = labelColumnName, GroupIdColumnName = groupIdColumnName });

    RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
    Assert.True(experimentResult.RunDetails.Any());
    Assert.NotNull(bestRun.ValidationMetrics);
    // Fix: the original used Max(i => <bool>), relying on true > false to emulate Any(), and
    // would throw NullReferenceException for failed runs with null ValidationMetrics. Any()
    // with null-conditional access states the intent and tolerates failed runs (a lifted
    // comparison against null yields false).
    Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics?.NormalizedDiscountedCumulativeGains.Max() > .5));
    Assert.True(experimentResult.RunDetails.Any(i => i.ValidationMetrics?.DiscountedCumulativeGains.Max() > 34));

    var outputSchema = bestRun.Model.GetOutputSchema(trainDataView.Schema);
    // NOTE(review): the group id name is expected twice in the output schema; this mirrors
    // the original assertion — confirm against the ranking pipeline's actual output.
    var expectedOutputNames = new string[] { labelColumnName, groupIdColumnName, groupIdColumnName,
        featuresColumnVectorNameA, featuresColumnVectorNameB, "Features", scoreColumnName };
    foreach (var col in outputSchema)
    {
        // Assert.Equal reports expected vs. actual on failure, unlike Assert.True(a == b).
        Assert.Equal(expectedOutputNames[col.Index], col.Name);
    }
}
public void AutoFitBinaryTest()
{
    // End-to-end binary classification on UCI Adult; the best run should clear 0.70 accuracy.
    var mlContext = new MLContext();
    var adultDataPath = DatasetUtil.DownloadUciAdultDataset();
    var inference = mlContext.Auto().InferColumns(adultDataPath, DatasetUtil.UciAdultLabel);
    var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
    var adultData = loader.Load(adultDataPath);

    var experimentResult = mlContext.Auto()
        .CreateBinaryClassificationExperiment(0)
        .Execute(adultData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    var bestRun = experimentResult.BestRun;
    Assert.True(bestRun.ValidationMetrics.Accuracy > 0.70);
    Assert.NotNull(bestRun.Estimator);
    Assert.NotNull(bestRun.Model);
    Assert.NotNull(bestRun.TrainerName);
}
// Cross-validated ranking experiment over the MSLR dataset through two Execute overloads;
// every fold of each best run must clear the NDCG/DCG thresholds.
public void AutoFitRankingCVTest()
{
    string labelColumnName = "Label";
    string groupIdColumnName = "GroupIdCustom";
    string featuresColumnVectorNameA = "FeatureVectorA";
    string featuresColumnVectorNameB = "FeatureVectorB";
    uint numFolds = 3;

    var mlContext = new MLContext(1);
    var reader = new TextLoader(mlContext, GetLoaderArgsRank(labelColumnName, groupIdColumnName, featuresColumnVectorNameA, featuresColumnVectorNameB));
    var trainDataView = reader.Load(DatasetUtil.GetMLSRDataset());
    // Take less than 1500 rows of data to satisfy CrossValSummaryRunner's
    // limit.
    trainDataView = mlContext.Data.TakeRows(trainDataView, 1499);
    var experiment = mlContext.Auto()
        .CreateRankingExperiment(5);
    CrossValidationExperimentResult<RankingMetrics>[] experimentResults =
    {
        experiment.Execute(trainDataView, numFolds, new ColumnInformation()
        {
            LabelColumnName = labelColumnName,
            GroupIdColumnName = groupIdColumnName
        }),
        experiment.Execute(trainDataView, numFolds, labelColumnName, groupIdColumnName)
    };
    for (int i = 0; i < experimentResults.Length; i++)
    {
        CrossValidationRunDetail<RankingMetrics> bestRun = experimentResults[i].BestRun;
        Assert.True(experimentResults[i].RunDetails.Any());
        // Fix: the original iterated with a hand-rolled GetEnumerator()/MoveNext() loop and
        // never disposed the enumerator; foreach is equivalent and disposes it.
        foreach (var model in bestRun.Results)
        {
            Assert.True(model.ValidationMetrics.NormalizedDiscountedCumulativeGains.Max() > 0.31);
            Assert.True(model.ValidationMetrics.DiscountedCumulativeGains.Max() > 15);
        }
    }
}
public void AutoFitContextLogTest()
{
    // This test confirms that logs produced from contexts made during AutoML experiment
    // runs are correctly relayed to the main Experiment MLContext.
    _markerAutoFitContextLogTest = false;
    var mainContext = new MLContext(1);
    mainContext.Log += MlContextLog;

    var flowersPath = DatasetUtil.GetFlowersDataset();
    var inference = mainContext.Auto().InferColumns(flowersPath, "Label");
    var loader = mainContext.Data.CreateTextLoader(inference.TextLoaderOptions);
    var flowersData = loader.Load(flowersPath);

    var experimentResult = mainContext.Auto()
        .CreateMulticlassClassificationExperiment(15)
        .Execute(flowersData, inference.ColumnInformation);

    // The log handler flips the marker when a sub-context message arrives.
    Assert.True(_markerAutoFitContextLogTest,
        "Image classification trainer logs from Experiment's sub contexts" +
        "were not relayed to the main MLContext.");
}
// Regression experiment on the generated dataset; at least one run must reach R² > 0.9.
public void AutoFitRegressionTest()
{
    var context = new MLContext();
    var dataPath = DatasetUtil.DownloadMlNetGeneratedRegressionDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    // First 20 rows become the validation set; the remainder is used for training.
    var validationData = context.Data.TakeRows(trainData, 20);
    trainData = context.Data.SkipRows(trainData, 20);

    var result = context.Auto()
        .CreateRegressionExperiment(0)
        .Execute(trainData, validationData,
            new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

    // Fix: the original Max(i => i.ValidationMetrics.RSquared > 0.9) used Max over booleans
    // to emulate Any() (relying on true > false) and threw NullReferenceException for failed
    // runs whose ValidationMetrics is null. Any() with null-conditional access expresses the
    // intent and tolerates failed runs (a lifted comparison against null yields false).
    Assert.True(result.RunDetails.Any(i => i?.ValidationMetrics?.RSquared > 0.9));
}
public void AutoFitImageClassification()
{
    // This test executes the code path that model builder code will take to get a model using image
    // classification API.
    var mlContext = new MLContext(1);
    mlContext.Log += Context_Log;

    var flowersPath = DatasetUtil.GetFlowersDataset();
    var inference = mlContext.Auto().InferColumns(flowersPath, "Label");
    var loader = mlContext.Data.CreateTextLoader(inference.TextLoaderOptions);
    var flowersData = loader.Load(flowersPath);

    var experimentResult = mlContext.Auto()
        .CreateMulticlassClassificationExperiment(0)
        .Execute(flowersData, inference.ColumnInformation);

    Assert.InRange(experimentResult.BestRun.ValidationMetrics.MicroAccuracy, 0.80, 0.9);

    // The predicted label column of the scored output should be text.
    var scored = experimentResult.BestRun.Model.Transform(flowersData);
    Assert.Equal(TextDataViewType.Instance, scored.Schema[DefaultColumnNames.PredictedLabel].Type);
}
// Regression experiment run under the given culture, tolerating cancellation exceptions
// raised when slow CI hosts exceed the maximum experiment time.
public void AutoFitRegressionTest(string culture)
{
    var originalCulture = Thread.CurrentThread.CurrentCulture;
    try
    {
        Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

        // If users run AutoML with a different locale, sometimes
        // the sweeper encounters problems when parsing some strings.
        // So testing in another culture is necessary.
        // Furthermore, these issues might only occur after ~70
        // iterations, so more experiment time is needed for this to
        // occur.
        uint experimentTime = (uint)(culture == "en-US" ? 0 : 180);
        var experimentSettings = new RegressionExperimentSettings { MaxExperimentTimeInSeconds = experimentTime };
        if (!Environment.Is64BitProcess)
        {
            // LightGBM isn't available on x86 machines
            experimentSettings.Trainers.Remove(RegressionTrainer.LightGbm);
        }

        var context = new MLContext(1);
        var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
        var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load(dataPath);
        // First 20 rows become the validation set; the remainder is used for training.
        var validationData = context.Data.TakeRows(trainData, 20);
        trainData = context.Data.SkipRows(trainData, 20);
        var result = context.Auto()
            .CreateRegressionExperiment(experimentSettings)
            .Execute(trainData, validationData,
                new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

        // Null-conditional access tolerates failed runs whose ValidationMetrics is null.
        Assert.True(result.RunDetails.Max(i => i?.ValidationMetrics?.RSquared) > 0.9);

        // Ensure experimentTime allows enough iterations to fully test the internationalization code
        // If the below assertion fails, increase the experiment time so the number of iterations is met
        Assert.True(culture == "en-US" || result.RunDetails.Count() >= 75,
            $"RunDetails.Count() = {result.RunDetails.Count()}, below 75");
    }
    catch (AggregateException ae)
    {
        // During CI unit testing, the host machines can run slower than normal, which
        // can increase the run time of unit tests and throw OperationCanceledExceptions
        // from multiple threads in the form of a single AggregateException.
        // Fix: the original declared ignoredExceptions INSIDE the foreach loop and rethrew
        // from within it, so the list was reset on every iteration and the first unexpected
        // exception was thrown alone, hiding any later ones. Collect all unexpected
        // exceptions first, then rethrow them together after the scan.
        var unexpectedExceptions = ae.Flatten().InnerExceptions
            .Where(ex => !(ex is OperationCanceledException))
            .ToList();
        if (unexpectedExceptions.Count > 0)
        {
            throw new AggregateException(unexpectedExceptions);
        }
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = originalCulture;
    }
}
public void AutoFitWithPresplittedData()
{
    // Models created in AutoML should work over the same data,
    // no matter how that data is splitted before passing it to the experiment execution
    // or to the model for prediction
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var dataFull = textLoader.Load(dataPath);
    // Two alternate splits of the same data: a train/test split and a 2-fold CV split.
    var dataTrainTest = context.Data.TrainTestSplit(dataFull);
    var dataCV = context.Data.CrossValidationSplit(dataFull, numberOfFolds: 2);

    // Train one model per input variant (full data, train split, first CV fold).
    var modelFull = context.Auto()
        .CreateBinaryClassificationExperiment(0)
        .Execute(dataFull, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel })
        .BestRun
        .Model;
    var modelTrainTest = context.Auto()
        .CreateBinaryClassificationExperiment(0)
        .Execute(dataTrainTest.TrainSet, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel })
        .BestRun
        .Model;
    var modelCV = context.Auto()
        .CreateBinaryClassificationExperiment(0)
        .Execute(dataCV.First().TrainSet, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel })
        .BestRun
        .Model;

    var models = new[] { modelFull, modelTrainTest, modelCV };
    foreach (var model in models)
    {
        // Every model must score every data variant and produce the same 30-column schema.
        var resFull = model.Transform(dataFull);
        var resTrainTest = model.Transform(dataTrainTest.TrainSet);
        var resCV = model.Transform(dataCV.First().TrainSet);
        Assert.Equal(30, resFull.Schema.Count);
        Assert.Equal(30, resTrainTest.Schema.Count);
        Assert.Equal(30, resCV.Schema.Count);
        // Column names must line up position-by-position across all three outputs.
        foreach (var col in resFull.Schema)
        {
            Assert.Equal(col.Name, resTrainTest.Schema[col.Index].Name);
            Assert.Equal(col.Name, resCV.Schema[col.Index].Name);
        }
    }
}
public void ValidateInferColsPath()
{
    // A valid dataset path must pass InferColumns argument validation without throwing.
    var adultDataPath = DatasetUtil.GetUciAdultDataset();
    UserInputValidationUtil.ValidateInferColumnsArgs(adultDataPath);
}
public void LabelIndexOutOfBoundsThrows()
{
    // A label column index beyond the dataset's width must fail fast.
    var mlContext = new MLContext();
    Assert.Throws<ArgumentOutOfRangeException>(
        () => mlContext.Auto().InferColumns(DatasetUtil.DownloadUciAdultDataset(), 100));
}