/// <summary>
/// Runs a binary classification AutoML experiment on the UCI Adult dataset and, when the
/// final run ended with an exception, checks that the exception reports a cancellation and
/// that the best model found can still transform the training data.
/// </summary>
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
    // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run
    // slower than normal, which can increase the run time of unit tests, and may not produce multiple runs.
    //
    // Require at least one run that finished without an exception, matching the note above that the
    // cancellation is reported once at least one model has been generated. The previous
    // Select(...).Count() form counted every run regardless of its outcome.
    var anyRunSucceeded = experiment.RunDetails.Any(r => r.Exception == null);
    if (anyRunSucceeded && experiment.RunDetails.Last().Exception != null)
    {
        Assert.True(
            experiment.RunDetails.Last().Exception.Message.Contains("Operation was canceled"),
            "Training process was not successfully canceled after maximum experiment time was reached.");

        // Ensure that the best found model can still run after maximum experiment time was reached.
        var predictions = experiment.BestRun.Model.Transform(trainData);
        Assert.NotNull(predictions);
    }
}
/// <summary>
/// Inferring columns with a label name of "Junk" must raise an <see cref="ArgumentException"/>.
/// </summary>
public void IncorrectLabelColumnThrows()
{
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();

    // Kept as an explicit Action so the same Assert.Throws overload is selected.
    System.Action inferWithUnknownLabel =
        () => context.Auto().InferColumns(dataPath, "Junk", groupColumns: false);

    Assert.Throws<ArgumentException>(inferWithUnknownLabel);
}
/// <summary>
/// Infers columns by label index (14) with a header row and checks that the column covering
/// exactly that index is named "hours-per-week" and is reported as the label column.
/// </summary>
public void IdentifyLabelColumnThroughIndexWithHeader()
{
    const int labelColumnIndex = 14;
    const string expectedLabelName = "hours-per-week";

    var inference = new MLContext(1)
        .Auto()
        .InferColumns(DatasetUtil.GetUciAdultDataset(), labelColumnIndex, hasHeader: true);
    var loaderOptions = inference.TextLoaderOptions;

    Assert.True(loaderOptions.HasHeader);

    // Locate the column whose first source range is exactly the label index.
    var labelColumn = loaderOptions.Columns.First(column =>
    {
        var firstRange = column.Source[0];
        return firstRange.Min == labelColumnIndex && firstRange.Max == labelColumnIndex;
    });

    Assert.Equal(expectedLabelName, labelColumn.Name);
    Assert.Equal(expectedLabelName, inference.ColumnInformation.LabelColumnName);
}
/// <summary>
/// Runs a binary classification AutoML experiment on the UCI Adult dataset and, when the
/// final run ended with an exception, checks that the exception (or every exception inside an
/// <see cref="AggregateException"/>) reports a cancellation and that the best model found can
/// still transform the training data.
/// </summary>
public void AutoFitMaxExperimentTimeTest()
{
    // A single binary classification experiment takes less than 5 seconds.
    // System.OperationCanceledException is thrown when ongoing experiment
    // is canceled and at least one model has been generated.
    // BinaryClassificationExperiment includes LightGBM, which is not 32-bit
    // compatible.
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(dataPath);
    var experiment = context.Auto()
        .CreateBinaryClassificationExperiment(15)
        .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });

    // Ensure the (last) model that was training when maximum experiment time was reached has been stopped,
    // and that its MLContext has been canceled. Sometimes during CI unit testing, the host machines can run
    // slower than normal, which can increase the run time of unit tests, and may not produce multiple runs.
    //
    // Require at least one run that finished without an exception, matching the note above that the
    // cancellation is reported once at least one model has been generated. The previous
    // Select(...).Count() form counted every run regardless of its outcome.
    var anyRunSucceeded = experiment.RunDetails.Any(r => r.Exception == null);
    if (anyRunSucceeded && experiment.RunDetails.Last().Exception != null)
    {
        var expectedExceptionMessage = "Operation was canceled";
        var lastException = experiment.RunDetails.Last().Exception;

        bool containsMessage;
        if (lastException is AggregateException lastAggregateException)
        {
            // Sometimes multiple threads might throw the same "Operation was canceled"
            // exception and all of them are grouped inside an AggregateException.
            // Must check that all exceptions are the expected one.
            containsMessage = lastAggregateException
                .Flatten()
                .InnerExceptions
                .All(ex => ex.Message.Contains(expectedExceptionMessage));
        }
        else
        {
            containsMessage = lastException.Message.Contains(expectedExceptionMessage);
        }

        // A separating space was missing between the two sentences of the failure message.
        Assert.True(
            containsMessage,
            $"Did not obtain '{expectedExceptionMessage}' error. "
            + $"Obtained unexpected error of type {lastException.GetType()} with message: {lastException.Message}");

        // Ensure that the best found model can still run after maximum experiment time was reached.
        var predictions = experiment.BestRun.Model.Transform(trainData);
        Assert.NotNull(predictions);
    }
}
/// <summary>
/// Executes a binary classification experiment on the UCI Adult dataset using the
/// Execute overload that takes the value 10 as its second argument, then checks that the lowest
/// validation accuracy across the best run's results exceeds 0.70 and that the best run
/// exposes an estimator and a trainer name.
/// </summary>
public void AutoFit_UCI_Adult_CrossValidation_10_Test()
{
    var mlContext = new MLContext(1);
    var datasetPath = DatasetUtil.GetUciAdultDataset();
    var inferredColumns = mlContext.Auto().InferColumns(datasetPath, DatasetUtil.UciAdultLabel);
    var loader = mlContext.Data.CreateTextLoader(inferredColumns.TextLoaderOptions);
    var trainingData = loader.Load(datasetPath);

    var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(1);
    var outcome = experiment.Execute(trainingData, 10, DatasetUtil.UciAdultLabel);
    var bestRun = outcome.BestRun;

    // The worst result of the best run must still clear the accuracy threshold.
    var lowestAccuracy = bestRun.Results.Min(r => r.ValidationMetrics.Accuracy);
    Assert.True(lowestAccuracy > 0.70);
    Assert.NotNull(bestRun.Estimator);
    Assert.NotNull(bestRun.TrainerName);
}
/// <summary>
/// Checks that column inference without grouping yields only single-index columns, and that
/// inference with grouping yields fewer columns than inference without grouping.
/// </summary>
public void UnGroupReturnsMoreColumnsThanGroup()
{
    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();

    var ungrouped = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel, groupColumns: false);
    foreach (var column in ungrouped.TextLoaderOptions.Columns)
    {
        // Each ungrouped column must have at most one source range, and that range must cover a single index.
        Assert.True(column.Source.Length <= 1 && column.Source[0].Min == column.Source[0].Max);
    }

    var grouped = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel, groupColumns: true);

    var groupedColumnCount = grouped.TextLoaderOptions.Columns.Count();
    var ungroupedColumnCount = ungrouped.TextLoaderOptions.Columns.Count();
    Assert.True(groupedColumnCount < ungroupedColumnCount);
}
/// <summary>
/// Splits the UCI Adult dataset into train and test sets, executes a binary classification
/// experiment on them, and checks that the best run exceeds 0.70 validation accuracy and
/// exposes an estimator, a model and a trainer name.
/// </summary>
public void AutoFit_UCI_Adult_Train_Test_Split_Test()
{
    var mlContext = new MLContext(1);
    var datasetPath = DatasetUtil.GetUciAdultDataset();
    var inferredColumns = mlContext.Auto().InferColumns(datasetPath, DatasetUtil.UciAdultLabel);
    var loader = mlContext.Data.CreateTextLoader(inferredColumns.TextLoaderOptions);
    var fullData = loader.Load(datasetPath);
    var split = mlContext.Data.TrainTestSplit(fullData);

    var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(1);
    var outcome = experiment.Execute(split.TrainSet, split.TestSet, DatasetUtil.UciAdultLabel);
    var bestRun = outcome.BestRun;

    Assert.True(bestRun.ValidationMetrics.Accuracy > 0.70);
    Assert.NotNull(bestRun.Estimator);
    Assert.NotNull(bestRun.Model);
    Assert.NotNull(bestRun.TrainerName);
}
/// <summary>
/// Passes the UCI Adult dataset path to the infer-columns argument validation; the test
/// succeeds when the validation call completes without throwing.
/// </summary>
public void ValidateInferColsPath()
{
    var datasetPath = DatasetUtil.GetUciAdultDataset();
    UserInputValidationUtil.ValidateInferColumnsArgs(datasetPath);
}
/// <summary>
/// Trains one best model per way of slicing the UCI Adult data (full set, train/test split,
/// first cross-validation fold) and checks that every model produces a 30-column output
/// schema with matching column names on each of those datasets.
/// </summary>
public void AutoFitWithPresplittedData()
{
    // Models created in AutoML should work over the same data,
    // no matter how that data is split before passing it to the experiment execution
    // or to the model for prediction.
    const int expectedColumnCount = 30;

    var context = new MLContext(1);
    var dataPath = DatasetUtil.GetUciAdultDataset();
    var inference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
    var loader = context.Data.CreateTextLoader(inference.TextLoaderOptions);

    var dataFull = loader.Load(dataPath);
    var dataTrainTest = context.Data.TrainTestSplit(dataFull);
    var dataCV = context.Data.CrossValidationSplit(dataFull, numberOfFolds: 2);
    var cvTrainSet = dataCV.First().TrainSet;

    // One experiment per training set, executed eagerly in the order: full, train/test, CV.
    var trainingSets = new[] { dataFull, dataTrainTest.TrainSet, cvTrainSet };
    var models = trainingSets
        .Select(trainingSet => context.Auto()
            .CreateBinaryClassificationExperiment(0)
            .Execute(trainingSet, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel })
            .BestRun
            .Model)
        .ToArray();

    foreach (var model in models)
    {
        var fullSchema = model.Transform(dataFull).Schema;
        var trainTestSchema = model.Transform(dataTrainTest.TrainSet).Schema;
        var cvSchema = model.Transform(cvTrainSet).Schema;

        Assert.Equal(expectedColumnCount, fullSchema.Count);
        Assert.Equal(expectedColumnCount, trainTestSchema.Count);
        Assert.Equal(expectedColumnCount, cvSchema.Count);

        // Column names must line up position by position across all three outputs.
        foreach (var column in fullSchema)
        {
            var expectedName = column.Name;
            Assert.Equal(expectedName, trainTestSchema[column.Index].Name);
            Assert.Equal(expectedName, cvSchema[column.Index].Name);
        }
    }
}
/// <summary>
/// Inferring columns with a label column index of 100 must raise an
/// <see cref="ArgumentOutOfRangeException"/>.
/// </summary>
public void LabelIndexOutOfBoundsThrows()
{
    // Everything stays inside the delegate so any exception it raises is observed by Assert.Throws.
    Assert.Throws<ArgumentOutOfRangeException>(() =>
    {
        var autoCatalog = new MLContext(1).Auto();
        var dataPath = DatasetUtil.GetUciAdultDataset();
        return autoCatalog.InferColumns(dataPath, 100);
    });
}