// Verifies that KeepSelectedLearners() restricts the AutoML search space to
// exactly the named learners and discards all others.
public void TestLearnerConstrainingByName()
{
    string pathData = GetDataPath("adult.train");
    int numOfSampleRows = 1000;
    int batchSize = 1;
    int numIterations = 1;
    int numTransformLevels = 2;
    // Plain string literals — the originals were interpolated ($"...") with
    // nothing to interpolate.
    var retainedLearnerNames = new[] { "LogisticRegressionBinaryClassifier", "FastTreeBinaryClassifier" };
    AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.Auc;

    // Using the simple, uniform random sampling (with replacement) brain.
    PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env);

    // Run initial experiment.
    var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _,
        numTransformLevels, batchSize, metric, out var _, numOfSampleRows,
        new IterationTerminator(numIterations),
        MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

    // Keep only logistic regression and FastTree.
    amls.KeepSelectedLearners(retainedLearnerNames);
    var space = amls.GetSearchSpace();

    // Make sure only learners left are those retained.
    Assert.Equal(retainedLearnerNames.Length, space.Item2.Length);
    Assert.True(space.Item2.All(l => retainedLearnerNames.Any(r => r == l.LearnerName)));
}
// Verifies that when the chosen metric is one that should be MINIMIZED (L1 loss),
// the pipeline reported as "best" has a metric value no greater than any other
// evaluated pipeline.
public void TestRegressionPipelineWithMinimizingMetric()
{
    string pathData = GetDataPath("../Housing (regression)/housing.txt");
    int numOfSampleRows = 100;
    int batchSize = 5;
    int numIterations = 10;
    int numTransformLevels = 1;
    // L1 is a loss: lower is better, unlike AUC/accuracy used in sibling tests.
    AutoInference.SupportedMetric metric = AutoInference.SupportedMetric.L1;

    // Using the simple, uniform random sampling (with replacement) brain
    PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(Env);

    // Run initial experiments
    var amls = AutoInference.InferPipelines(Env, autoMlBrain, pathData, "", out var _,
        numTransformLevels, batchSize, metric, out var bestPipeline, numOfSampleRows,
        new IterationTerminator(numIterations),
        MacroUtils.TrainerKinds.SignatureRegressorTrainer);

    // Allow for one more iteration
    amls.UpdateTerminator(new IterationTerminator(numIterations + 1));

    // Do learning for the extended iteration budget.
    bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);

    // Make sure the returned best pipeline truly minimizes the metric: every
    // evaluated pipeline's L1 value must be >= the best pipeline's value.
    Assert.NotNull(bestPipeline);
    Assert.True(amls.GetAllEvaluatedPipelines().All(
        p => p.PerformanceSummary.MetricValue >= bestPipeline.PerformanceSummary.MetricValue));
}
// Runs the PipelineSweeper macro end-to-end through the Experiment graph API
// (no transform inference exercised beyond search-space setup) and checks that
// the returned AutoMlState ran the requested number of iterations with a
// reasonable best AUC.
public void PipelineSweeperNoTransforms()
{
    // Set up inputs for experiment
    string pathData = GetDataPath("adult.train");
    string pathDataTest = GetDataPath("adult.test");
    const int numOfSampleRows = 1000;
    const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+";

    var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);
#pragma warning disable 0618
    var datasetTrain = ImportTextData.ImportText(Env,
        new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows);
    var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
    var datasetTest = ImportTextData.ImportText(Env,
        new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows);
#pragma warning restore 0618

    const int batchSize = 5;
    const int numIterations = 20;
    const int numTransformLevels = 2;
    // NOTE(review): the original also created "var env = new MLContext();" here but
    // never used it (everything below runs against Env) — removed as dead code.
    SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
        PipelineSweeperSupportedMetrics.Metrics.Auc);

    // Using the simple, uniform random sampling (with replacement) engine
    PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env);

    // Create search object
    var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine,
        new IterationTerminator(numIterations),
        MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest);

    // Infer search space
    amls.InferSearchSpace(numTransformLevels);

    // Create macro object
    var pipelineSweepInput = new Microsoft.ML.Legacy.Models.PipelineSweeper()
    {
        BatchSize = batchSize,
    };

    var exp = new Experiment(Env);
    var output = exp.Add(pipelineSweepInput);
    exp.Compile();
    exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain);
    exp.SetInput(pipelineSweepInput.TestingData, datasetTest);
    exp.SetInput(pipelineSweepInput.State, amls);
    exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]);
    exp.Run();

    // Make sure you get back an AutoMlState, that it ran for the correct number of
    // iterations, and that the best pipeline has AUC better than 0.8 on this dataset
    // (the original comment said 0.1, contradicting the assertion below).
    AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State);
    Assert.NotNull(amlsOut);
    // xUnit convention: expected value first, actual second (original had them swapped).
    Assert.Equal(numIterations, amlsOut.GetAllEvaluatedPipelines().Length);
    Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.8);
}
// End-to-end AutoML smoke test: infer pipelines, resume learning after raising the
// iteration budget, then reuse the best pipeline for a fresh train/test experiment.
public void TestLearn()
{
    using (var env = new LocalEnvironment()
        .AddStandardComponents()) // AutoInference.InferPipelines uses ComponentCatalog to read text data
    {
        string pathData = GetDataPath("adult.train");
        string pathDataTest = GetDataPath("adult.test");
        int numOfSampleRows = 1000;
        int batchSize = 5;
        int numIterations = 10;
        int numTransformLevels = 3;
        SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
            PipelineSweeperSupportedMetrics.Metrics.Auc);

        // Using the simple, uniform random sampling (with replacement) engine
        PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env);

        // Test initial learning
        var amls = AutoInference.InferPipelines(env, autoMlEngine, pathData, "", out var schema,
            numTransformLevels, batchSize, metric, out var bestPipeline, numOfSampleRows,
            new IterationTerminator(numIterations / 2),
            MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);
        env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations / 2);

        // Resume learning
        amls.UpdateTerminator(new IterationTerminator(numIterations));
        bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);
        env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations);

        // Use best pipeline for another task
        var inputFileTrain = new SimpleFileHandle(env, pathData, false, false);
#pragma warning disable 0618
        var datasetTrain = ImportTextData.ImportText(env,
            new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data;
        var inputFileTest = new SimpleFileHandle(env, pathDataTest, false, false);
        var datasetTest = ImportTextData.ImportText(env,
            new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data;
#pragma warning restore 0618

        // REVIEW: Theoretically, it could be the case that a new, very bad learner is introduced and
        // we get unlucky and only select it every time, such that this test fails. Not
        // likely at all, but a non-zero probability. The 0.2 threshold below is deliberately
        // loose for that reason, even though current learners all score well above 0.8.
        bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric,
            MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer,
            out var testMetricValue, out var trainMetricValue); // fixed typo: was "trainMtericValue"
        env.Check(testMetricValue > 0.2);
    }
    Done();
}
// Verifies that freezing a sweepable hyperparameter pins its value across all
// subsequent AutoML iterations, and that constraining the search space to a
// single learner is honored by every evaluated pipeline.
public void TestHyperparameterFreezing()
{
    string pathData = GetDataPath("adult.train");
    int numOfSampleRows = 1000;
    int batchSize = 1;
    int numIterations = 10;
    int numTransformLevels = 3;

    using (var env = new LocalEnvironment()
        .AddStandardComponents()) // AutoInference uses ComponentCatalog to find all learners
    {
        SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
            PipelineSweeperSupportedMetrics.Metrics.Auc);

        // Using the simple, uniform random sampling (with replacement) brain
        PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(env);

        // Run initial experiments
        var amls = AutoInference.InferPipelines(env, autoMlBrain, pathData, "", out var _,
            numTransformLevels, batchSize, metric, out var bestPipeline, numOfSampleRows,
            new IterationTerminator(numIterations),
            MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

        // Clear results
        amls.ClearEvaluatedPipelines();

        // Get space, keep only categorical transforms and a single learner, then
        // freeze one of the learner's hyperparameters at its current value.
        var space = amls.GetSearchSpace();
        var transforms = space.Item1.Where(t =>
            t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray();
        var learners = new[] { space.Item2.First() };
        var hyperParam = learners[0].PipelineNode.SweepParams.First();
        var frozenParamValue = hyperParam.RawValue;
        hyperParam.Frozen = true;
        amls.UpdateSearchSpace(learners, transforms);

        // Allow for one more iteration
        amls.UpdateTerminator(new IterationTerminator(numIterations + 1));

        // Do learning. Only retained learner should be left in all pipelines.
        bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);

        // Make sure all pipelines have retained learner
        Assert.True(amls.GetAllEvaluatedPipelines().All(
            p => p.Learner.LearnerName == learners[0].LearnerName));

        // Make sure hyperparameter value did not change.
        Assert.NotNull(bestPipeline);
        // xUnit convention: expected value first, actual second (original had them swapped).
        Assert.Equal(frozenParamValue, bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue);
    }
}
// Smoke test: AutoML pipeline inference on a text (tweet) dataset for
// multi-class classification must complete exactly the requested number
// of sweep iterations.
public void TestTextDatasetLearn()
{
    // AutoInference uses ComponentCatalog to find all learners.
    var env = new MLContext().AddStandardComponents();

    // Experiment configuration.
    int numSampleRows = 100;
    int numTransformLevels = 1;
    int numIterations = 35;
    int batchSize = 5;
    string pathData = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv");
    SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
        PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

    // Sweep with the simple, uniform random sampling (with replacement) engine.
    PipelineOptimizerBase engine = new UniformRandomEngine(env);

    // Kick off pipeline inference; inferred schema and best pipeline are discarded.
    var state = AutoInference.InferPipelines(env, engine, pathData, "", out var _,
        numTransformLevels, batchSize, metric, out var _, numSampleRows,
        new IterationTerminator(numIterations),
        MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer);

    // Every requested iteration must have produced an evaluated pipeline.
    env.Check(state.GetAllEvaluatedPipelines().Length == numIterations);
    Done();
}