예제 #1
0
        /// <summary>
        /// Runs a single-iteration pipeline inference over "adult.train", restricts the resulting
        /// state to two learners selected by name, and verifies that the search space afterwards
        /// contains exactly those learners.
        /// </summary>
        public void TestLearnerConstrainingByName()
        {
            const int sampleRowCount = 1000;
            const int pipelinesPerBatch = 1;
            const int iterationCount = 1;
            const int transformLevelCount = 2;

            string trainPath = GetDataPath("adult.train");
            string[] learnersToKeep =
            {
                "LogisticRegressionBinaryClassifier",
                "FastTreeBinaryClassifier"
            };

            // AutoInference uses ComponentCatalog to find all learners.
            using (var env = new LocalEnvironment().AddStandardComponents())
            {
                SupportedMetric auc = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.Auc);

                // Simple, uniform random sampling (with replacement) optimizer.
                PipelineOptimizerBase optimizer = new UniformRandomEngine(env);
                var terminator = new IterationTerminator(iterationCount);

                // Initial experiment; the two out results are not needed here.
                var state = AutoInference.InferPipelines(
                    env, optimizer, trainPath, "", out var _,
                    transformLevelCount, pipelinesPerBatch, auc, out var _, sampleRowCount,
                    terminator, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

                // Narrow the state down to logistic regression and FastTree.
                state.KeepSelectedLearners(learnersToKeep);
                var remainingLearners = state.GetSearchSpace().Item2;

                // Same count, and every remaining learner is one of the retained names.
                Assert.Equal(learnersToKeep.Length, remainingLearners.Length);
                bool onlyRetainedLeft = remainingLearners.All(
                    learner => learnersToKeep.Any(name => name == learner.LearnerName));
                Assert.True(onlyRetainedLeft);
            }
        }
예제 #2
0
        /// <summary>
        /// Infers regression pipelines on the housing data, resumes the search for one additional
        /// iteration, and verifies that no evaluated pipeline has a metric value lower than the
        /// one reported for the best pipeline.
        /// </summary>
        public void TestRegressionPipelineWithMinimizingMetric()
        {
            const int sampleRowCount = 100;
            const int pipelinesPerBatch = 5;
            const int iterationCount = 10;
            const int transformLevelCount = 1;

            string housingPath = GetDataPath("../Housing (regression)/housing.txt");

            // AutoInference uses ComponentCatalog to find all learners.
            using (var env = new LocalEnvironment().AddStandardComponents())
            {
                // NOTE(review): the method name refers to a minimizing metric and the final assertion
                // treats lower values as better, yet the metric requested here is AccuracyMicro.
                // Confirm that this is intended.
                SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

                // Simple, uniform random sampling (with replacement) optimizer.
                PipelineOptimizerBase optimizer = new UniformRandomEngine(env);

                // Initial round of experiments; its best pipeline is superseded by the resumed run below.
                var state = AutoInference.InferPipelines(
                    env, optimizer, housingPath, "", out var _, transformLevelCount, pipelinesPerBatch,
                    metric, out var _, sampleRowCount, new IterationTerminator(iterationCount),
                    MacroUtils.TrainerKinds.SignatureRegressorTrainer);

                // Raise the iteration limit by one and resume the search.
                state.UpdateTerminator(new IterationTerminator(iterationCount + 1));
                var bestPipeline = state.InferPipelines(transformLevelCount, pipelinesPerBatch, sampleRowCount);

                // A best pipeline must exist, and every evaluated pipeline must score at least its value.
                Assert.NotNull(bestPipeline);
                var bestValue = bestPipeline.PerformanceSummary.MetricValue;
                Assert.True(state.GetAllEvaluatedPipelines().All(
                                candidate => candidate.PerformanceSummary.MetricValue >= bestValue));
            }
        }
        /// <summary>
        /// Runs the legacy PipelineSweeper macro inside an <c>Experiment</c>, feeding it a
        /// pre-built AutoML state plus the first 1000 rows of the adult train and test sets, and
        /// checks the state that comes back: the number of evaluated pipelines must equal the
        /// iteration limit and the best pipeline's metric value must exceed 0.8.
        /// </summary>
        public void PipelineSweeperNoTransforms()
        {
            // Set up inputs for experiment.
            string       pathData        = GetDataPath("adult.train");
            string       pathDataTest    = GetDataPath("adult.test");
            const int    numOfSampleRows = 1000;
            const string schema          = "sep=, col=Features:R4:0,2,4,10-12 col=Label:R4:14 header=+";

            var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false);

            // Both datasets are loaded with the same schema and truncated to numOfSampleRows rows.
#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(Env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
            var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false);
            var datasetTest   = ImportTextData.ImportText(Env,
                                                          new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data.Take(numOfSampleRows);
#pragma warning restore 0618
            const int       batchSize          = 5;
            const int       numIterations      = 20;
            const int       numTransformLevels = 2;
            SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

            // Using the simple, uniform random sampling (with replacement) engine.
            PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(Env);

            // Create search object.
            var amls = new AutoInference.AutoMlMlState(Env, metric, autoMlEngine, new IterationTerminator(numIterations),
                                                       MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer, datasetTrain, datasetTest);

            // Infer search space.
            amls.InferSearchSpace(numTransformLevels);

            // Create macro object.
            var pipelineSweepInput = new Microsoft.ML.Legacy.Models.PipelineSweeper()
            {
                BatchSize = batchSize,
            };

            // Wire the macro into an experiment, bind its inputs, and run it.
            var exp    = new Experiment(Env);
            var output = exp.Add(pipelineSweepInput);
            exp.Compile();
            exp.SetInput(pipelineSweepInput.TrainingData, datasetTrain);
            exp.SetInput(pipelineSweepInput.TestingData, datasetTest);
            exp.SetInput(pipelineSweepInput.State, amls);
            exp.SetInput(pipelineSweepInput.CandidateOutputs, new IDataView[0]);
            exp.Run();

            // Make sure you get back an AutoMlState, that it ran for the correct number of iterations,
            // and that the best pipeline's metric value is above 0.8.
            AutoInference.AutoMlMlState amlsOut = (AutoInference.AutoMlMlState)exp.GetOutput(output.State);
            Assert.NotNull(amlsOut);
            // Expected value first, so a failure message labels the two numbers correctly.
            Assert.Equal(numIterations, amlsOut.GetAllEvaluatedPipelines().Length);
            Assert.True(amlsOut.GetBestPipeline().PerformanceSummary.MetricValue > 0.8);
        }
예제 #4
0
        /// <summary>
        /// Infers pipelines on "adult.train" for half of the iteration budget, resumes the search
        /// up to the full budget, and then runs the best pipeline in a train/test experiment on
        /// the adult train and test files, checking the resulting test metric.
        /// </summary>
        public void TestLearn()
        {
            // AutoInference.InferPipelines uses ComponentCatalog to read text data.
            using (var env = new LocalEnvironment().AddStandardComponents())
            {
                string trainPath = GetDataPath("adult.train");
                string testPath = GetDataPath("adult.test");
                const int sampleRowCount = 1000;
                const int pipelinesPerBatch = 5;
                const int iterationCount = 10;
                const int halfIterationCount = iterationCount / 2;
                const int transformLevelCount = 3;
                SupportedMetric auc = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.Auc);

                // Simple, uniform random sampling (with replacement) engine.
                PipelineOptimizerBase optimizer = new UniformRandomEngine(env);

                // First leg: stop after half of the iterations.
                var state = AutoInference.InferPipelines(
                    env, optimizer, trainPath, "", out var schema, transformLevelCount, pipelinesPerBatch,
                    auc, out var bestPipeline, sampleRowCount, new IterationTerminator(halfIterationCount),
                    MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);
                env.Check(state.GetAllEvaluatedPipelines().Length == halfIterationCount);

                // Second leg: extend the limit to the full count and resume.
                state.UpdateTerminator(new IterationTerminator(iterationCount));
                bestPipeline = state.InferPipelines(transformLevelCount, pipelinesPerBatch, sampleRowCount);
                env.Check(state.GetAllEvaluatedPipelines().Length == iterationCount);

                // Load the full train and test files with the schema returned by the inference call.
                var trainFile = new SimpleFileHandle(env, trainPath, false, false);
                var testFile = new SimpleFileHandle(env, testPath, false, false);
#pragma warning disable 0618
                var trainInput = new ImportTextData.Input { InputFile = trainFile, CustomSchema = schema };
                var trainData = ImportTextData.ImportText(env, trainInput).Data;
                var testInput = new ImportTextData.Input { InputFile = testFile, CustomSchema = schema };
                var testData = ImportTextData.ImportText(env, testInput).Data;
#pragma warning restore 0618

                // REVIEW: In theory a new, very bad learner could be introduced and happen to be the only
                // one sampled, which would make this test fail. That is unlikely but not impossible.
                // The check below only requires the test metric to exceed 0.2.
                bestPipeline.RunTrainTestExperiment(
                    trainData, testData, auc, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer,
                    out var testMetricValue, out var _);
                env.Check(testMetricValue > 0.2);
            }

            Done();
        }
예제 #5
0
        /// <summary>
        /// Infers pipelines on "adult.train", then narrows the search space to a single learner
        /// with its first sweepable hyperparameter frozen, resumes the search for one more
        /// iteration, and verifies that every newly evaluated pipeline uses that learner and that
        /// the frozen hyperparameter still has its original raw value.
        /// </summary>
        public void TestHyperparameterFreezing()
        {
            string pathData           = GetDataPath("adult.train");
            int    numOfSampleRows    = 1000;
            int    batchSize          = 1;
            int    numIterations      = 10;
            int    numTransformLevels = 3;

            using (var env = new LocalEnvironment()
                             .AddStandardComponents()) // AutoInference uses ComponentCatalog to find all learners
            {
                SupportedMetric metric = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

                // Using the simple, uniform random sampling (with replacement) brain.
                PipelineOptimizerBase autoMlBrain = new UniformRandomEngine(env);

                // Run initial experiments.
                var amls = AutoInference.InferPipelines(env, autoMlBrain, pathData, "", out var _, numTransformLevels, batchSize,
                                                        metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(numIterations),
                                                        MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

                // Clear results so that only pipelines from the resumed run are inspected below.
                amls.ClearEvaluatedPipelines();

                // Get the space, drop transforms whose expert type is Categorical, keep only the first
                // learner, and freeze that learner's first sweep parameter.
                var space      = amls.GetSearchSpace();
                var transforms = space.Item1.Where(t =>
                                                   t.ExpertType != typeof(TransformInference.Experts.Categorical)).ToArray();
                var learners         = new[] { space.Item2.First() };
                var hyperParam       = learners[0].PipelineNode.SweepParams.First();
                var frozenParamValue = hyperParam.RawValue;
                hyperParam.Frozen = true;
                amls.UpdateSearchSpace(learners, transforms);

                // Allow for one more iteration.
                amls.UpdateTerminator(new IterationTerminator(numIterations + 1));

                // Do learning. Only the retained learner should be left in all pipelines.
                bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);

                // Make sure all pipelines have the retained learner.
                Assert.True(amls.GetAllEvaluatedPipelines().All(p => p.Learner.LearnerName == learners[0].LearnerName));

                // Make sure the hyperparameter value did not change. The expected (frozen) value goes
                // first so that a failure message labels the two values correctly.
                Assert.NotNull(bestPipeline);
                Assert.Equal(frozenParamValue, bestPipeline.Learner.PipelineNode.SweepParams.First().RawValue);
            }
        }
예제 #6
0
        /// <summary>
        /// Infers multi-class classification pipelines on the labeled tweets dataset and checks
        /// that the number of evaluated pipelines equals the iteration limit.
        /// </summary>
        public void TestTextDatasetLearn()
        {
            // AutoInference uses ComponentCatalog to find all learners.
            var env = new MLContext().AddStandardComponents();
            string tweetsPath = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv");

            const int pipelinesPerBatch = 5;
            const int iterationCount = 35;
            const int transformLevelCount = 1;
            const int sampleRowCount = 100;

            SupportedMetric accuracyMicro = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

            // Simple, uniform random sampling (with replacement) engine.
            PipelineOptimizerBase optimizer = new UniformRandomEngine(env);
            var terminator = new IterationTerminator(iterationCount);

            // Run the search; neither out result is needed.
            var state = AutoInference.InferPipelines(
                env, optimizer, tweetsPath, "", out var _, transformLevelCount, pipelinesPerBatch,
                accuracyMicro, out var _, sampleRowCount, terminator,
                MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer);

            int evaluatedCount = state.GetAllEvaluatedPipelines().Length;
            env.Check(evaluatedCount == iterationCount);
            Done();
        }