Exemplo n.º 1
0
        /// <summary>
        /// Checks that a cloned learner keeps its own sweep parameter values: values assigned
        /// to an original learner and to its clone must both survive and must differ.
        /// </summary>
        public void TestPipelineNodeCloning()
        {
            // AutoInference uses ComponentCatalog to find all learners
            var env = new MLContext().AddStandardComponents();

            // Pick one logistic regression learner and one SDCA learner that carry a pipeline node.
            var lrCandidates = RecipeInference.AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);
            var lrOriginal = lrCandidates.First(
                candidate => candidate.PipelineNode != null && candidate.LearnerName.Contains("LogisticRegression"));

            var sdcaCandidates = RecipeInference.AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);
            var sdcaOriginal = sdcaCandidates.First(
                candidate => candidate.PipelineNode != null && candidate.LearnerName.Contains("StochasticDualCoordinateAscent"));

            // Clone first, then give the original and the clone different hyperparameter values.
            var lrCopy = lrOriginal.Clone();
            lrOriginal.PipelineNode.SweepParams[0].RawValue = 1.2f;
            lrCopy.PipelineNode.SweepParams[0].RawValue = 3.5f;

            var sdcaCopy = sdcaOriginal.Clone();
            sdcaOriginal.PipelineNode.SweepParams[0].RawValue = 3;
            sdcaCopy.PipelineNode.SweepParams[0].RawValue = 0;

            var nodes = new[]
            {
                lrOriginal.PipelineNode,
                lrCopy.PipelineNode,
                sdcaOriginal.PipelineNode,
                sdcaCopy.PipelineNode
            };

            // Push the new values into every node before verifying any of them,
            // so the changes are propagated to the entry point objects.
            foreach (var node in nodes)
            {
                env.Check(node.UpdateProperties());
            }

            foreach (var node in nodes)
            {
                env.Check(node.CheckEntryPointStateMatchesParamValues());
            }

            // The second object's set of changes must not have overwritten the first object's.
            bool lrValuesMatch = lrOriginal.PipelineNode.SweepParams[0].RawValue.Equals(lrCopy.PipelineNode.SweepParams[0].RawValue);
            env.Check(!lrValuesMatch);

            bool sdcaValuesMatch = sdcaCopy.PipelineNode.SweepParams[0].RawValue.Equals(sdcaOriginal.PipelineNode.SweepParams[0].RawValue);
            env.Check(!sdcaValuesMatch);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs pipeline inference on the adult training data in two stages (an initial run
        /// limited to half of the iteration budget, then a resumed run up to the full budget),
        /// and finally runs a train/test experiment with the best pipeline found.
        /// </summary>
        public void TestLearn()
        {
            var             env                = new MLContext().AddStandardComponents(); // AutoInference uses ComponentCatalog to find all learners
            string          pathData           = GetDataPath("adult.train");
            string          pathDataTest       = GetDataPath("adult.test");
            int             numOfSampleRows    = 1000;
            int             batchSize          = 5;
            int             numIterations      = 10;
            int             numTransformLevels = 3;
            SupportedMetric metric             = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

            // Budget for the first stage; shared by the terminator and the assertion below
            // so the two cannot drift apart.
            int initialIterations = numIterations / 2;

            // Using the simple, uniform random sampling (with replacement) engine
            PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env);

            // Test initial learning
            var amls = AutoInference.InferPipelines(env, autoMlEngine, pathData, "", out var schema, numTransformLevels, batchSize,
                                                    metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(initialIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);

            env.Check(amls.GetAllEvaluatedPipelines().Length == initialIterations);

            // Resume learning
            amls.UpdateTerminator(new IterationTerminator(numIterations));
            bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);
            env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations);

            // Use best pipeline for another task
            var inputFileTrain = new SimpleFileHandle(env, pathData, false, false);

            // CS0618 is the compiler's "member is obsolete" warning; it is silenced only around the import calls.
#pragma warning disable 0618
            var datasetTrain = ImportTextData.ImportText(env,
                                                         new ImportTextData.Input {
                InputFile = inputFileTrain, CustomSchema = schema
            }).Data;
            var inputFileTest = new SimpleFileHandle(env, pathDataTest, false, false);
            var datasetTest   = ImportTextData.ImportText(env,
                                                          new ImportTextData.Input {
                InputFile = inputFileTest, CustomSchema = schema
            }).Data;
#pragma warning restore 0618

            // REVIEW: Theoretically, it could be the case that a new, very bad learner is introduced and
            // we get unlucky and only select it every time, such that this test fails. Not
            // likely at all, but a non-zero probability. Should be ok, since all current learners are returning d > .80.
            // Only the test metric is asserted on, so the train metric is discarded.
            bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer,
                                                out var testMetricValue, out var _);
            env.Check(testMetricValue > 0.2);
            Done();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs pipeline inference over a labeled tweets text dataset with a multi-class
        /// trainer kind and checks that the number of evaluated pipelines equals the
        /// requested iteration count.
        /// </summary>
        public void TestTextDatasetLearn()
        {
            const int sweepBatchSize = 5;
            const int iterationBudget = 35;
            const int transformLevels = 1;
            const int sampleRowCount = 100;

            // AutoInference uses ComponentCatalog to find all learners
            var env = new MLContext().AddStandardComponents();
            string tweetsPath = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv");
            SupportedMetric targetMetric =
                PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

            // Using the simple, uniform random sampling (with replacement) engine
            PipelineOptimizerBase randomEngine = new UniformRandomEngine(env);

            // Test initial learning; the inferred schema and best pipeline are not needed here.
            var inferenceState = AutoInference.InferPipelines(
                env,
                randomEngine,
                tweetsPath,
                "",
                out var _,
                transformLevels,
                sweepBatchSize,
                targetMetric,
                out var _,
                sampleRowCount,
                new IterationTerminator(iterationBudget),
                MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer);

            int evaluatedCount = inferenceState.GetAllEvaluatedPipelines().Length;
            env.Check(evaluatedCount == iterationBudget);
            Done();
        }