Ejemplo n.º 1
0
        /// <summary>
        /// Clones a logistic-regression learner and an SDCA learner, assigns different values to
        /// the first sweep parameter of each original and its clone, and checks that the entry
        /// point state follows the parameters and that original and clone keep distinct values.
        /// </summary>
        public void TestPipelineNodeCloning()
        {
            const string logisticRegressionName = "LogisticRegression";
            const string sdcaName = "StochasticDualCoordinateAscent";

            using (var env = new ConsoleEnvironment())
            {
                // Pick the first usable (non-null pipeline node) learner of each kind.
                var lrOriginal = RecipeInference
                    .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
                    .Where(candidate => candidate.PipelineNode != null)
                    .First(candidate => candidate.LearnerName.Contains(logisticRegressionName));

                var sdcaOriginal = RecipeInference
                    .AllowedLearners(env, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer)
                    .Where(candidate => candidate.PipelineNode != null)
                    .First(candidate => candidate.LearnerName.Contains(sdcaName));

                // Clone each learner, then give original and clone different hyperparameter values.
                var lrClone = lrOriginal.Clone();
                lrOriginal.PipelineNode.SweepParams[0].RawValue = 1.2f;
                lrClone.PipelineNode.SweepParams[0].RawValue = 3.5f;

                var sdcaClone = sdcaOriginal.Clone();
                sdcaOriginal.PipelineNode.SweepParams[0].RawValue = 3;
                sdcaClone.PipelineNode.SweepParams[0].RawValue = 0;

                var learners = new[] { lrOriginal, lrClone, sdcaOriginal, sdcaClone };

                // First push every change down to the entry point objects...
                foreach (var learner in learners)
                {
                    env.Check(learner.PipelineNode.UpdateProperties());
                }

                // ...then confirm the entry point state agrees with the parameter values.
                foreach (var learner in learners)
                {
                    env.Check(learner.PipelineNode.CheckEntryPointStateMatchesParamValues());
                }

                // The clone's value must not have overwritten the original's (and vice versa).
                var lrOriginalValue = lrOriginal.PipelineNode.SweepParams[0].RawValue;
                var lrCloneValue = lrClone.PipelineNode.SweepParams[0].RawValue;
                env.Check(!lrOriginalValue.Equals(lrCloneValue));

                var sdcaCloneValue = sdcaClone.PipelineNode.SweepParams[0].RawValue;
                var sdcaOriginalValue = sdcaOriginal.PipelineNode.SweepParams[0].RawValue;
                env.Check(!sdcaCloneValue.Equals(sdcaOriginalValue));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs pipeline inference on the adult training data for half of the iteration budget,
        /// resumes it up to the full budget, and then runs a train/test experiment with the best
        /// pipeline, checking the evaluated-pipeline counts and the resulting test metric.
        /// </summary>
        public void TestLearn()
        {
            using (var env = new ConsoleEnvironment()
                             .AddStandardComponents()) // AutoInference.InferPipelines uses ComponentCatalog to read text data
            {
                string          pathData           = GetDataPath("adult.train");
                string          pathDataTest       = GetDataPath("adult.test");
                int             numOfSampleRows    = 1000;
                int             batchSize          = 5;
                int             numIterations      = 10;
                int             initialIterations  = numIterations / 2; // budget for the first (interrupted) run
                int             numTransformLevels = 3;
                SupportedMetric metric             = PipelineSweeperSupportedMetrics.GetSupportedMetric(PipelineSweeperSupportedMetrics.Metrics.Auc);

                // Using the simple, uniform random sampling (with replacement) engine
                PipelineOptimizerBase autoMlEngine = new UniformRandomEngine(env);

                // Test initial learning: stop after half of the iterations
                var amls = AutoInference.InferPipelines(env, autoMlEngine, pathData, "", out var schema, numTransformLevels, batchSize,
                                                        metric, out var bestPipeline, numOfSampleRows, new IterationTerminator(initialIterations), MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer);
                env.Check(amls.GetAllEvaluatedPipelines().Length == initialIterations);

                // Resume learning with a terminator covering the full iteration budget
                amls.UpdateTerminator(new IterationTerminator(numIterations));
                bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);
                env.Check(amls.GetAllEvaluatedPipelines().Length == numIterations);

                // Use best pipeline for another task
                var inputFileTrain = new SimpleFileHandle(env, pathData, false, false);
#pragma warning disable 0618
                var datasetTrain = ImportTextData.ImportText(env,
                                                             new ImportTextData.Input {
                    InputFile = inputFileTrain, CustomSchema = schema
                }).Data;
                var inputFileTest = new SimpleFileHandle(env, pathDataTest, false, false);
                var datasetTest   = ImportTextData.ImportText(env,
                                                              new ImportTextData.Input {
                    InputFile = inputFileTest, CustomSchema = schema
                }).Data;
#pragma warning restore 0618

                // REVIEW: Theoretically, it could be the case that a new, very bad learner is introduced and
                // we get unlucky and only select it every time, such that this test fails. Not
                // likely at all, but a non-zero probability. Should be ok, since all current learners are returning d > .80.
                // Only the test metric is asserted on, so the train metric is discarded.
                bestPipeline.RunTrainTestExperiment(datasetTrain, datasetTest, metric, MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer,
                                                    out var testMetricValue, out var _);
                env.Check(testMetricValue > 0.2);
            }
            Done();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs pipeline inference with a multi-class trainer signature on a labeled tweets text
        /// dataset and checks that the number of evaluated pipelines equals the iteration budget.
        /// </summary>
        public void TestTextDatasetLearn()
        {
            const int iterationBudget = 35;
            const int pipelinesPerBatch = 5;
            const int transformLevels = 1;
            const int sampleRowCount = 100;

            using (var env = new ConsoleEnvironment())
            {
                var dataPath = GetDataPath(@"../UnitTest/tweets_labeled_10k_test_validation.tsv");
                var targetMetric = PipelineSweeperSupportedMetrics.GetSupportedMetric(
                    PipelineSweeperSupportedMetrics.Metrics.AccuracyMicro);

                // Simple engine: uniform random sampling (with replacement).
                PipelineOptimizerBase engine = new UniformRandomEngine(env);
                var terminator = new IterationTerminator(iterationBudget);

                // Schema and best pipeline are not needed here, so both out values are discarded.
                var state = AutoInference.InferPipelines(
                    env, engine, dataPath, "", out var _, transformLevels, pipelinesPerBatch,
                    targetMetric, out var _, sampleRowCount, terminator,
                    MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer);

                int evaluatedCount = state.GetAllEvaluatedPipelines().Length;
                env.Check(evaluatedCount == iterationBudget);
            }

            Done();
        }