Пример #1
0
        /// <summary>
        /// Suggests the next pipeline to run, based on the pipelines that have already been run.
        /// </summary>
        /// <remarks>
        /// While fewer runs have been recorded than there are available trainers, the next
        /// first-stage pipeline is returned (see <c>GetNextFirstStagePipeline</c>). After that,
        /// the top trainers are visited from least-run to most-run; for each one, new
        /// hyperparameters are sampled on a clone and the first resulting pipeline that is not
        /// already present in <paramref name="history"/> is returned.
        /// </remarks>
        /// <param name="context">ML context forwarded to the inference and pipeline-building helpers.</param>
        /// <param name="history">Details of the pipelines that have already been run.</param>
        /// <param name="columns">Dataset column information used to infer transforms and to select trainers.</param>
        /// <param name="task">The kind of task the pipeline is being built for.</param>
        /// <param name="isMaximizingMetric">Forwarded to trainer ranking and hyperparameter sampling.</param>
        /// <param name="cacheBeforeTrainer">Caching setting forwarded to the pipeline builder.</param>
        /// <param name="logger">Channel forwarded to hyperparameter sampling.</param>
        /// <param name="trainerAllowList">Optional list forwarded to <c>RecipeInference.AllowedTrainers</c>; defaults to <c>null</c>.</param>
        /// <returns>
        /// The next pipeline to try, or <c>null</c> when no previously unseen pipeline could be produced.
        /// </returns>
        public static SuggestedPipeline GetNextInferredPipeline(MLContext context,
                                                                IEnumerable <SuggestedPipelineRunDetail> history,
                                                                DatasetColumnInfo[] columns,
                                                                TaskKind task,
                                                                bool isMaximizingMetric,
                                                                CacheBeforeTrainer cacheBeforeTrainer,
                                                                IChannel logger,
                                                                IEnumerable <TrainerName> trainerAllowList = null)
        {
            // Upper bound on hyperparameter sampling attempts per trainer.
            const int maxNumberAttempts = 10;

            var availableTrainers = RecipeInference.AllowedTrainers(context, task,
                                                                    ColumnInformationUtil.BuildColumnInfo(columns), trainerAllowList);
            var transforms            = TransformInferenceApi.InferTransforms(context, task, columns).ToList();
            var transformsPostTrainer = TransformInferenceApi.InferTransformsPostTrainer(context, task, columns).ToList();

            // Stage 1: not every available trainer has been run once yet.
            if (history.Count() < availableTrainers.Count())
            {
                return GetNextFirstStagePipeline(context, history, availableTrainers, transforms, transformsPostTrainer, cacheBeforeTrainer);
            }

            // Stage 2: pick the top trainers from the stage 1 runs ...
            var topTrainers = GetTopTrainers(history, availableTrainers, isMaximizingMetric);

            // ... ordered by the number of times they have been run, from lowest to highest.
            var orderedTopTrainers = OrderTrainersByNumTrials(history, topTrainers);

            // Hash set of previously visited pipelines, used to skip duplicates.
            var visitedPipelines = new HashSet <SuggestedPipeline>(history.Select(h => h.Pipeline));

            foreach (var trainer in orderedTopTrainers)
            {
                // Work on a clone so the trainer taken from the ordered list is left untouched.
                var newTrainer = trainer.Clone();

                // Try at most maxNumberAttempts times. The previous do/while condition
                // (++count <= maxNumberAttempts) allowed one attempt more than the limit.
                for (var attempt = 0; attempt < maxNumberAttempts; attempt++)
                {
                    // Sample new hyperparameters for the learner.
                    if (!SampleHyperparameters(context, newTrainer, history, isMaximizingMetric, logger))
                    {
                        // Unable to sample new hyperparameters for the learner
                        // (i.e. SMAC returned 0 suggestions): move on to the next trainer.
                        break;
                    }

                    var suggestedPipeline = SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, newTrainer, cacheBeforeTrainer);

                    // Only return pipelines that have not been seen before.
                    if (!visitedPipelines.Contains(suggestedPipeline))
                    {
                        return suggestedPipeline;
                    }
                }
            }

            return null;
        }
Пример #2
0
 /// <summary>
 /// Creates an <see cref="ExperimentSettings"/> instance populated with default values.
 /// </summary>
 /// <remarks>
 /// Defaults: a time budget of 24 hours (expressed in seconds), the default cancellation token,
 /// the cache directory name "Microsoft.ML.AutoML", automatic caching before the trainer,
 /// and <see cref="int.MaxValue"/> as the model limit.
 /// </remarks>
 public ExperimentSettings()
 {
     // 24 hours expressed in seconds.
     const int secondsPerDay = 24 * 60 * 60;

     MaxExperimentTimeInSeconds = secondsPerDay;
     CancellationToken = default;
     CacheDirectoryName = "Microsoft.ML.AutoML";
     CacheBeforeTrainer = CacheBeforeTrainer.Auto;
     MaxModels = int.MaxValue;
 }
 /// <summary>
 /// Creates an <see cref="ExperimentSettings"/> instance populated with default values.
 /// </summary>
 /// <remarks>
 /// Defaults: a time budget of 24 hours (expressed in seconds), the default cancellation token,
 /// a cache directory named "Microsoft.ML.AutoML" under the system temp path, automatic caching
 /// before the trainer, and <see cref="int.MaxValue"/> as the model limit.
 /// </remarks>
 public ExperimentSettings()
 {
     // 24 hours expressed in seconds.
     const int secondsPerDay = 24 * 60 * 60;

     // Cache location: <temp path>/Microsoft.ML.AutoML
     var cachePath = Path.Combine(Path.GetTempPath(), "Microsoft.ML.AutoML");

     MaxExperimentTimeInSeconds = secondsPerDay;
     CancellationToken = default;
     CacheDirectory = new DirectoryInfo(cachePath);
     CacheBeforeTrainer = CacheBeforeTrainer.Auto;
     MaxModels = int.MaxValue;
 }
 /// <summary>
 /// Builds a pipeline for <paramref name="trainer"/> using empty transform lists
 /// both before and after the trainer.
 /// </summary>
 /// <param name="trainer">The trainer to build the pipeline around.</param>
 /// <param name="cacheBeforeTrainer">Caching setting forwarded to the builder; defaults to <c>Auto</c>.</param>
 /// <returns>The pipeline produced by <c>SuggestedPipelineBuilder.Build</c>.</returns>
 private static SuggestedPipeline BuildSuggestedPipeline(SuggestedTrainer trainer,
                                                         CacheBeforeTrainer cacheBeforeTrainer = CacheBeforeTrainer.Auto)
 {
     // Two separate, empty lists: one for pre-trainer and one for post-trainer transforms.
     var preTrainerTransforms = new List<SuggestedTransform>();
     var postTrainerTransforms = new List<SuggestedTransform>();

     return SuggestedPipelineBuilder.Build(_context, preTrainerTransforms, postTrainerTransforms, trainer, cacheBeforeTrainer);
 }
Пример #5
0
        /// <summary>
        /// Builds the next first-stage pipeline: the trainer at the position equal to the
        /// number of runs recorded in <paramref name="history"/> is selected from
        /// <paramref name="availableTrainers"/> and wrapped in a pipeline.
        /// </summary>
        /// <param name="context">ML context forwarded to the pipeline builder.</param>
        /// <param name="history">Runs recorded so far; only their count is used here.</param>
        /// <param name="availableTrainers">Trainers to pick from, by position.</param>
        /// <param name="transforms">Transforms forwarded to the pipeline builder.</param>
        /// <param name="transformsPostTrainer">Post-trainer transforms forwarded to the pipeline builder.</param>
        /// <param name="cacheBeforeTrainer">Caching setting forwarded to the pipeline builder.</param>
        /// <returns>The pipeline built around the selected trainer.</returns>
        private static SuggestedPipeline GetNextFirstStagePipeline(MLContext context,
                                                                   IEnumerable <SuggestedPipelineRunDetail> history,
                                                                   IEnumerable <SuggestedTrainer> availableTrainers,
                                                                   ICollection <SuggestedTransform> transforms,
                                                                   ICollection <SuggestedTransform> transformsPostTrainer,
                                                                   CacheBeforeTrainer cacheBeforeTrainer)
        {
            // The number of completed runs doubles as the index of the next trainer to try.
            var nextTrainerIndex = history.Count();
            var nextTrainer = availableTrainers.ElementAt(nextTrainerIndex);

            return SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, nextTrainer, cacheBeforeTrainer);
        }
Пример #6
0
        /// <summary>
        /// Assembles a <see cref="SuggestedPipeline"/> from the given transforms and trainer.
        /// </summary>
        /// <param name="context">ML context passed to the normalization helper and the pipeline.</param>
        /// <param name="transforms">
        /// Transforms for the pipeline. The collection is handed to <c>AddNormalizationTransforms</c>
        /// before the pipeline is created.
        /// NOTE(review): that helper presumably adds to this collection in place - confirm.
        /// </param>
        /// <param name="transformsPostTrainer">Post-trainer transforms for the pipeline.</param>
        /// <param name="trainer">Trainer for the pipeline; its built trainer info drives normalization and caching.</param>
        /// <param name="cacheBeforeTrainerSettings">Requested caching setting, resolved to a boolean for the pipeline.</param>
        /// <returns>The newly constructed pipeline.</returns>
        public static SuggestedPipeline Build(MLContext context,
                                              ICollection <SuggestedTransform> transforms,
                                              ICollection <SuggestedTransform> transformsPostTrainer,
                                              SuggestedTrainer trainer,
                                              CacheBeforeTrainer cacheBeforeTrainerSettings)
        {
            var info = trainer.BuildTrainer().Info;

            // Normalization handling runs first, then the caching decision is resolved.
            AddNormalizationTransforms(context, info, transforms);
            bool shouldCache = ShouldCacheBeforeTrainer(info, cacheBeforeTrainerSettings);

            return new SuggestedPipeline(transforms, transformsPostTrainer, trainer, context, shouldCache);
        }
Пример #7
0
 /// <summary>
 /// Resolves the requested caching setting into a yes/no decision.
 /// </summary>
 /// <param name="trainerInfo">Trainer info; only consulted when the setting is <c>Auto</c>.</param>
 /// <param name="cacheBeforeTrainerSettings">The requested caching setting.</param>
 /// <returns>
 /// <c>true</c> when the setting is <c>On</c>, or when it is <c>Auto</c> and
 /// <c>trainerInfo.WantCaching</c> is set; otherwise <c>false</c>.
 /// </returns>
 private static bool ShouldCacheBeforeTrainer(TrainerInfo trainerInfo, CacheBeforeTrainer cacheBeforeTrainerSettings)
 {
     // Explicitly on: cache regardless of the trainer.
     if (cacheBeforeTrainerSettings == CacheBeforeTrainer.On)
     {
         return true;
     }

     // Anything other than Auto at this point means no caching.
     if (cacheBeforeTrainerSettings != CacheBeforeTrainer.Auto)
     {
         return false;
     }

     // Auto: defer to the trainer's own preference.
     return trainerInfo.WantCaching;
 }