/// <summary>
        /// Builds the string key that identifies a learner / transform-set combination.
        /// </summary>
        /// <param name="transformsBitMask">Bitmask describing the transforms in the pipeline.</param>
        /// <param name="learner">Learner whose <c>ToString()</c> value forms the first part of the key.</param>
        /// <returns>The learner string and the bitmask, joined by a '+' character.</returns>
        protected string GetHashKey(long transformsBitMask, RecipeInference.SuggestedRecipe.SuggestedLearner learner)
        {
            string learnerKey = learner.ToString();

            // The key is meaningless without a learner part, so validate it through the host.
            Host.Check(!string.IsNullOrEmpty(learnerKey));

            return string.Format("{0}+{1}", learnerKey, transformsBitMask);
        }
예제 #2
0
        /// <summary>
        /// Samples new hyperparameter values for <paramref name="learner"/>, using the sweeper
        /// cached for that learner's name (one is created and cached on first use).
        /// </summary>
        /// <param name="learner">Learner whose sweepable parameters are to be updated.</param>
        /// <param name="history">
        /// Previously evaluated pipelines. Outside the third stage, only the entries whose learner
        /// name matches <paramref name="learner"/> are handed to the sweeper; during the third
        /// stage an empty history is used instead.
        /// </param>
        private void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, PipelinePattern[] history)
        {
            // If first time optimizing hyperparams, create new hyperparameter sweeper.
            // TryGetValue keeps this to a single lookup on the common (already cached) path.
            if (!_hyperSweepers.TryGetValue(learner.LearnerName, out var sweeper))
            {
                var sps = AutoMlUtils.ConvertToComponentFactories(learner.PipelineNode.SweepParams);
                if (sps.Length > 0)
                {
                    sweeper = new KdoSweeper(Env,
                                             new KdoSweeper.Arguments
                    {
                        SweptParameters         = sps,
                        NumberInitialPopulation = Math.Max(_remainingThirdStageTrials, 2)
                    });
                }
                else
                {
                    // Nothing to sweep over for this learner.
                    sweeper = new FalseSweeper();
                }

                _hyperSweepers[learner.LearnerName] = sweeper;
            }

            PipelinePattern[] historyToUse;

            if (_currentStage == (int)Stages.Third)
            {
                // Third-stage trials ignore history; once the trial budget is used up,
                // move on to the next stage.
                _remainingThirdStageTrials--;
                historyToUse = new PipelinePattern[0];
                if (_remainingThirdStageTrials < 1)
                {
                    _currentStage++;
                }
            }
            else
            {
                // Only the runs that used this learner are relevant to its sweeper.
                historyToUse = history.Where(p => p.Learner.LearnerName == learner.LearnerName).ToArray();
            }

            SampleHyperparameters(learner, sweeper, IsMaximizingMetric, historyToUse);
        }
 /// <summary>
 /// Creates a pattern from a set of transforms, a learner and a loader string.
 /// The arguments are stored as given (no copies are made here).
 /// </summary>
 /// <param name="transforms">Transforms that make up the pattern.</param>
 /// <param name="learner">Learner used by the pattern.</param>
 /// <param name="loader">Loader string associated with the pattern.</param>
 internal Pattern(
     TransformInference.SuggestedTransform[] transforms,
     RecipeInference.SuggestedRecipe.SuggestedLearner learner,
     string loader)
 {
     this.Transforms = transforms;
     this.Learner = learner;
     this.Loader = loader;
 }
예제 #4
0
        /// <summary>
        /// Generates the initial set of sweep candidates: one <see cref="Sweep"/> for every
        /// combination of a non-hidden trainer and an inferred recipe.
        /// </summary>
        /// <param name="env">Host environment used for inference and for the component catalog.</param>
        /// <param name="dataFile">Data file passed to recipe inference.</param>
        /// <param name="schemaDefinitionFile">Schema definition file passed to recipe inference.</param>
        /// <returns>The list of candidates, grouped by trainer and then ordered by recipe.</returns>
        public static List <Sweep> GenerateCandidates(IHostEnvironment env, string dataFile, string schemaDefinitionFile)
        {
            // Get the initial recipes for this data. The inference result itself is not needed here.
            RecipeInference.SuggestedRecipe[] recipes = RecipeInference.InferRecipesFromData(
                env, dataFile, schemaDefinitionFile,
                out Type predictorType, out string loaderSettings, out TransformInference.InferenceResult _);

            // All trainers for this task, excluding the hidden ones. This query is lazy and is
            // enumerated by the loop below.
            var trainers = env.ComponentCatalog
                           .GetAllDerivedClasses(typeof(ITrainer), predictorType)
                           .Where(cls => !cls.IsHidden);

            // Quote the loader settings (when there are any) before embedding them in the loader string.
            string quotedSettings = loaderSettings;
            if (!string.IsNullOrEmpty(loaderSettings))
            {
                var buffer = new StringBuilder();
                CmdQuoter.QuoteValue(loaderSettings, buffer, true);
                quotedSettings = buffer.ToString();
            }

            string loader = $" loader=TextLoader{quotedSettings}";

            var candidates = new List <Sweep>();

            // REVIEW: there are more learners than recipes atm.
            // Flip looping through recipes, then through learners if the cardinality changes.
            foreach (ComponentCatalog.LoadableClassInfo trainerInfo in trainers)
            {
                // One sweeper per trainer, shared by all of that trainer's candidates.
                var sweeper = new TrainerSweeper();
                sweeper.Parameters.AddRange(
                    RecipeInference.GetLearnerSettingsAndSweepParams(env, trainerInfo, out string settings));

                foreach (var recipe in recipes)
                {
                    var suggested = new RecipeInference.SuggestedRecipe.SuggestedLearner
                    {
                        LoadableClassInfo = trainerInfo,
                        Settings          = settings
                    };

                    candidates.Add(new Sweep(new Pattern(recipe.Transforms, suggested, loader), sweeper));
                }
            }

            return candidates;
        }
예제 #5
0
 /// <summary>
 /// Creates a pipeline pattern from clones of the given transforms and learner,
 /// and assigns it a freshly generated unique id.
 /// </summary>
 /// <param name="transforms">Transforms of the pipeline; each one is cloned.</param>
 /// <param name="learner">Learner of the pipeline; it is cloned.</param>
 /// <param name="loaderSettings">Loader settings string, stored as given.</param>
 /// <param name="env">Host environment, stored for later use.</param>
 /// <param name="summary">Optional performance summary for this pipeline; defaults to null.</param>
 public PipelinePattern(TransformInference.SuggestedTransform[] transforms,
                        RecipeInference.SuggestedRecipe.SuggestedLearner learner,
                        string loaderSettings, IHostEnvironment env, PipelineSweeperRunSummary summary = null)
 {
     // Clone the transforms and the learner instead of storing the references we were
     // handed, so that internal pipeline nodes and sweep params are never shared between
     // patterns. That way, changing hyperparameter values in one candidate pipeline
     // cannot overwrite those of another.
     Transforms = (from transform in transforms
                   select transform.Clone()).ToArray();
     Learner = learner.Clone();

     LoaderSettings = loaderSettings;
     _env = env;
     PerformanceSummary = summary;
     UniqueId = Guid.NewGuid();
 }
예제 #6
0
        /// <summary>
        /// Converts the sweepable parameters in <paramref name="hps"/> into a <see cref="ParameterSet"/>
        /// and caches the result on the learner's pipeline node. If the node already has a
        /// cached set, that set is returned and nothing is converted.
        /// </summary>
        /// <param name="hps">Sweepable parameter attributes to convert.</param>
        /// <param name="learner">Learner whose pipeline node supplies fallback values and holds the cache.</param>
        /// <returns>The cached or newly built parameter set.</returns>
        private static ParameterSet ConvertToParameterSet(TlcModule.SweepableParamAttribute[] hps,
                                                          RecipeInference.SuggestedRecipe.SuggestedLearner learner)
        {
            var cachedSet = learner.PipelineNode.HyperSweeperParamSet;
            if (cachedSet != null)
            {
                return cachedSet;
            }

            var converted = new IParameterValue[hps.Length];

            // If any parameter has no raw value yet, fill raw values in from the learner.
            bool anyUnset = hps.Any(p => p.RawValue == null);
            if (anyUnset)
            {
                PopulateSweepableParams(learner);
            }

            for (int idx = 0; idx < hps.Length; idx++)
            {
                var hp = hps[idx];
                Contracts.CheckValue(hp.RawValue, nameof(TlcModule.SweepableParamAttribute.RawValue));

                // NOTE(review): an attribute that matches none of the three types below leaves
                // a null slot in the array handed to ParameterSet - confirm that cannot happen.
                if (hp is TlcModule.SweepableDiscreteParamAttribute dp)
                {
                    // Discrete params carry an index into Options as their raw value.
                    var currentOption =
                        learner.PipelineNode.GetPropertyValueByName(dp.Name, (IComparable)dp.Options[0]);
                    var chosenIndex = (int)(dp.RawValue ?? dp.IndexOf(currentOption));
                    converted[idx] = new StringParameterValue(dp.Name, dp.Options[chosenIndex].ToString());
                }
                else if (hp is TlcModule.SweepableFloatParamAttribute fp)
                {
                    var floatValue = (float)(fp.RawValue ?? learner.PipelineNode.GetPropertyValueByName(fp.Name, 0f));
                    converted[idx] = new FloatParameterValue(fp.Name, floatValue);
                }
                else if (hp is TlcModule.SweepableLongParamAttribute lp)
                {
                    var longValue = (long)(lp.RawValue ?? learner.PipelineNode.GetPropertyValueByName(lp.Name, 0L));
                    converted[idx] = new LongParameterValue(lp.Name, longValue);
                }
            }

            var result = new ParameterSet(converted);
            learner.PipelineNode.HyperSweeperParamSet = result;
            return learner.PipelineNode.HyperSweeperParamSet;
        }
예제 #7
0
 /// <summary>
 /// Sets the raw value of each of the learner's sweep params from the current value of the
 /// same-named property on the learner's pipeline node. Discrete params get the index of the
 /// current value within their options; float and long params get the value itself.
 /// </summary>
 /// <param name="learner">Learner whose pipeline node's sweep params are updated in place.</param>
 public static void PopulateSweepableParams(RecipeInference.SuggestedRecipe.SuggestedLearner learner)
 {
     foreach (var sweepParam in learner.PipelineNode.SweepParams)
     {
         switch (sweepParam)
         {
             case TlcModule.SweepableDiscreteParamAttribute discrete:
                 // The first option serves as the default when looking the property up.
                 var currentOption =
                     learner.PipelineNode.GetPropertyValueByName(discrete.Name, (IComparable)discrete.Options[0]);
                 sweepParam.RawValue = discrete.IndexOf(currentOption);
                 break;

             case TlcModule.SweepableFloatParamAttribute floating:
                 sweepParam.RawValue = learner.PipelineNode.GetPropertyValueByName(floating.Name, 0f);
                 break;

             case TlcModule.SweepableLongParamAttribute integral:
                 sweepParam.RawValue = learner.PipelineNode.GetPropertyValueByName(integral.Name, 0L);
                 break;
         }
     }
 }
        /// <summary>
        /// Asks <paramref name="sweeper"/> for one new set of hyperparameter values and writes
        /// those values into the raw values of the learner's sweep params. Does nothing when
        /// the learner has no sweep params.
        /// </summary>
        /// <param name="learner">Learner whose sweep params are updated.</param>
        /// <param name="sweeper">Sweeper that proposes the new parameter set.</param>
        /// <param name="isMaximizingMetric">Passed through when converting the history into run results.</param>
        /// <param name="history">Previous pipelines, converted into run results for the sweeper.</param>
        protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, ISweeper sweeper,
                                             bool isMaximizingMetric, PipelinePattern[] history)
        {
            var sweepParams = learner.PipelineNode.SweepParams;

            // Nothing to sweep over, so nothing to do.
            if (sweepParams.Length == 0)
            {
                return;
            }

            // Ask the sweeper for a single proposal, given the previous runs.
            var previousRuns = AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric);
            var proposal     = sweeper.ProposeSweeps(1, previousRuns).First();

            // Every proposed value must correspond to one of the learner's sweep params.
            Env.Assert(proposal != null && proposal.All(proposed => sweepParams.Any(sp => sp.Name == proposed.Name)));

            // Remember the proposal on the learner, so that smart hyperparam
            // sweepers (like KDO) can map results back to it.
            learner.PipelineNode.HyperSweeperParamSet = proposal;

            var generators = sweepParams.Select(AutoMlUtils.ToIValueGenerator).ToArray();
            var rawValues  = SweeperProbabilityUtils.ParameterSetAsFloatArray(Host, generators, proposal, false);

            // Write the proposed values back; discrete params store theirs as an int.
            for (int k = 0; k < sweepParams.Length; k++)
            {
                var target = sweepParams[k];
                if (target is TlcModule.SweepableDiscreteParamAttribute)
                {
                    target.RawValue = (int)rawValues[k];
                }
                else
                {
                    target.RawValue = rawValues[k];
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Proposes up to <paramref name="numCandidates"/> new pipelines. A learner is chosen for
        /// each slot, its hyperparameters are set, and transforms are sampled until the resulting
        /// pipeline passes verification and has not been visited before, or the attempt budget
        /// runs out.
        /// </summary>
        /// <param name="history">Previously evaluated pipelines.</param>
        /// <param name="numCandidates">Number of learners to sample; also the maximum number of pipelines returned.</param>
        /// <param name="defaultHyperParams">
        /// When true, sweep params are populated from the learner's current values instead of being sampled.
        /// </param>
        /// <param name="uniformRandomTransforms">Passed through to transform sampling.</param>
        /// <returns>The valid, previously unvisited pipelines that were found (possibly fewer than requested).</returns>
        private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
                                                 bool defaultHyperParams = false, bool uniformRandomTransforms = false)
        {
            const int maxNumberAttempts = 10;

            double[] learnerWeights  = LearnerHistoryToWeights(history, IsMaximizingMetric);
            var      candidates      = new List <PipelinePattern>();
            var      sampledLearners = new RecipeInference.SuggestedRecipe.SuggestedLearner[numCandidates];

            if (_currentStage == (int)Stages.Second || _currentStage == (int)Stages.Third)
            {
                // Select remaining learners in round-robin fashion.
                for (int i = 0; i < numCandidates; i++)
                {
                    sampledLearners[i] = AvailableLearners[i % AvailableLearners.Length].Clone();
                }
            }
            else
            {
                // Select learners, based on weights.
                var indices = ProbUtils.SampleCategoricalDistribution(numCandidates, learnerWeights);
                int slot    = 0;
                foreach (var learnerIndex in indices)
                {
                    sampledLearners[slot++] = AvailableLearners[learnerIndex].Clone();
                }
            }

            // Select hyperparameters and transforms based on learner and history.
            foreach (var learner in sampledLearners)
            {
                PipelinePattern pipeline;
                int             count = 0;
                bool            valid;
                string          hashKey;

                if (!defaultHyperParams)
                {
                    SampleHyperparameters(learner, history);
                }
                else
                {
                    AutoMlUtils.PopulateSweepableParams(learner);
                }

                do
                {   // Make sure transforms set is valid and have not seen pipeline before.
                    // Repeat until passes or runs out of chances.
                    pipeline = new PipelinePattern(
                        SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms),
                        learner, "", Env);
                    hashKey = GetHashKey(transformsBitMask, learner);
                    valid   = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
                    count++;

                    // Strict '<' caps the loop at exactly maxNumberAttempts tries; the previous
                    // '<=' allowed one extra attempt beyond the stated maximum.
                } while (!valid && count < maxNumberAttempts);

                // If maxed out chances and at second stage, move onto next stage.
                if (count >= maxNumberAttempts && _currentStage == (int)Stages.Second)
                {
                    _currentStage++;
                }

                // Keep only valid pipelines.
                if (valid)
                {
                    VisitedPipelines.Add(hashKey);
                    candidates.Add(pipeline);
                }
            }

            return candidates.ToArray();
        }
예제 #10
0
        /// <summary>
        /// Samples a set of transforms for <paramref name="learner"/>. Mandatory transforms are
        /// always included; each remaining available transform is included at random, with a
        /// weight derived from the metric values of the history pipelines that used it. The
        /// final features concat transform(s) are appended at the end.
        /// </summary>
        /// <param name="learner">Learner the transforms are sampled for; used for the learner-specific weight.</param>
        /// <param name="history">Previously evaluated pipelines, with their performance summaries.</param>
        /// <param name="transformsBitMask">
        /// Receives the bitmask of the mandatory and sampled transforms (by atomic group id).
        /// It does not include the final features concat.
        /// </param>
        /// <param name="uniformRandomSampling">
        /// When true, a fixed combined weight of 0.5 is used instead of the history-based weight.
        /// </param>
        /// <returns>Mandatory transforms, then the sampled transforms, then the final features concat.</returns>
        private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
                                                                         PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
        {
            var sampledTransforms =
                new List <TransformInference.SuggestedTransform>(
                    AutoMlUtils.GetMandatoryTransforms(AvailableTransforms));
            var remainingAvailableTransforms =
                AvailableTransforms.Where(t => !sampledTransforms.Any(t.Equals)).ToArray();
            var mask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

            // The best metric value in the history does not depend on the transform being
            // considered, so compute it once instead of once per transform. It is only
            // evaluated when there is at least one optional transform to sample.
            double maxWeight = remainingAvailableTransforms.Length > 0 && history.Length > 0
                ? history.Max(w => w.PerformanceSummary.MetricValue)
                : 0d;

            // Pseudo-mass to encourage sampling of untried transforms.
            double pseudoMass = Math.Max(maxWeight, 1d);

            foreach (var transform in remainingAvailableTransforms)
            {
                // Both counts start at 1 to account for the pseudo-mass entry,
                // so the averages below never divide by zero.
                double allWeight     = pseudoMass;
                double learnerWeight = pseudoMass;
                int    allCounts     = 1;
                int    learnerCounts = 1;

                // Add mass according to performance.
                foreach (var pipeline in history)
                {
                    if (pipeline.Transforms.Any(transform.Equals))
                    {
                        allWeight +=
                            AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                      maxWeight, IsMaximizingMetric);
                        allCounts++;

                        if (pipeline.Learner.LearnerName == learner.LearnerName)
                        {
                            learnerWeight += pipeline.PerformanceSummary.MetricValue;
                            learnerCounts++;
                        }
                    }
                }

                // Take average mass as weight, and take convex combination of
                // learner-specific weight and unconditioned weight.
                allWeight     /= allCounts;
                learnerWeight /= learnerCounts;
                var lambda         = MathUtils.Sigmoid(learnerCounts - 3);
                var combinedWeight = uniformRandomSampling ?
                                     0.5 : lambda * learnerWeight + (1 - lambda) * allWeight;

                // Sample transform according to combined weight.
                // NOTE(review): with an empty history maxWeight is 0, so this ratio is a
                // floating-point division by zero; the transform then appears to always be
                // selected - confirm that this is the intended behavior.
                if (ProbUtils.SampleUniform() <= combinedWeight / maxWeight)
                {
                    mask |= 1L << transform.AtomicGroupId;
                }
            }

            // Add all chosen transforms.
            sampledTransforms.AddRange(remainingAvailableTransforms.Where(t =>
                                                                          AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId)));

            // Add final features concat transform. NOTE: computed bitmask should always
            // exclude the final features concat. If we forget to exclude that one, will
            // cause an error in verification, since it isn't included in the original
            // dependency mapping (i.e., its level isn't in the dictionary).
            sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                         DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
            transformsBitMask = mask;

            return sampledTransforms.ToArray();
        }
예제 #11
0
 /// <summary>
 /// Wraps a learner's sweep params and a run summary's metric value into a <see cref="RunResult"/>.
 /// </summary>
 /// <param name="learner">Learner whose pipeline node's sweep params are converted into a parameter set.</param>
 /// <param name="rs">Run summary supplying the metric value.</param>
 /// <param name="isMetricMaximizing">Passed through to the <see cref="RunResult"/> constructor.</param>
 /// <returns>The run result for this learner and summary.</returns>
 public static IRunResult ConvertToRunResult(RecipeInference.SuggestedRecipe.SuggestedLearner learner, PipelineSweeperRunSummary rs, bool isMetricMaximizing)
 {
     // Convert the parameters first, then read the metric, matching the original evaluation order.
     var parameterSet = ConvertToParameterSet(learner.PipelineNode.SweepParams, learner);
     var metricValue  = rs.MetricValue;

     return new RunResult(parameterSet, metricValue, isMetricMaximizing);
 }