/// <summary>
        /// Given a predictor type, returns the set of all permissible learners (with their sweeper params, if defined).
        /// </summary>
        /// <remarks>
        /// A trainer input type found in the assembly of <c>Experiment</c> is only returned when an entry point
        /// whose name equals the node's entry point name is present in the module catalog, because not all
        /// learners advertised in the API are available in CORE.
        /// </remarks>
        /// <param name="env">Host environment used to create the module catalog.</param>
        /// <param name="trainerKind">Kind of trainer that the returned learners must match.</param>
        /// <returns>Array of viable learners.</returns>
        public static SuggestedRecipe.SuggestedLearner[] AllowedLearners(IHostEnvironment env, MacroUtils.TrainerKinds trainerKind)
        {
            var trainerInputType = typeof(CommonInputs.ITrainerInput);

            // Not all learners advertised in the API are available in CORE.
            // Collect the names of the trainer entry points once, so the catalog is not
            // re-enumerated for every candidate trainer type in the loop below.
            var catalog = ModuleCatalog.CreateInstance(env);
            var availableEntryPointNames = new HashSet<string>(
                catalog.AllEntryPoints()
                       .Where(ep => ep.InputKinds != null && ep.InputKinds.Any(kind => kind == trainerInputType))
                       .Select(ep => ep.Name));

            var trainerTypes = typeof(Experiment).Assembly.GetTypes()
                               .Where(p => trainerInputType.IsAssignableFrom(p) &&
                                      MacroUtils.IsTrainerOfKind(p, trainerKind));

            var learners = new List<SuggestedRecipe.SuggestedLearner>();
            foreach (var trainerType in trainerTypes)
            {
                // A type without a public parameterless constructor cannot be instantiated here;
                // skip it instead of handing a null entry point object to the pipeline node.
                var epInputObj = (CommonInputs.ITrainerInput)trainerType.GetConstructor(Type.EmptyTypes)?.Invoke(new object[] { });
                if (epInputObj == null)
                {
                    continue;
                }

                var sweepParams = AutoMlUtils.GetSweepRanges(trainerType);
                var candidate = new SuggestedRecipe.SuggestedLearner
                {
                    PipelineNode = new TrainerPipelineNode(epInputObj, sweepParams),
                    LearnerName = trainerType.Name
                };

                if (availableEntryPointNames.Contains(candidate.PipelineNode.GetEpName()))
                {
                    learners.Add(candidate);
                }
            }

            return learners.ToArray();
        }
Esempio n. 2
0
        /// <summary>
        /// Picks (creating it on first use) the hyperparameter sweeper registered for the learner's name and
        /// samples a new set of hyperparameter values for that learner.
        /// </summary>
        /// <param name="learner">Learner whose sweepable parameters are updated.</param>
        /// <param name="history">Previously evaluated pipelines; only those with the same learner name are used.</param>
        private void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, PipelinePattern[] history)
        {
            // Lazily create one sweeper per learner name the first time that learner is optimized.
            if (!_hyperSweepers.TryGetValue(learner.LearnerName, out var sweeper))
            {
                var sweptParameters = AutoMlUtils.ConvertToComponentFactories(learner.PipelineNode.SweepParams);
                if (sweptParameters.Length == 0)
                {
                    // No sweepable parameters were produced for this learner.
                    sweeper = new FalseSweeper();
                }
                else
                {
                    sweeper = new KdoSweeper(Env,
                                             new KdoSweeper.Arguments
                                             {
                                                 SweptParameters = sweptParameters,
                                                 NumberInitialPopulation = Math.Max(_remainingThirdStageTrials, 2)
                                             });
                }

                _hyperSweepers[learner.LearnerName] = sweeper;
            }

            // Restrict the history to runs of this very learner.
            var relevantHistory = history.Where(p => p.Learner.LearnerName == learner.LearnerName).ToArray();

            // During the third stage the sweeper is given an empty history, and the stage
            // ends once its trial budget is used up.
            if (_currentStage == (int)Stages.Third)
            {
                _remainingThirdStageTrials--;
                relevantHistory = new PipelinePattern[0];
                if (_remainingThirdStageTrials < 1)
                {
                    _currentStage++;
                }
            }

            SampleHyperparameters(learner, sweeper, IsMaximizingMetric, relevantHistory);
        }
        /// <summary>
        /// Assigns a freshly drawn random value to every sweepable parameter in the sequence.
        /// </summary>
        /// <param name="sweepParams">Sweepable parameters whose <c>RawValue</c> is overwritten.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown for a parameter that is not a discrete, float or long sweepable attribute.
        /// </exception>
        private void RandomlyPerturbSweepableHyperparameters(IEnumerable<TlcModule.SweepableParamAttribute> sweepParams)
        {
            foreach (var sweepable in sweepParams)
            {
                if (sweepable is TlcModule.SweepableDiscreteParamAttribute discrete)
                {
                    // Discrete parameters store the index of the chosen option.
                    Env.Assert(discrete.Options.Length > 0, $"Trying to sweep over discrete parameter, {discrete.Name}, with no options.");
                    discrete.RawValue = Host.Rand.Next(discrete.Options.Length);
                }
                else if (sweepable is TlcModule.SweepableFloatParamAttribute floating)
                {
                    // Map a random draw through the parameter's value generator.
                    var floatGenerator = AutoMlUtils.ToIValueGenerator(floating);
                    var floatDraw = (IParameterValue<float>)floatGenerator.CreateFromNormalized(Host.Rand.NextSingle());
                    floating.RawValue = floatDraw.Value;
                }
                else if (sweepable is TlcModule.SweepableLongParamAttribute integral)
                {
                    var longGenerator = AutoMlUtils.ToIValueGenerator(integral);
                    var longDraw = (IParameterValue<long>)longGenerator.CreateFromNormalized(Host.Rand.NextSingle());
                    integral.RawValue = longDraw.Value;
                }
                else
                {
                    throw new NotSupportedException($"Unknown type of sweepable parameter attribute: {sweepable.GetType()}");
                }
            }
        }
 /// <summary>
 /// Replaces the set of available learners and populates the sweepable parameters of each one.
 /// </summary>
 /// <param name="availableLearners">Learners that become the new <c>AvailableLearners</c>.</param>
 public virtual void UpdateLearners(RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners)
 {
     AvailableLearners = availableLearners;

     var currentLearners = AvailableLearners;
     for (int i = 0; i < currentLearners.Length; i++)
     {
         AutoMlUtils.PopulateSweepableParams(currentLearners[i]);
     }
 }
        /// <summary>
        /// Returns every available transform followed by the final features concat transform(s).
        /// </summary>
        /// <param name="transformsBitMask">
        /// Bitmask computed from the available transforms, before the final features concat is appended.
        /// </param>
        /// <returns>The selected transforms, ending with the final features concat transform(s).</returns>
        private TransformInference.SuggestedTransform[] SampleTransforms(out long transformsBitMask)
        {
            // No actual sampling takes place for now: all transforms are selected.
            var chosen = AvailableTransforms.ToList();

            // The bitmask only covers the transforms chosen so far.
            var maskedTransforms = chosen.ToArray();
            transformsBitMask = AutoMlUtils.TransformsToBitmask(maskedTransforms);

            // Append the final features concat transform.
            var finalConcat = AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                DependencyMapping, chosen.ToArray(), AvailableTransforms);
            chosen.AddRange(finalConcat);

            return chosen.ToArray();
        }
        /// <summary>
        /// Runs a train-test experiment on the current pipeline, through entrypoints, and reads back
        /// the requested metric for both the test and the training data.
        /// </summary>
        /// <param name="trainData">Data used for training.</param>
        /// <param name="testData">Data used for testing.</param>
        /// <param name="metric">Metric whose <c>Name</c> selects the value extracted from the metric outputs.</param>
        /// <param name="trainerKind">Trainer kind forwarded to the experiment creation.</param>
        /// <param name="testMetricValue">Receives the metric value extracted from the overall metrics output.</param>
        /// <param name="trainMetricValue">Receives the metric value extracted from the training overall metrics output.</param>
        public void RunTrainTestExperiment(IDataView trainData, IDataView testData,
                                           SupportedMetric metric, MacroUtils.TrainerKinds trainerKind, out double testMetricValue,
                                           out double trainMetricValue)
        {
            var trainTestExperiment = CreateTrainTestExperiment(trainData, testData, trainerKind, true, out var outputs);
            trainTestExperiment.Run();

            // Fetch both metric data views before extracting any value from them.
            var testMetrics = trainTestExperiment.GetOutput(outputs.OverallMetrics);
            var trainingMetrics = trainTestExperiment.GetOutput(outputs.TrainingOverallMetrics);

            string metricName = metric.Name;
            testMetricValue = AutoMlUtils.ExtractValueFromIdv(_env, testMetrics, metricName);
            trainMetricValue = AutoMlUtils.ExtractValueFromIdv(_env, trainingMetrics, metricName);
        }
Esempio n. 7
0
        /// <summary>
        /// Creates a transform pipeline node around a copy of the given entry point object.
        /// </summary>
        /// <param name="entryPointObj">Transform entry point; used directly when cloning yields null.</param>
        /// <param name="sweepParams">
        /// Sweepable parameters to clone into the node. When null, the sweep ranges are derived
        /// from the type of the stored entry point object.
        /// </param>
        /// <param name="subTrainerObj">Optional sub-trainer; a clone of it is stored when provided.</param>
        public TransformPipelineNode(CommonInputs.ITransformInput entryPointObj,
                                     IEnumerable<TlcModule.SweepableParamAttribute> sweepParams = null,
                                     CommonInputs.ITrainerInput subTrainerObj = null)
        {
            // Keep a private copy of the entry point when one can be made.
            _entryPointObj = CloneEntryPoint(entryPointObj) ?? entryPointObj;

            if (subTrainerObj != null)
            {
                _subTrainerObj = CloneEntryPoint(subTrainerObj);
            }

            if (sweepParams == null)
            {
                SweepParams = AutoMlUtils.GetSweepRanges(_entryPointObj.GetType());
            }
            else
            {
                SweepParams = sweepParams.Select(p => p.Clone()).ToArray();
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Creates a trainer pipeline node around a copy of the given entry point object.
        /// </summary>
        /// <param name="entryPointObj">Trainer entry point; used directly when cloning yields null.</param>
        /// <param name="sweepParams">
        /// Sweepable parameters to clone into the node. When null, the sweep ranges are derived
        /// from the type of the stored entry point object.
        /// </param>
        /// <param name="hyperParameterSet">
        /// Optional parameter set; when provided, a clone is stored and its values are propagated
        /// to the sweep parameters and the entry point properties.
        /// </param>
        public TrainerPipelineNode(CommonInputs.ITrainerInput entryPointObj,
                                   IEnumerable<TlcModule.SweepableParamAttribute> sweepParams = null,
                                   ParameterSet hyperParameterSet = null)
        {
            // Keep a private copy of the entry point when one can be made.
            _entryPointObj = CloneEntryPoint(entryPointObj) ?? entryPointObj;

            if (sweepParams == null)
            {
                SweepParams = AutoMlUtils.GetSweepRanges(_entryPointObj.GetType());
            }
            else
            {
                SweepParams = sweepParams.Select(p => p.Clone()).ToArray();
            }

            HyperSweeperParamSet = hyperParameterSet?.Clone();
            if (HyperSweeperParamSet == null)
            {
                return;
            }

            // Make sure sweep params and param set are consistent.
            PropagateParamSetValues(HyperSweeperParamSet, SweepParams);
            UpdateProperties();
        }
        /// <summary>
        /// Converts the pipeline history into a normalized weight per available learner.
        /// </summary>
        /// <remarks>
        /// Each learner's weight is the average processed metric value of the history entries that used it.
        /// A learner without any history entry gets a weight of 1.0 to encourage exploration of untried
        /// algorithms. History entries whose learner is not among the available learners are ignored.
        /// </remarks>
        /// <param name="history">Previously evaluated pipelines.</param>
        /// <param name="isMaximizingMetric">Forwarded to <c>AutoMlUtils.ProcessWeight</c>.</param>
        /// <returns>
        /// The result of <c>SweeperProbabilityUtils.Normalize</c> over the per-learner weights,
        /// indexed like <c>AvailableLearners</c>.
        /// </returns>
        protected double[] LearnerHistoryToWeights(PipelinePattern[] history, bool isMaximizingMetric)
        {
            int numLearners = AvailableLearners.Length;

            var weights = new double[numLearners];
            var counts = new int[numLearners];
            var labelToIndex = new Dictionary<string, int>(numLearners);
            double maxWeight = history.Length > 0 ? history.Max(w => w.PerformanceSummary.MetricValue) : 0d;

            // Map categorical values to their index
            for (int j = 0; j < numLearners; j++)
            {
                labelToIndex[AvailableLearners[j].LearnerName] = j;
            }

            // Add mass according to performance
            foreach (var pipeline in history)
            {
                // One dictionary probe both answers "is this learner available?" and yields its
                // index. A null name can never be a key, so it is treated as "not available".
                string learnerName = pipeline.Learner.LearnerName;
                if (learnerName == null || !labelToIndex.TryGetValue(learnerName, out int index))
                {
                    continue;
                }

                weights[index] += AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                            maxWeight, isMaximizingMetric);
                counts[index]++;
            }

            // Take average mass for each learner. If a learner has not been seen, default
            // its weight to 1.0 to encourage exploration of untried algorithms.
            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] = counts[i] > 0 ? weights[i] / counts[i] : 1d;
            }

            // Normalize weights to sum to one and return
            return SweeperProbabilityUtils.Normalize(weights);
        }
        /// <summary>
        /// Stores the description of the search space and populates the sweepable parameters of every learner.
        /// </summary>
        /// <param name="availableTransforms">Transforms that may be part of a pipeline.</param>
        /// <param name="availableLearners">Learners that may be part of a pipeline.</param>
        /// <param name="pipelineVerifier">Callback taking a pipeline and a transforms bitmask and returning a bool.</param>
        /// <param name="originalData">Stored as <c>OriginalData</c>.</param>
        /// <param name="fullyTransformedData">Stored as <c>FullyTransformedData</c>.</param>
        /// <param name="dependencyMapping">Stored as <c>DependencyMapping</c>.</param>
        /// <param name="isMaximizingMetric">Stored as <c>IsMaximizingMetric</c>.</param>
        public virtual void SetSpace(TransformInference.SuggestedTransform[] availableTransforms,
                                     RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
                                     Func<PipelinePattern, long, bool> pipelineVerifier,
                                     IDataView originalData, IDataView fullyTransformedData, AutoInference.DependencyMap dependencyMapping,
                                     bool isMaximizingMetric)
        {
            // Candidate building blocks and the check applied to pipelines built from them.
            AvailableLearners = availableLearners;
            AvailableTransforms = availableTransforms;
            PipelineVerifier = pipelineVerifier;

            // Data views and the dependency mapping.
            OriginalData = originalData;
            FullyTransformedData = fullyTransformedData;
            DependencyMapping = dependencyMapping;

            IsMaximizingMetric = isMaximizingMetric;

            var currentLearners = AvailableLearners;
            for (int i = 0; i < currentLearners.Length; i++)
            {
                AutoMlUtils.PopulateSweepableParams(currentLearners[i]);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Produces up to <paramref name="numCandidates"/> pipelines by visiting the available learners
        /// one after another, starting at the current learner index.
        /// </summary>
        /// <remarks>
        /// A learner only contributes a candidate when a pipeline built for it passes the pipeline verifier.
        /// If every available learner fails in a row, the search gives up and fewer candidates than
        /// requested are returned, instead of looping forever.
        /// </remarks>
        /// <param name="history">Previously evaluated pipelines (not read by this method).</param>
        /// <param name="numCandidates">Number of pipelines requested.</param>
        /// <param name="dataRoles">Stored as <c>DataRoles</c> before candidates are generated.</param>
        /// <returns>The valid pipelines found; may be shorter than requested.</returns>
        public override PipelinePattern[] GetNextCandidates(IEnumerable<PipelinePattern> history, int numCandidates, RoleMappedData dataRoles)
        {
            // The verifier loop below stops once the attempt counter exceeds this value.
            const int maxVerifierRetries = 1000;

            var candidates = new List<PipelinePattern>();

            DataRoles = dataRoles;

            // Number of learners visited in a row without obtaining a valid pipeline.
            int consecutiveFailedLearners = 0;

            while (candidates.Count < numCandidates)
            {
                Contracts.Assert(0 <= _currentLearnerIndex && _currentLearnerIndex < AvailableLearners.Length);

                // Select hyperparameters and transforms based on learner and history.
                PipelinePattern pipeline;
                int count = 0;
                bool valid;
                var learner = AvailableLearners[_currentLearnerIndex];

                // Make sure sweep parameter values exist; if not, populate them from learner object.
                if (learner.PipelineNode.SweepParams.Any(p => p.RawValue == null))
                {
                    AutoMlUtils.PopulateSweepableParams(learner);
                }

                do
                {   // Make sure transforms set is valid. Repeat until passes verifier.
                    pipeline = new PipelinePattern(SampleTransforms(out var transformsBitMask),
                                                   learner, "", Env);
                    valid = PipelineVerifier(pipeline, transformsBitMask);
                    count++;
                } while (!valid && count <= maxVerifierRetries);

                // Update current index
                _currentLearnerIndex = (_currentLearnerIndex + 1) % AvailableLearners.Length;

                // Keep only valid pipelines.
                if (valid)
                {
                    candidates.Add(pipeline);
                    consecutiveFailedLearners = 0;
                    continue;
                }

                // A complete round over all learners produced nothing valid: stop instead of
                // spinning forever on a search space that cannot satisfy the verifier.
                consecutiveFailedLearners++;
                if (consecutiveFailedLearners >= AvailableLearners.Length)
                {
                    break;
                }
            }

            return candidates.ToArray();
        }
        /// <summary>
        /// Asks the sweeper for one new parameter set and writes the proposed values into the
        /// learner's sweepable parameters.
        /// </summary>
        /// <param name="learner">Learner whose pipeline node receives the proposed values.</param>
        /// <param name="sweeper">Sweeper that proposes the next parameter set.</param>
        /// <param name="isMaximizingMetric">Forwarded when converting the history to run results.</param>
        /// <param name="history">Pipelines converted to run results and handed to the sweeper.</param>
        protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, ISweeper sweeper,
                                             bool isMaximizingMetric, PipelinePattern[] history)
        {
            var hyperParams = learner.PipelineNode.SweepParams;

            // Nothing to do for a learner without sweepable hyperparameters.
            if (hyperParams.Length == 0)
            {
                return;
            }

            // Request exactly one proposal, based on the previous runs.
            var previousRuns = AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric);
            var proposedParamSet = sweeper.ProposeSweeps(1, previousRuns).First();

            Env.Assert(proposedParamSet != null && proposedParamSet.All(ps => hyperParams.Any(hp => hp.Name == ps.Name)));

            // Remember the proposal on the learner, so that smart hyperparam
            // sweepers (like KDO) can map results back to it.
            learner.PipelineNode.HyperSweeperParamSet = proposedParamSet;

            var generators = hyperParams.Select(AutoMlUtils.ToIValueGenerator).ToArray();
            var values = SweeperProbabilityUtils.ParameterSetAsFloatArray(Host, generators, proposedParamSet, false);

            // Copy the proposed values over; discrete parameters store theirs as an int.
            for (int i = 0; i < hyperParams.Length; i++)
            {
                var target = hyperParams[i];
                if (target is TlcModule.SweepableDiscreteParamAttribute)
                {
                    target.RawValue = (int)values[i];
                }
                else
                {
                    target.RawValue = values[i];
                }
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Builds up to <paramref name="numCandidates"/> new pipelines: picks a learner for every slot,
        /// sets its hyperparameters, then samples transform sets until one passes verification and
        /// has not been visited before.
        /// </summary>
        /// <param name="history">Previously evaluated pipelines.</param>
        /// <param name="numCandidates">Number of learners to sample, i.e. the maximum number of pipelines returned.</param>
        /// <param name="defaultHyperParams">
        /// When true, the learner's sweepable parameters are populated via <c>AutoMlUtils.PopulateSweepableParams</c>
        /// instead of being sampled from the history.
        /// </param>
        /// <param name="uniformRandomTransforms">Forwarded to the transform sampling.</param>
        /// <returns>The valid, previously unvisited pipelines; may be fewer than requested.</returns>
        private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
                                                 bool defaultHyperParams = false, bool uniformRandomTransforms = false)
        {
            const int maxNumberAttempts = 10;

            double[] learnerWeights = LearnerHistoryToWeights(history, IsMaximizingMetric);
            var accepted = new List<PipelinePattern>();
            var sampledLearners = new RecipeInference.SuggestedRecipe.SuggestedLearner[numCandidates];

            bool roundRobin = _currentStage == (int)Stages.Second || _currentStage == (int)Stages.Third;
            if (!roundRobin)
            {
                // Select learners, based on weights.
                int slot = 0;
                foreach (var learnerIndex in ProbUtils.SampleCategoricalDistribution(numCandidates, learnerWeights))
                {
                    sampledLearners[slot] = AvailableLearners[learnerIndex].Clone();
                    slot++;
                }
            }
            else
            {
                // Select remaining learners in round-robin fashion.
                for (int slot = 0; slot < numCandidates; slot++)
                {
                    sampledLearners[slot] = AvailableLearners[slot % AvailableLearners.Length].Clone();
                }
            }

            // Select hyperparameters and transforms based on learner and history.
            foreach (var learner in sampledLearners)
            {
                if (defaultHyperParams)
                {
                    AutoMlUtils.PopulateSweepableParams(learner);
                }
                else
                {
                    SampleHyperparameters(learner, history);
                }

                PipelinePattern pipeline;
                string hashKey;
                bool valid;
                int attempts = 0;

                do
                {   // Sample transforms until the set is valid and the pipeline is new,
                    // or until the attempts run out.
                    var transforms = SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms);
                    pipeline = new PipelinePattern(transforms, learner, "", Env);
                    hashKey = GetHashKey(transformsBitMask, learner);
                    valid = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
                    attempts++;
                } while (!valid && attempts <= maxNumberAttempts);

                // Once the attempt counter has reached the limit while in the second stage,
                // move onto the next stage.
                if (attempts >= maxNumberAttempts && _currentStage == (int)Stages.Second)
                {
                    _currentStage++;
                }

                // Keep only valid pipelines.
                if (valid)
                {
                    VisitedPipelines.Add(hashKey);
                    accepted.Add(pipeline);
                }
            }

            return accepted.ToArray();
        }
Esempio n. 14
0
        /// <summary>
        /// Samples a set of transforms for the given learner: all mandatory transforms, a random subset of
        /// the remaining ones weighted by past performance, and the final features concat transform(s).
        /// </summary>
        /// <param name="learner">Learner for which the learner-specific weights are computed.</param>
        /// <param name="history">Previously evaluated pipelines used to weight each transform.</param>
        /// <param name="transformsBitMask">
        /// Bitmask of the selected atomic groups; it excludes the final features concat transform.
        /// </param>
        /// <param name="uniformRandomSampling">
        /// When true, a fixed combined weight of 0.5 is used instead of the history-based weight.
        /// </param>
        /// <returns>The sampled transforms, ending with the final features concat transform(s).</returns>
        private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
                                                                         PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
        {
            var sampledTransforms =
                new List<TransformInference.SuggestedTransform>(
                    AutoMlUtils.GetMandatoryTransforms(AvailableTransforms));
            var remainingAvailableTransforms =
                AvailableTransforms.Where(t => !sampledTransforms.Any(t.Equals)).ToArray();
            var mask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

            // The best metric value and the pseudo-mass derived from it do not depend on the
            // transform, so compute them once. History is only inspected when there is at
            // least one transform left to sample.
            double maxWeight = remainingAvailableTransforms.Length > 0 && history.Length > 0
                ? history.Max(w => w.PerformanceSummary.MetricValue)
                : 0d;
            double pseudoMass = Math.Max(maxWeight, 1d);

            foreach (var transform in remainingAvailableTransforms)
            {
                // Add pseudo-mass to encourage sampling of untried transforms.
                double allWeight = pseudoMass;
                double learnerWeight = pseudoMass;
                int allCounts = 1;
                int learnerCounts = 1;

                // Add mass according to performance.
                foreach (var pipeline in history)
                {
                    if (!pipeline.Transforms.Any(transform.Equals))
                    {
                        continue;
                    }

                    allWeight +=
                        AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                  maxWeight, IsMaximizingMetric);
                    allCounts++;

                    if (pipeline.Learner.LearnerName == learner.LearnerName)
                    {
                        // NOTE(review): the raw metric value is added here, while the unconditioned
                        // weight above goes through ProcessWeight - confirm this is intended.
                        learnerWeight += pipeline.PerformanceSummary.MetricValue;
                        learnerCounts++;
                    }
                }

                // Take average mass as weight (both counts start at 1, so they are never zero),
                // and take convex combination of learner-specific weight and unconditioned weight.
                allWeight /= allCounts;
                learnerWeight /= learnerCounts;
                var lambda = MathUtils.Sigmoid(learnerCounts - 3);
                var combinedWeight = uniformRandomSampling ?
                                     0.5 : lambda * learnerWeight + (1 - lambda) * allWeight;

                // Sample transform according to combined weight.
                // NOTE(review): maxWeight is 0 when history is empty, which makes this ratio a
                // division by zero in floating point - confirm that is the desired behavior.
                if (ProbUtils.SampleUniform() <= combinedWeight / maxWeight)
                {
                    mask |= 1L << transform.AtomicGroupId;
                }
            }

            // Add all chosen transforms.
            sampledTransforms.AddRange(remainingAvailableTransforms.Where(t =>
                                                                          AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId)));

            // Add final features concat transform. NOTE: computed bitmask should always
            // exclude the final features concat. If we forget to exclude that one, will
            // cause an error in verification, since it isn't included in the original
            // dependency mapping (i.e., its level isn't in the dictionary).
            sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                         DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
            transformsBitMask = mask;

            return sampledTransforms.ToArray();
        }
            /// <summary>
            /// Search space is transforms X learners X hyperparameters.
            /// Infers the transforms level by level, records which transforms are responsible for which
            /// columns, stores the resulting state and hands the search space to the AutoML engine.
            /// </summary>
            /// <param name="numTransformLevels">Maximum number of transform levels to infer.</param>
            /// <param name="learners">Learners that form the learner part of the search space.</param>
            /// <param name="transformInferenceFunction">
            /// Function that suggests transforms for a data view, given the inference arguments.
            /// </param>
            private void ComputeSearchSpace(int numTransformLevels, RecipeInference.SuggestedRecipe.SuggestedLearner[] learners,
                                            Func<IDataView, TransformInference.Arguments, TransformInference.SuggestedTransform[]> transformInferenceFunction)
            {
                // Highest atomic group id that still maps to a distinct bit of a 64-bit (long) bitmask.
                const int maxAtomicGroupIdForBitmask = 63;

                _env.AssertValue(_trainData, nameof(_trainData), "Must set training data prior to inferring search space.");

                var h = _env.Register("ComputeSearchSpace");

                using (var ch = h.Start("ComputeSearchSpace"))
                {
                    _env.Check(IsValidLearnerSet(learners), "Unsupported learner encountered, cannot update search space.");

                    var dataSample = _trainData;
                    var inferenceArgs = new TransformInference.Arguments
                    {
                        EstimatedSampleFraction = 1.0,
                        ExcludeFeaturesConcatTransforms = true
                    };

                    // Initialize structure for mapping columns back to specific transforms
                    var dependencyMapping = new DependencyMap
                    {
                        { 0, AutoMlUtils.ComputeColumnResponsibilities(dataSample, new TransformInference.SuggestedTransform[0]) }
                    };

                    // Get suggested transforms for all levels. Defines another part of search space.
                    var transformsList = new List<TransformInference.SuggestedTransform>();
                    for (int i = 0; i < numTransformLevels; i++)
                    {
                        // Update level for transforms
                        inferenceArgs.Level = i + 1;

                        // Infer transforms using experts
                        var levelTransforms = transformInferenceFunction(dataSample, inferenceArgs);

                        // If no more transforms to apply, dataSample won't change. So end loop.
                        if (levelTransforms.Length == 0)
                        {
                            break;
                        }

                        // Make sure we don't overflow our bitmask. Ids 0..63 are the only ones that
                        // fit: shifting a long by 64 wraps around and would collide with id 0.
                        var maxAtomicGroupId = levelTransforms.Max(t => t.AtomicGroupId);
                        if (maxAtomicGroupId > maxAtomicGroupIdForBitmask)
                        {
                            break;
                        }

                        // Level-up atomic group id offset.
                        inferenceArgs.AtomicIdOffset = maxAtomicGroupId + 1;

                        // Apply transforms to dataview for this level.
                        dataSample = AutoMlUtils.ApplyTransformSet(_env, dataSample, levelTransforms);

                        // Keep list of which transforms can be responsible for which output columns
                        dependencyMapping.Add(inferenceArgs.Level,
                                              AutoMlUtils.ComputeColumnResponsibilities(dataSample, levelTransforms));
                        transformsList.AddRange(levelTransforms);
                    }

                    var transforms = transformsList.ToArray();
                    Func<PipelinePattern, long, bool> verifier = AutoMlUtils.ValidationWrapper(transforms, dependencyMapping);

                    // Save state, for resuming learning
                    _availableTransforms = transforms;
                    _availableLearners = learners;
                    _dependencyMapping = dependencyMapping;
                    _transformedData = dataSample;

                    // Update autoML engine to know what the search space looks like
                    AutoMlEngine.SetSpace(_availableTransforms, _availableLearners, verifier,
                                          _trainData, _transformedData, _dependencyMapping, Metric.IsMaximizing);

                    ch.Done();
                }
            }
        /// <summary>
        /// Generates up to <paramref name="numOfPipelines"/> random pipelines by drawing a random set of
        /// transform atomic groups and a random learner, and randomly perturbing their sweepable hyperparameters.
        /// </summary>
        /// <remarks>
        /// Pipelines whose hash key was already visited are re-drawn, unless too many collisions happen in a row.
        /// The verifier result is cached per transforms bitmask. Generation stops early when too many
        /// pipelines have been generated without reaching the requested number of valid ones.
        /// </remarks>
        /// <param name="numOfPipelines">Number of pipelines requested.</param>
        /// <returns>The valid pipelines generated; may be fewer than requested.</returns>
        private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
        {
            Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
            Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));
            int atomicGroupLimit = AvailableTransforms.Select(t => t.AtomicGroupId)
                                   .DefaultIfEmpty(-1).Max() + 1;
            var pipelines = new List<PipelinePattern>();
            int collisions = 0;
            int totalCount = 0;

            while (pipelines.Count < numOfPipelines)
            {
                // Generate random bitmask (set of transform atomic group IDs)
                long transformsBitMask = CreateRandomTransformsBitMask(atomicGroupLimit);

                // Include all "always on" transforms, such as autolabel.
                transformsBitMask |= AutoMlUtils.IncludeMandatoryTransforms(AvailableTransforms.ToList());

                // Get actual learner and transforms for pipeline
                // NOTE(review): the learner is taken from AvailableLearners without a copy being made
                // here, so the perturbation below acts on that instance - confirm this is intended.
                var selectedLearner = AvailableLearners[Host.Rand.Next(AvailableLearners.Length)];
                var selectedTransforms = AvailableTransforms.Where(t =>
                                                                   AutoMlUtils.AtomicGroupPresent(transformsBitMask, t.AtomicGroupId)).ToList();

                // Randomly change transform sweepable hyperparameter settings
                selectedTransforms.ForEach(t => RandomlyPerturbSweepableHyperparameters(t.PipelineNode));

                // Randomly change learner sweepable hyperparameter settings
                RandomlyPerturbSweepableHyperparameters(selectedLearner.PipelineNode);

                // Always include features concat transform
                selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                              DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

                // Compute hash key for checking if we've already seen this pipeline.
                // However, if we keep missing, don't want to get stuck in infinite loop.
                // Try for a good number of times (for example, numOfPipelines * 4), then just add
                // all generated pipelines to get us out of rut.
                string hashKey = GetHashKey(transformsBitMask, selectedLearner);
                if (collisions < numOfPipelines * 4 && VisitedPipelines.Contains(hashKey))
                {
                    collisions++;
                    continue;
                }

                VisitedPipelines.Add(hashKey);
                collisions = 0;
                totalCount++;

                // Keep pipeline if valid. The verifier outcome is cached per bitmask.
                var pipeline = new PipelinePattern(selectedTransforms.ToArray(), selectedLearner, "", Env);
                if (!TransformsMaskValidity.TryGetValue(transformsBitMask, out var isValidMask))
                {
                    isValidMask = PipelineVerifier(pipeline, transformsBitMask);
                    TransformsMaskValidity.Add(transformsBitMask, isValidMask);
                }
                if (isValidMask)
                {
                    pipelines.Add(pipeline);
                }

                // Only invalid pipelines available, stuck in loop.
                // Break out and return no pipelines.
                if (totalCount > numOfPipelines * 10)
                {
                    break;
                }
            }

            return pipelines.ToArray();
        }

        /// <summary>
        /// Draws a random bitmask whose lowest <paramref name="bitCount"/> bits are random and whose
        /// remaining bits are zero.
        /// </summary>
        /// <remarks>
        /// The mask is assembled from chunks of at most 30 bits, because a single draw bounded by
        /// 2^bitCount does not fit into an int once bitCount reaches 31. For bitCount up to 30 exactly
        /// one draw with the bound 2^bitCount is made.
        /// </remarks>
        /// <param name="bitCount">Number of random low-order bits; values above 64 are treated as 64.</param>
        private long CreateRandomTransformsBitMask(int bitCount)
        {
            const int maxChunkBits = 30;

            long bitMask = 0;
            int bitsLeft = Math.Min(bitCount, 64);
            int shift = 0;

            do
            {
                int chunkBits = Math.Min(bitsLeft, maxChunkBits);
                bitMask |= (long)Host.Rand.Next(1 << chunkBits) << shift;
                shift += chunkBits;
                bitsLeft -= chunkBits;
            } while (bitsLeft > 0);

            return bitMask;
        }
Esempio n. 17
0
 /// <summary>
 /// Forwards this node's entry point object and sweep parameters to
 /// <c>AutoMlUtils.CheckEntryPointStateMatchesParamValues</c> and returns its result.
 /// </summary>
 public bool CheckEntryPointStateMatchesParamValues()
 {
     return AutoMlUtils.CheckEntryPointStateMatchesParamValues(_entryPointObj, SweepParams);
 }
Esempio n. 18
0
 /// <summary>
 /// Forwards this node's entry point object and sweep parameters to
 /// <c>AutoMlUtils.UpdateProperties</c> and returns its result.
 /// </summary>
 public bool UpdateProperties()
 {
     return AutoMlUtils.UpdateProperties(_entryPointObj, SweepParams);
 }