Example #1
        private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
                                                 bool defaultHyperParams = false, bool uniformRandomTransforms = false)
        {
            const int maxNumberAttempts = 10;

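            // Weight each available learner by how well it has performed in the run history.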
            double[] learnerWeights  = LearnerHistoryToWeights(history, IsMaximizingMetric);
            var      candidates      = new List<PipelinePattern>();
            var      sampledLearners = new RecipeInference.SuggestedRecipe.SuggestedLearner[numCandidates];

            if (_currentStage == (int)Stages.Second || _currentStage == (int)Stages.Third)
            {
                // Select remaining learners in round-robin fashion.
                for (int i = 0; i < numCandidates; i++)
                {
                    sampledLearners[i] = AvailableLearners[i % AvailableLearners.Length].Clone();
                }
            }
            else
            {
                // Select learners based on their weights.
                var indices = ProbUtils.SampleCategoricalDistribution(numCandidates, learnerWeights);
                foreach (var item in indices.Select((idx, i) => new { idx, i }))
                {
                    sampledLearners[item.i] = AvailableLearners[item.idx].Clone();
                }
            }

            // Select hyperparameters and transforms based on learner and history.
            foreach (var learner in sampledLearners)
            {
                PipelinePattern pipeline;
                int             count = 0;
                bool            valid;
                string          hashKey;

                if (!defaultHyperParams)
                {
                    SampleHyperparameters(learner, history);
                }
                else
                {
                    AutoMlUtils.PopulateSweepableParams(learner);
                }

                do
                {   // Make sure the transform set is valid and that we have not seen this pipeline before.
                    // Repeat until it passes or we run out of chances.
                    pipeline = new PipelinePattern(
                        SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms),
                        learner, "", Env);
                    hashKey = GetHashKey(transformsBitMask, learner);
                    valid   = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
                    count++;
                } while (!valid && count <= maxNumberAttempts);

                // If we maxed out our chances while at the second stage, move on to the next stage.
                if (count >= maxNumberAttempts && _currentStage == (int)Stages.Second)
                {
                    _currentStage++;
                }

                // Keep only valid pipelines.
                if (valid)
                {
                    VisitedPipelines.Add(hashKey);
                    candidates.Add(pipeline);
                }
            }

            return candidates.ToArray();
        }
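
Note: the learner selection above draws indices in proportion to the history-derived weights via ProbUtils.SampleCategoricalDistribution. As a rough sketch of that idea only (the actual ML.NET helper may be implemented differently, and the names below are hypothetical), categorical sampling can be done by walking the cumulative weights:

        // Illustrative sketch of categorical sampling by cumulative weights.
        // NOT the actual ProbUtils implementation; assumes non-negative weights
        // with a positive total.
        private static readonly Random _rng = new Random();

        private static int[] SampleCategoricalSketch(int numSamples, double[] weights)
        {
            double total   = weights.Sum();
            var    indices = new int[numSamples];

            for (int s = 0; s < numSamples; s++)
            {
                double u          = _rng.NextDouble() * total; // uniform draw in [0, total)
                double cumulative = 0d;

                for (int i = 0; i < weights.Length; i++)
                {
                    cumulative += weights[i];
                    if (u < cumulative)
                    {
                        indices[s] = i;
                        break;
                    }
                }
            }

            return indices;
        }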
Example #2
        private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
                                                                         PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
        {
            var sampledTransforms =
                new List<TransformInference.SuggestedTransform>(
                    AutoMlUtils.GetMandatoryTransforms(AvailableTransforms));
            var remainingAvailableTransforms =
                AvailableTransforms.Where(t => !sampledTransforms.Any(t.Equals)).ToArray();
            var mask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

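            // Weight each remaining (optional) transform by its track record in the run history.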
            foreach (var transform in remainingAvailableTransforms)
            {
                // Add pseudo-mass to encourage sampling of untried transforms.
                double maxWeight     = history.Length > 0 ? history.Max(w => w.PerformanceSummary.MetricValue) : 0d;
                double allWeight     = Math.Max(maxWeight, 1d);
                double learnerWeight = Math.Max(maxWeight, 1d);
                int    allCounts     = 1;
                int    learnerCounts = 1;

                // Add mass according to performance.
                foreach (var pipeline in history)
                {
                    if (pipeline.Transforms.Any(transform.Equals))
                    {
                        allWeight +=
                            AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                      maxWeight, IsMaximizingMetric);
                        allCounts++;

                        if (pipeline.Learner.LearnerName == learner.LearnerName)
                        {
                            learnerWeight += pipeline.PerformanceSummary.MetricValue;
                            learnerCounts++;
                        }
                    }
                }

                // Take average mass as weight, and take convex combination of
                // learner-specific weight and unconditioned weight.
                allWeight     /= allCounts > 0 ? allCounts : 1;
                learnerWeight /= learnerCounts > 0 ? learnerCounts : 1;
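                // Lambda shifts trust from the unconditioned average toward the learner-specific
                // average as more learner-specific observations accumulate.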
                var lambda         = MathUtils.Sigmoid(learnerCounts - 3);
                var combinedWeight = uniformRandomSampling ?
                                     0.5 : lambda * learnerWeight + (1 - lambda) * allWeight;

                // Sample transform according to combined weight.
                if (ProbUtils.SampleUniform() <= combinedWeight / maxWeight)
                {
                    mask |= 1L << transform.AtomicGroupId;
                }
            }

            // Add all chosen transforms.
            sampledTransforms.AddRange(remainingAvailableTransforms.Where(t =>
                                                                          AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId)));

            // Add the final features concat transform. NOTE: the computed bitmask should always
            // exclude the final features concat; if we forget to exclude it, verification will
            // fail, since it isn't included in the original dependency mapping (i.e., its level
            // isn't in the dictionary).
            sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                         DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
            transformsBitMask = mask;

            return sampledTransforms.ToArray();
        }
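
Note: the "convex combination" in the loop above blends the learner-specific average mass with the unconditioned average mass, gated by a sigmoid of the evidence count. A small standalone sketch of how that gate behaves (the weights below are arbitrary illustration values, not taken from any real run):

        // Sketch of the sigmoid-gated blend used above; constants are illustrative only.
        private static double SigmoidSketch(double x) => 1.0 / (1.0 + Math.Exp(-x));

        private static void PrintCombinedWeightsSketch()
        {
            double learnerWeight = 0.9; // hypothetical learner-specific average mass
            double allWeight     = 0.6; // hypothetical unconditioned average mass

            for (int learnerCounts = 1; learnerCounts <= 6; learnerCounts++)
            {
                // With few learner-specific observations, lambda is small and the global
                // average dominates; as evidence accumulates, lambda approaches 1.
                double lambda   = SigmoidSketch(learnerCounts - 3);
                double combined = lambda * learnerWeight + (1 - lambda) * allWeight;
                Console.WriteLine($"counts={learnerCounts}, lambda={lambda:F2}, combined={combined:F2}");
            }
        }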