/// <summary>
/// Selects the transforms for a pipeline. Currently no sampling takes place: every
/// entry of <c>AvailableTransforms</c> is selected, followed by the final features
/// concat transform.
/// </summary>
/// <param name="transformsBitMask">
/// Receives the bitmask computed by <c>AutoMlUtils.TransformsToBitmask</c> over the
/// selected transforms, taken before the final features concat is appended.
/// </param>
/// <returns>The selected transforms followed by the final features concat transform(s).</returns>
private TransformInference.SuggestedTransform[] SampleTransforms(out long transformsBitMask)
        {
            // Placeholder strategy: take everything that is available.
            var chosen = AvailableTransforms.ToList();

            // The mask is derived before the concat step, so it only describes the
            // transforms chosen above.
            transformsBitMask = AutoMlUtils.TransformsToBitmask(chosen.ToArray());

            // Append the final features concat built from the chosen transforms.
            var featureConcat = AutoMlUtils.GetFinalFeatureConcat(
                Env, FullyTransformedData, DependencyMapping, chosen.ToArray(), AvailableTransforms);
            chosen.AddRange(featureConcat);

            return chosen.ToArray();
        }
Esempio n. 2
0
        /// <summary>
        /// Samples the transforms for a candidate pipeline that will use <paramref name="learner"/>.
        /// Mandatory transforms are always included. Each remaining available transform is
        /// considered in turn and, if drawn, its atomic group is switched on in the bitmask;
        /// afterwards every remaining transform whose atomic group is present in the bitmask
        /// is added. The final features concat transform is appended last.
        /// </summary>
        /// <param name="learner">Learner whose name is matched against the learners in <paramref name="history"/>.</param>
        /// <param name="history">Previously evaluated pipelines. Must not be null; may be empty.</param>
        /// <param name="transformsBitMask">
        /// Receives the bitmask of the sampled transforms. It never includes the final
        /// features concat transform.
        /// </param>
        /// <param name="uniformRandomSampling">
        /// When true, history is ignored and each non-mandatory transform is drawn with
        /// probability 0.5.
        /// </param>
        /// <returns>Mandatory transforms, then the sampled transforms, then the final features concat.</returns>
        private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
                                                                         PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
        {
            var sampledTransforms =
                new List <TransformInference.SuggestedTransform>(
                    AutoMlUtils.GetMandatoryTransforms(AvailableTransforms));
            var remainingAvailableTransforms =
                AvailableTransforms.Where(t => !sampledTransforms.Any(t.Equals)).ToArray();
            var mask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

            // The best metric value in the history does not depend on the transform being
            // considered, so it is computed once instead of once per transform.
            double maxWeight = history.Length > 0 ? history.Max(w => w.PerformanceSummary.MetricValue) : 0d;

            foreach (var transform in remainingAvailableTransforms)
            {
                double inclusionProbability;

                if (uniformRandomSampling)
                {
                    // A fixed probability. It must not be scaled by maxWeight: doing so made the
                    // "uniform" probability depend on the metric scale, and with an empty history
                    // (maxWeight == 0) turned it into +Infinity so every transform was always picked.
                    inclusionProbability = 0.5;
                }
                else
                {
                    // Add pseudo-mass to encourage sampling of untried transforms.
                    double allWeight     = Math.Max(maxWeight, 1d);
                    double learnerWeight = Math.Max(maxWeight, 1d);
                    int    allCounts     = 1;
                    int    learnerCounts = 1;

                    // Add mass according to performance.
                    foreach (var pipeline in history)
                    {
                        if (pipeline.Transforms.Any(transform.Equals))
                        {
                            allWeight +=
                                AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                          maxWeight, IsMaximizingMetric);
                            allCounts++;

                            if (pipeline.Learner.LearnerName == learner.LearnerName)
                            {
                                // NOTE(review): unlike allWeight, this adds the raw metric value
                                // rather than the ProcessWeight result - confirm this is intended.
                                learnerWeight += pipeline.PerformanceSummary.MetricValue;
                                learnerCounts++;
                            }
                        }
                    }

                    // Take average mass as weight, and take convex combination of
                    // learner-specific weight and unconditioned weight. Both counters
                    // start at 1, so the divisions are always well defined.
                    allWeight     /= allCounts;
                    learnerWeight /= learnerCounts;
                    var lambda         = MathUtils.Sigmoid(learnerCounts - 3);
                    var combinedWeight = lambda * learnerWeight + (1 - lambda) * allWeight;

                    // Normalize by the best metric value. When maxWeight is 0 (e.g., empty
                    // history) this floating-point division does not throw; a positive
                    // combined weight yields +Infinity, so the transform is always selected.
                    inclusionProbability = combinedWeight / maxWeight;
                }

                // Sample transform according to its inclusion probability.
                if (ProbUtils.SampleUniform() <= inclusionProbability)
                {
                    mask |= 1L << transform.AtomicGroupId;
                }
            }

            // Add all chosen transforms.
            sampledTransforms.AddRange(remainingAvailableTransforms.Where(t =>
                                                                          AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId)));

            // Add final features concat transform. NOTE: computed bitmask should always
            // exclude the final features concat. If we forget to exclude that one, will
            // cause an error in verification, since it isn't included in the original
            // dependency mapping (i.e., its level isn't in the dictionary).
            sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                         DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
            transformsBitMask = mask;

            return sampledTransforms.ToArray();
        }
        /// <summary>
        /// Generates random candidate pipelines. Each candidate combines a random set of
        /// transform atomic groups (plus the mandatory transforms), a randomly chosen learner
        /// and the final features concat transform. Candidates whose hash key is already in
        /// <c>VisitedPipelines</c> are skipped, and only candidates that pass
        /// <c>PipelineVerifier</c> are returned.
        /// </summary>
        /// <param name="numOfPipelines">Number of valid pipelines to generate.</param>
        /// <returns>
        /// The valid pipelines that were generated. This can be fewer than
        /// <paramref name="numOfPipelines"/> (possibly none): generation stops once more than
        /// <c>numOfPipelines * 10</c> candidates have been accepted for verification.
        /// </returns>
        private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
        {
            Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
            Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));

            // One more than the largest atomic group ID; 0 when there are no transforms.
            int atomicGroupLimit = AvailableTransforms.Select(t => t.AtomicGroupId)
                                   .DefaultIfEmpty(-1).Max() + 1;
            var pipelines  = new List <PipelinePattern>();
            int collisions = 0;
            int totalCount = 0;

            while (pipelines.Count < numOfPipelines)
            {
                // Generate random bitmask (set of transform atomic group IDs)
                long transformsBitMask;
                if (atomicGroupLimit < 31)
                {
                    // 2^atomicGroupLimit fits in an int, so a single draw covers every subset.
                    transformsBitMask = Host.Rand.Next((int)Math.Pow(2, atomicGroupLimit));
                }
                else
                {
                    // 2^atomicGroupLimit no longer fits in an int; casting it would overflow and
                    // hand an invalid upper bound to the generator. Draw each bit separately
                    // instead. The mask is a long, so at most 64 groups can be represented.
                    int bitCount = atomicGroupLimit < 64 ? atomicGroupLimit : 64;
                    transformsBitMask = 0L;
                    for (int bit = 0; bit < bitCount; bit++)
                    {
                        if (Host.Rand.Next(2) == 1)
                        {
                            transformsBitMask |= 1L << bit;
                        }
                    }
                }

                // Include all "always on" transforms, such as autolabel.
                transformsBitMask |= AutoMlUtils.IncludeMandatoryTransforms(AvailableTransforms.ToList());

                // Get actual learner and transforms for pipeline
                var selectedLearner    = AvailableLearners[Host.Rand.Next(AvailableLearners.Length)];
                var selectedTransforms = AvailableTransforms.Where(t =>
                                                                   AutoMlUtils.AtomicGroupPresent(transformsBitMask, t.AtomicGroupId)).ToList();

                // Randomly change transform sweepable hyperparameter settings
                selectedTransforms.ForEach(t => RandomlyPerturbSweepableHyperparameters(t.PipelineNode));

                // Randomly change learner sweepable hyperparameter settings
                RandomlyPerturbSweepableHyperparameters(selectedLearner.PipelineNode);

                // Always include features concat transform
                selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
                                                                              DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

                // Compute hash key for checking if we've already seen this pipeline.
                // However, if we keep missing, don't want to get stuck in infinite loop.
                // Try for a good number of times (for example, numOfPipelines * 4), then just add
                // all generated pipelines to get us out of rut.
                string hashKey = GetHashKey(transformsBitMask, selectedLearner);
                if (collisions < numOfPipelines * 4 && VisitedPipelines.Contains(hashKey))
                {
                    collisions++;
                    continue;
                }

                VisitedPipelines.Add(hashKey);
                collisions = 0;
                totalCount++;

                // Keep pipeline if valid. The verification result is cached per bitmask, so
                // the verifier runs at most once for each distinct set of transforms.
                var pipeline = new PipelinePattern(selectedTransforms.ToArray(), selectedLearner, "", Env);
                bool isValid;
                if (!TransformsMaskValidity.TryGetValue(transformsBitMask, out isValid))
                {
                    isValid = PipelineVerifier(pipeline, transformsBitMask);
                    TransformsMaskValidity.Add(transformsBitMask, isValid);
                }
                if (isValid)
                {
                    pipelines.Add(pipeline);
                }

                // Only invalid pipelines available, stuck in loop.
                // Break out and return whatever has been collected so far.
                if (totalCount > numOfPipelines * 10)
                {
                    break;
                }
            }

            return pipelines.ToArray();
        }