/// <summary>
/// Randomly selects a set of transforms to combine with <paramref name="learner"/>, weighting
/// each candidate transform by the metric values of the pipelines in <paramref name="history"/>
/// that contained it.
/// </summary>
/// <param name="learner">Learner the transforms are being sampled for. Only its
/// <c>LearnerName</c> is read, to find history entries that used the same learner.</param>
/// <param name="history">Previously evaluated pipelines. Must not be null; may be empty.</param>
/// <param name="transformsBitMask">Receives the atomic-group bitmask of the mandatory and
/// sampled transforms. The final features concat transform is never part of this mask.</param>
/// <param name="uniformRandomSampling">When true, the history-based weight is replaced by the
/// constant 0.5 (which is still divided by the best metric value before the draw).</param>
/// <returns>The mandatory transforms, then the sampled transforms, then the final features
/// concat transform(s).</returns>
private TransformInference.SuggestedTransform[] SampleTransforms(RecipeInference.SuggestedRecipe.SuggestedLearner learner,
    PipelinePattern[] history, out long transformsBitMask, bool uniformRandomSampling = false)
{
    var sampledTransforms = new List<TransformInference.SuggestedTransform>(
        AutoMlUtils.GetMandatoryTransforms(AvailableTransforms));
    var remainingAvailableTransforms =
        AvailableTransforms.Where(t => !sampledTransforms.Any(t.Equals)).ToArray();
    var mask = AutoMlUtils.TransformsToBitmask(sampledTransforms.ToArray());

    // Largest metric value in the history. It does not depend on the candidate transform,
    // so it is computed once here rather than once per transform.
    double maxWeight = history.Length > 0 ? history.Max(w => w.PerformanceSummary.MetricValue) : 0d;

    // Pseudo-mass given to every transform to encourage sampling of untried transforms.
    double pseudoMass = Math.Max(maxWeight, 1d);

    foreach (var transform in remainingAvailableTransforms)
    {
        // Both counters start at 1 to account for the pseudo-mass entry, so the averages
        // below can never divide by zero.
        double allWeight = pseudoMass;
        double learnerWeight = pseudoMass;
        int allCounts = 1;
        int learnerCounts = 1;

        // Add mass according to performance.
        foreach (var pipeline in history)
        {
            if (!pipeline.Transforms.Any(transform.Equals))
            {
                continue;
            }

            allWeight += AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                maxWeight, IsMaximizingMetric);
            allCounts++;

            if (pipeline.Learner.LearnerName == learner.LearnerName)
            {
                // NOTE(review): the learner-specific mass uses the raw metric value while the
                // overall mass goes through ProcessWeight - confirm this asymmetry is intended.
                learnerWeight += pipeline.PerformanceSummary.MetricValue;
                learnerCounts++;
            }
        }

        // Take average mass as weight, and take convex combination of
        // learner-specific weight and unconditioned weight.
        allWeight /= allCounts;
        learnerWeight /= learnerCounts;
        var lambda = MathUtils.Sigmoid(learnerCounts - 3);
        var combinedWeight = uniformRandomSampling
            ? 0.5
            : lambda * learnerWeight + (1 - lambda) * allWeight;

        // Sample transform according to combined weight.
        // NOTE(review): maxWeight is 0 when the history is empty (or its best metric is 0), in
        // which case this ratio is not a probability (division by zero) - confirm the
        // resulting always/never-select behavior is what callers expect.
        if (ProbUtils.SampleUniform() <= combinedWeight / maxWeight)
        {
            mask |= 1L << transform.AtomicGroupId;
        }
    }

    // Add all chosen transforms.
    sampledTransforms.AddRange(remainingAvailableTransforms.Where(t =>
        AutoMlUtils.AtomicGroupPresent(mask, t.AtomicGroupId)));

    // Add final features concat transform. NOTE: computed bitmask should always
    // exclude the final features concat. If we forget to exclude that one, will
    // cause an error in verification, since it isn't included in the original
    // dependency mapping (i.e., its level isn't in the dictionary).
    sampledTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
        DependencyMapping, sampledTransforms.ToArray(), AvailableTransforms, DataRoles));
    transformsBitMask = mask;

    return sampledTransforms.ToArray();
}
/// <summary>
/// Generates random pipelines by pairing a randomly chosen learner with a randomly chosen set
/// of transforms, keeping only those that pass verification.
/// </summary>
/// <param name="numOfPipelines">Number of pipelines requested.</param>
/// <returns>
/// The valid pipelines that were generated. This can be fewer than
/// <paramref name="numOfPipelines"/> (possibly none): generation stops once more than
/// <c>numOfPipelines * 10</c> candidates have been processed.
/// </returns>
private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
{
    Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
    Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));

    // One more than the largest atomic group ID, i.e. the number of mask bits to randomize
    // (0 when there are no transforms).
    int atomicGroupLimit = AvailableTransforms.Select(t => t.AtomicGroupId)
        .DefaultIfEmpty(-1).Max() + 1;
    var pipelines = new List<PipelinePattern>();
    int collisions = 0;
    int totalCount = 0;

    while (pipelines.Count < numOfPipelines)
    {
        // Generate random bitmask (set of transform atomic group IDs)
        long transformsBitMask = NextRandomBitmask(atomicGroupLimit);

        // Include all "always on" transforms, such as autolabel.
        transformsBitMask |= AutoMlUtils.IncludeMandatoryTransforms(AvailableTransforms.ToList());

        // Get actual learner and transforms for pipeline
        var selectedLearner = AvailableLearners[Host.Rand.Next(AvailableLearners.Length)];
        var selectedTransforms = AvailableTransforms.Where(t =>
            AutoMlUtils.AtomicGroupPresent(transformsBitMask, t.AtomicGroupId)).ToList();

        // Randomly change transform sweepable hyperparameter settings
        selectedTransforms.ForEach(t => RandomlyPerturbSweepableHyperparameters(t.PipelineNode));

        // Randomly change learner sweepable hyperparameter settings
        RandomlyPerturbSweepableHyperparameters(selectedLearner.PipelineNode);

        // Always include features concat transform
        selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
            DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

        // Compute hash key for checking if we've already seen this pipeline.
        // However, if we keep missing, don't want to get stuck in infinite loop.
        // Try for a good number of times (for example, numOfPipelines * 4), then just add
        // all generated pipelines to get us out of rut.
        string hashKey = GetHashKey(transformsBitMask, selectedLearner);
        if (collisions < numOfPipelines * 4 && VisitedPipelines.Contains(hashKey))
        {
            collisions++;
            continue;
        }

        VisitedPipelines.Add(hashKey);
        collisions = 0;
        totalCount++;

        // Keep pipeline if valid. The verification result is cached per transform bitmask.
        var pipeline = new PipelinePattern(selectedTransforms.ToArray(), selectedLearner, "", Env);
        if (!TransformsMaskValidity.ContainsKey(transformsBitMask))
        {
            TransformsMaskValidity.Add(transformsBitMask, PipelineVerifier(pipeline, transformsBitMask));
        }

        if (TransformsMaskValidity[transformsBitMask])
        {
            pipelines.Add(pipeline);
        }

        // Only invalid pipelines available, stuck in loop.
        // Break out and return no pipelines.
        if (totalCount > numOfPipelines * 10)
        {
            break;
        }
    }

    return pipelines.ToArray();
}

/// <summary>
/// Draws a random bitmask in which only the lowest <paramref name="bitCount"/> bits may be set.
/// </summary>
/// <param name="bitCount">Number of low-order bits to randomize; expected to be non-negative.
/// Values above 64 are clamped, since the mask is a <see cref="long"/>.</param>
/// <returns>The random bitmask.</returns>
private long NextRandomBitmask(int bitCount)
{
    // The exclusive upper bound passed to Host.Rand.Next must fit in a positive int, so a
    // single draw can cover at most 30 bits (1 << 30). Computing 2^bitCount as an int, as
    // was done previously, overflows as soon as bitCount reaches 31.
    const int maxBitsPerDraw = 30;
    const int bitsInMask = 64;

    if (bitCount <= maxBitsPerDraw)
    {
        // Single draw: identical to the previous behavior for every bit count that worked.
        return Host.Rand.Next(1 << bitCount);
    }

    // Wider masks are assembled from several independent draws of at most 30 bits each.
    long mask = 0;
    int remaining = Math.Min(bitCount, bitsInMask);
    for (int shift = 0; remaining > 0; shift += maxBitsPerDraw, remaining -= maxBitsPerDraw)
    {
        int bits = Math.Min(remaining, maxBitsPerDraw);
        long chunk = Host.Rand.Next(1 << bits);
        mask |= chunk << shift;
    }

    return mask;
}