/// <summary>
/// Builds the next batch of candidate pipelines.
/// Learners are picked round-robin from <c>AvailableLearners</c> while in the second or
/// third stage, and are otherwise sampled according to the weights returned by
/// <c>LearnerHistoryToWeights</c>. For each picked learner, hyperparameters are set once and
/// transform sets are then re-sampled until a pipeline passes <c>PipelineVerifier</c> and its
/// hash key is not already in <c>VisitedPipelines</c>, or the attempt budget is used up.
/// </summary>
/// <param name="history">Previously evaluated pipelines, forwarded to the weighting and sampling helpers.</param>
/// <param name="numCandidates">Number of learners to pick; an upper bound on the number of pipelines returned.</param>
/// <param name="defaultHyperParams">
/// When true, <c>AutoMlUtils.PopulateSweepableParams</c> is called on the learner instead of
/// <c>SampleHyperparameters</c>.
/// </param>
/// <param name="uniformRandomTransforms">Forwarded unchanged to <c>SampleTransforms</c>.</param>
/// <returns>
/// The pipelines that were valid and unseen. May hold fewer than
/// <paramref name="numCandidates"/> entries (possibly none).
/// </returns>
private PipelinePattern[] NextCandidates(PipelinePattern[] history, int numCandidates,
    bool defaultHyperParams = false, bool uniformRandomTransforms = false)
{
    const int maxNumberAttempts = 10;
    double[] learnerWeights = LearnerHistoryToWeights(history, IsMaximizingMetric);
    var candidates = new List<PipelinePattern>();
    var sampledLearners = new RecipeInference.SuggestedRecipe.SuggestedLearner[numCandidates];

    if (_currentStage == (int)Stages.Second || _currentStage == (int)Stages.Third)
    {
        // Select remaining learners in round-robin fashion.
        for (int i = 0; i < numCandidates; i++)
        {
            sampledLearners[i] = AvailableLearners[i % AvailableLearners.Length].Clone();
        }
    }
    else
    {
        // Select learners, based on weights.
        var indices = ProbUtils.SampleCategoricalDistribution(numCandidates, learnerWeights);
        int slot = 0;
        foreach (var learnerIndex in indices)
        {
            sampledLearners[slot++] = AvailableLearners[learnerIndex].Clone();
        }
    }

    // Select hyperparameters and transforms based on learner and history.
    foreach (var learner in sampledLearners)
    {
        // Hyperparameters are fixed once per learner; only the transform set is re-sampled below.
        if (!defaultHyperParams)
        {
            SampleHyperparameters(learner, history);
        }
        else
        {
            AutoMlUtils.PopulateSweepableParams(learner);
        }

        PipelinePattern pipeline;
        string hashKey;
        bool valid;
        int attempts = 0;

        do
        {
            // Make sure the transform set is valid and the pipeline has not been seen before.
            // Repeat until it passes or the attempt budget is exhausted.
            pipeline = new PipelinePattern(
                SampleTransforms(learner, history, out var transformsBitMask, uniformRandomTransforms),
                learner, "", Env);
            hashKey = GetHashKey(transformsBitMask, learner);
            valid = PipelineVerifier(pipeline, transformsBitMask) && !VisitedPipelines.Contains(hashKey);
            attempts++;
        }
        // Strict '<': 'attempts' is incremented before this test, so '<=' would permit
        // maxNumberAttempts + 1 iterations.
        while (!valid && attempts < maxNumberAttempts);

        // If every attempt was needed while in the second stage, move on to the next stage.
        // This fires even when the final attempt succeeded.
        if (attempts >= maxNumberAttempts && _currentStage == (int)Stages.Second)
        {
            _currentStage++;
        }

        // Keep only valid pipelines.
        if (valid)
        {
            VisitedPipelines.Add(hashKey);
            candidates.Add(pipeline);
        }
    }

    return candidates.ToArray();
}
/// <summary>
/// Generates random pipelines by pairing a randomly chosen learner with a random subset of
/// transform atomic groups (plus the mandatory transforms and the final feature concat),
/// after randomly perturbing the sweepable hyperparameters of both.
/// </summary>
/// <param name="numOfPipelines">Target number of pipelines to produce.</param>
/// <returns>
/// The pipelines whose transform mask passed <c>PipelineVerifier</c>. May hold fewer than
/// <paramref name="numOfPipelines"/> entries when the loop gives up after more than
/// <c>numOfPipelines * 10</c> accepted draws.
/// </returns>
private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
{
    Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
    Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));

    // One bit per atomic group ID; 0 when there are no transforms.
    int atomicGroupLimit = AvailableTransforms.Select(t => t.AtomicGroupId)
        .DefaultIfEmpty(-1).Max() + 1;

    // Rand.Next takes an int limit, so at most 30 bits can be drawn in one call:
    // 1 << 30 is the largest power of two that fits in a positive int. The previous
    // (int)Math.Pow(2, atomicGroupLimit) overflowed for 31 or more groups.
    const int maxBitsPerDraw = 30;
    // Bit 63 is the sign bit of the long mask and is never set here.
    const int maxMaskBits = 63;
    int singleDrawLimit = 1 << Math.Max(0, Math.Min(atomicGroupLimit, maxBitsPerDraw));

    var pipelines = new List<PipelinePattern>();
    int collisions = 0;
    int totalCount = 0;

    while (pipelines.Count < numOfPipelines)
    {
        // Generate random bitmask (set of transform atomic group IDs).
        // For up to 30 groups this is a single draw, exactly as before.
        long transformsBitMask = Host.Rand.Next(singleDrawLimit);
        for (int bit = maxBitsPerDraw; bit < atomicGroupLimit && bit < maxMaskBits; bit++)
        {
            if (Host.Rand.Next(2) == 1)
            {
                transformsBitMask |= 1L << bit;
            }
        }

        // Include all "always on" transforms, such as autolabel.
        transformsBitMask |= AutoMlUtils.IncludeMandatoryTransforms(AvailableTransforms.ToList());

        // Get actual learner and transforms for pipeline.
        // NOTE(review): unlike NextCandidates, the learner is not cloned here, so the
        // perturbation below presumably acts on the instance held in AvailableLearners
        // and shared by every pipeline that picks it. Confirm this is intended.
        var selectedLearner = AvailableLearners[Host.Rand.Next(AvailableLearners.Length)];
        var selectedTransforms = AvailableTransforms
            .Where(t => AutoMlUtils.AtomicGroupPresent(transformsBitMask, t.AtomicGroupId))
            .ToList();

        // Randomly change transform sweepable hyperparameter settings.
        selectedTransforms.ForEach(t => RandomlyPerturbSweepableHyperparameters(t.PipelineNode));

        // Randomly change learner sweepable hyperparameter settings.
        RandomlyPerturbSweepableHyperparameters(selectedLearner.PipelineNode);

        // Always include features concat transform.
        selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
            DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

        // Compute hash key for checking if we've already seen this pipeline.
        // However, if we keep missing, we don't want to get stuck in an infinite loop.
        // Retry up to numOfPipelines * 4 consecutive collisions, then accept the
        // already-seen pipeline anyway to get out of the rut.
        string hashKey = GetHashKey(transformsBitMask, selectedLearner);
        if (collisions < numOfPipelines * 4 && VisitedPipelines.Contains(hashKey))
        {
            collisions++;
            continue;
        }

        VisitedPipelines.Add(hashKey);
        collisions = 0;
        totalCount++;

        // Keep pipeline if valid. The verifier result is cached per transform mask,
        // so each mask is verified at most once.
        var pipeline = new PipelinePattern(selectedTransforms.ToArray(), selectedLearner, "", Env);
        if (!TransformsMaskValidity.TryGetValue(transformsBitMask, out bool maskIsValid))
        {
            maskIsValid = PipelineVerifier(pipeline, transformsBitMask);
            TransformsMaskValidity.Add(transformsBitMask, maskIsValid);
        }

        if (maskIsValid)
        {
            pipelines.Add(pipeline);
        }

        // Only invalid pipelines available, stuck in loop.
        // Break out and return whatever has been collected so far (possibly nothing).
        if (totalCount > numOfPipelines * 10)
        {
            break;
        }
    }

    return pipelines.ToArray();
}