/// <summary>
/// Requests the next batch of candidate pipelines from the AutoML engine and logs
/// each suggested pipeline's id, transforms, and learner through the host channel.
/// </summary>
/// <param name="numberOfCandidates">Maximum number of candidates to request.</param>
/// <returns>The suggested candidates; an empty array when the terminator indicates the run should stop.</returns>
public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
{
    // Fix: original read `return new PipelinePattern[] { } } ;` — the return statement was
    // missing its semicolon (a compile error) with a stray empty statement after the block.
    if (_terminator.ShouldTerminate(_history))
        return Array.Empty<PipelinePattern>();

    // Clamp the batch size so we never request more pipelines than the remaining iteration budget.
    var currentBatchSize = numberOfCandidates;
    if (_terminator is IterationTerminator itr)
        currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);

    BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles);

    using (var ch = _host.Start("Suggested Pipeline"))
    {
        foreach (var pipeline in BatchCandidates)
        {
            ch.Info($"AutoInference Pipeline Id : {pipeline.UniqueId}");
            foreach (var transform in pipeline.Transforms)
                ch.Info($"AutoInference Transform : {transform.Transform}");
            ch.Info($"AutoInference Learner : {pipeline.Learner}");
        }
    }

    return BatchCandidates;
}
/// <summary>
/// Restricts the available learner set to the named learners and pushes the
/// filtered set into the AutoML engine.
/// </summary>
/// <param name="learnersToKeep">Names of the learners to retain; all others are dropped.</param>
public void KeepSelectedLearners(IEnumerable<string> learnersToKeep)
{
    var allLearners = RecipeInference.AllowedLearners(_env, TrainerKind);
    _env.AssertNonEmpty(allLearners);

    // Materialize the keep-list once: avoids re-enumerating the caller's (possibly lazy)
    // sequence for every learner and turns each membership test into an O(1) lookup.
    var keepSet = new HashSet<string>(learnersToKeep);
    _availableLearners = allLearners.Where(l => keepSet.Contains(l.LearnerName)).ToArray();
    AutoMlEngine.UpdateLearners(_availableLearners);
}
/// <summary>
/// Requests the next batch of candidate pipelines from the AutoML engine.
/// </summary>
/// <param name="numberOfCandidates">Maximum number of candidates to request.</param>
/// <returns>The suggested candidates; an empty array when the terminator indicates the run should stop.</returns>
public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
{
    // Fix: original read `return new PipelinePattern[] { } } ;` — the return statement was
    // missing its semicolon (a compile error) with a stray empty statement after the block.
    if (_terminator.ShouldTerminate(_history))
        return Array.Empty<PipelinePattern>();

    // Clamp the batch size so we never request more pipelines than the remaining iteration budget.
    var currentBatchSize = numberOfCandidates;
    if (_terminator is IterationTerminator itr)
        currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);

    BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize);
    return BatchCandidates;
}
/// <summary>
/// Core AutoML loop: repeatedly asks the engine for a batch of candidate pipelines
/// and evaluates each one, until the terminator says to stop, the engine returns no
/// candidates, or a candidate fails during processing.
/// </summary>
/// <param name="batchSize">Maximum number of candidate pipelines requested per round.</param>
/// <param name="numOfTrainingRows">Training-row count forwarded to per-candidate processing.</param>
private void MainLearningLoop(int batchSize, int numOfTrainingRows)
{
    var stopwatch = new Stopwatch();
    var probabilityUtils = new Sweeper.Algorithms.SweeperProbabilityUtils(_host);

    while (!_terminator.ShouldTerminate(_history))
    {
        // Get next set of candidates
        var currentBatchSize = batchSize;
        if (_terminator is IterationTerminator itr)
        {
            // Clamp to the remaining iteration budget so we never over-request.
            currentBatchSize = Math.Min(itr.RemainingIterations(_history), batchSize);
        }
        var candidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Values, currentBatchSize);

        // Break if no candidates returned, means no valid pipeline available.
        if (candidates.Length == 0)
        {
            break;
        }

        // Evaluate them on subset of data
        foreach (var candidate in candidates)
        {
            try
            {
                ProcessPipeline(probabilityUtils, stopwatch, candidate, numOfTrainingRows);
            }
            catch (Exception)
            {
                // NOTE(review): any failure in a single candidate silently aborts the ENTIRE
                // learning loop (not just this candidate), and the exception is discarded.
                // Presumably intentional best-effort behavior — confirm whether the error
                // should at least be logged before returning.
                stopwatch.Stop();
                return;
            }
        }
    }
}
/// <summary>
/// Search space is transforms X learners X hyperparameters.
/// Infers up to <paramref name="numTransformLevels"/> levels of suggested transforms,
/// applies each level to the sample data, records which transforms produce which output
/// columns, then hands the resulting space (transforms, learners, validation wrapper)
/// to the AutoML engine.
/// </summary>
/// <param name="numTransformLevels">Maximum number of transform-inference passes to stack.</param>
/// <param name="learners">Candidate learners; must pass <c>IsValidLearnerSet</c>.</param>
/// <param name="transformInferenceFunction">Callback that suggests transforms for a data view at a given level.</param>
private void ComputeSearchSpace(int numTransformLevels, RecipeInference.SuggestedRecipe.SuggestedLearner[] learners,
    Func<IDataView, TransformInference.Arguments, TransformInference.SuggestedTransform[]> transformInferenceFunction)
{
    // Training data must already be set — transform inference runs against it.
    _env.AssertValue(_trainData, nameof(_trainData), "Must set training data prior to inferring search space.");
    var h = _env.Register("ComputeSearchSpace");
    using (var ch = h.Start("ComputeSearchSpace"))
    {
        _env.Check(IsValidLearnerSet(learners), "Unsupported learner encountered, cannot update search space.");

        var dataSample = _trainData;
        var inferenceArgs = new TransformInference.Arguments
        {
            EstimatedSampleFraction = 1.0,
            ExcludeFeaturesConcatTransforms = true
        };

        // Initialize structure for mapping columns back to specific transforms.
        // Level 0 maps the raw input columns (no transforms applied yet).
        var dependencyMapping = new DependencyMap
        {
            { 0, AutoMlUtils.ComputeColumnResponsibilities(dataSample, new TransformInference.SuggestedTransform[0]) }
        };

        // Get suggested transforms for all levels. Defines another part of search space.
        var transformsList = new List<TransformInference.SuggestedTransform>();
        for (int i = 0; i < numTransformLevels; i++)
        {
            // Update level for transforms (levels are 1-based; 0 is the raw data).
            inferenceArgs.Level = i + 1;

            // Infer transforms using experts
            var levelTransforms = transformInferenceFunction(dataSample, inferenceArgs);

            // If no more transforms to apply, dataSample won't change. So end loop.
            if (levelTransforms.Length == 0)
            {
                break;
            }

            // Make sure we don't overflow our bitmask.
            // NOTE(review): if AtomicGroupId values are used as bit positions in a 64-bit
            // mask, ids 0..63 fit and id == 64 would already overflow — confirm whether
            // this guard should be `>= 64` rather than `> 64`.
            if (levelTransforms.Max(t => t.AtomicGroupId) > 64)
            {
                break;
            }

            // Level-up atomic group id offset.
            inferenceArgs.AtomicIdOffset = levelTransforms.Max(t => t.AtomicGroupId) + 1;

            // Apply transforms to dataview for this level.
            dataSample = AutoMlUtils.ApplyTransformSet(_env, dataSample, levelTransforms);

            // Keep list of which transforms can be responsible for which output columns
            dependencyMapping.Add(inferenceArgs.Level, AutoMlUtils.ComputeColumnResponsibilities(dataSample, levelTransforms));
            transformsList.AddRange(levelTransforms);
        }

        var transforms = transformsList.ToArray();
        Func<PipelinePattern, long, bool> verifier = AutoMlUtils.ValidationWrapper(transforms, dependencyMapping);

        // Save state, for resuming learning
        _availableTransforms = transforms;
        _availableLearners = learners;
        _dependencyMapping = dependencyMapping;
        _transformedData = dataSample;

        // Update autoML engine to know what the search space looks like
        AutoMlEngine.SetSpace(_availableTransforms, _availableLearners, verifier,
            _trainData, _transformedData, _dependencyMapping, Metric.IsMaximizing);
        ch.Done();
    }
}