Пример #1
0
            /// <summary>
            /// Requests the next batch of candidate pipelines from the AutoML engine, stores it in
            /// <c>BatchCandidates</c>, and logs every suggested pipeline to a "Suggested Pipeline" channel.
            /// </summary>
            /// <param name="numberOfCandidates">Upper bound on the number of candidates to request.</param>
            /// <returns>
            /// The candidates returned by the engine, or an empty array when the terminator reports that
            /// the search should stop (in that case <c>BatchCandidates</c> is left untouched).
            /// </returns>
            public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
            {
                // Nothing further to suggest once the terminator says the search is over.
                if (_terminator.ShouldTerminate(_history))
                {
                    return new PipelinePattern[] { };
                }

                var currentBatchSize = numberOfCandidates;

                // An iteration-based terminator limits the batch to the smaller of the requested
                // size and whatever RemainingIterations reports for the current history.
                if (_terminator is IterationTerminator itr)
                {
                    currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);
                }
                BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize, _dataRoles);

                // Report each suggested pipeline: its id, every transform, then its learner.
                using (var ch = _host.Start("Suggested Pipeline"))
                {
                    foreach (var pipeline in BatchCandidates)
                    {
                        ch.Info($"AutoInference Pipeline Id : {pipeline.UniqueId}");
                        foreach (var transform in pipeline.Transforms)
                        {
                            ch.Info($"AutoInference Transform : {transform.Transform}");
                        }
                        ch.Info($"AutoInference Learner : {pipeline.Learner}");
                    }
                }

                return BatchCandidates;
            }
            /// <summary>
            /// Restricts the available learners to those whose <c>LearnerName</c> appears in
            /// <paramref name="learnersToKeep"/> and passes the reduced set to the AutoML engine.
            /// </summary>
            /// <param name="learnersToKeep">Names of the learners that should remain available.</param>
            public void KeepSelectedLearners(IEnumerable <string> learnersToKeep)
            {
                // Start from every learner allowed for the current trainer kind.
                var permitted = RecipeInference.AllowedLearners(_env, TrainerKind);
                _env.AssertNonEmpty(permitted);

                // Keep only the learners that were explicitly requested by name.
                _availableLearners = (from learner in permitted
                                      where learnersToKeep.Contains(learner.LearnerName)
                                      select learner).ToArray();

                AutoMlEngine.UpdateLearners(_availableLearners);
            }
            /// <summary>
            /// Requests the next batch of candidate pipelines from the AutoML engine and stores it in
            /// <c>BatchCandidates</c>.
            /// </summary>
            /// <param name="numberOfCandidates">Upper bound on the number of candidates to request.</param>
            /// <returns>
            /// The candidates returned by the engine, or an empty array when the terminator reports that
            /// the search should stop (in that case <c>BatchCandidates</c> is left untouched).
            /// </returns>
            public PipelinePattern[] GetNextCandidates(int numberOfCandidates)
            {
                // Nothing further to suggest once the terminator says the search is over.
                if (_terminator.ShouldTerminate(_history))
                {
                    return new PipelinePattern[] { };
                }

                var currentBatchSize = numberOfCandidates;

                // An iteration-based terminator limits the batch to the smaller of the requested
                // size and whatever RemainingIterations reports for the current history.
                if (_terminator is IterationTerminator itr)
                {
                    currentBatchSize = Math.Min(itr.RemainingIterations(_history), numberOfCandidates);
                }
                BatchCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Select(kvp => kvp.Value), currentBatchSize);
                return BatchCandidates;
            }
            /// <summary>
            /// Repeatedly asks the AutoML engine for a batch of candidate pipelines and processes each
            /// one. The loop ends when the terminator signals completion, when the engine returns no
            /// candidates, or when processing a candidate throws.
            /// </summary>
            /// <param name="batchSize">Maximum number of candidates to request per round.</param>
            /// <param name="numOfTrainingRows">Passed through unchanged to <c>ProcessPipeline</c>.</param>
            private void MainLearningLoop(int batchSize, int numOfTrainingRows)
            {
                var timer     = new Stopwatch();
                var probUtils = new Sweeper.Algorithms.SweeperProbabilityUtils(_host);

                while (true)
                {
                    // Stop as soon as the terminator is satisfied with the history so far.
                    if (_terminator.ShouldTerminate(_history))
                    {
                        return;
                    }

                    // An iteration-based terminator limits the round to the smaller of the requested
                    // batch size and whatever RemainingIterations reports.
                    int roundSize = _terminator is IterationTerminator iterationTerminator
                        ? Math.Min(iterationTerminator.RemainingIterations(_history), batchSize)
                        : batchSize;

                    var roundCandidates = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Values, roundSize);

                    // No candidates means no valid pipeline is available; nothing left to do.
                    if (roundCandidates.Length == 0)
                    {
                        return;
                    }

                    // Evaluate every candidate of this round.
                    foreach (var pipeline in roundCandidates)
                    {
                        try
                        {
                            ProcessPipeline(probUtils, timer, pipeline, numOfTrainingRows);
                        }
                        catch (Exception)
                        {
                            // NOTE(review): the exception is discarded without logging and the whole
                            // loop is abandoned; confirm that this best-effort behavior is intended.
                            timer.Stop();
                            return;
                        }
                    }
                }
            }
            /// <summary>
            /// Search space is transforms X learners X hyperparameters.
            /// Infers transforms level by level (up to <paramref name="numTransformLevels"/> levels),
            /// records which transforms are responsible for which columns at each level, saves the
            /// result on this instance, and hands the search space to the AutoML engine.
            /// </summary>
            /// <param name="numTransformLevels">Maximum number of transform levels to infer.</param>
            /// <param name="learners">Learners to include; must pass <c>IsValidLearnerSet</c>.</param>
            /// <param name="transformInferenceFunction">
            /// Called once per level with the current data view and the inference arguments; returns
            /// the transforms suggested for that level.
            /// </param>
            private void ComputeSearchSpace(int numTransformLevels, RecipeInference.SuggestedRecipe.SuggestedLearner[] learners,
                                            Func <IDataView, TransformInference.Arguments, TransformInference.SuggestedTransform[]> transformInferenceFunction)
            {
                _env.AssertValue(_trainData, nameof(_trainData), "Must set training data prior to inferring search space.");

                var host = _env.Register("ComputeSearchSpace");

                using (var channel = host.Start("ComputeSearchSpace"))
                {
                    _env.Check(IsValidLearnerSet(learners), "Unsupported learner encountered, cannot update search space.");

                    // The data view starts as the raw training data and is replaced after each level.
                    var currentData = _trainData;

                    var args = new TransformInference.Arguments();
                    args.EstimatedSampleFraction         = 1.0;
                    args.ExcludeFeaturesConcatTransforms = true;

                    // Level 0 holds the column responsibilities of the untransformed data.
                    var columnOwners = new DependencyMap
                    {
                        { 0, AutoMlUtils.ComputeColumnResponsibilities(currentData, new TransformInference.SuggestedTransform[0]) }
                    };

                    // Collect the suggested transforms of every level; they form part of the search space.
                    var collected = new List <TransformInference.SuggestedTransform>();
                    for (int depth = 0; depth < numTransformLevels; depth++)
                    {
                        // Levels are 1-based in the inference arguments.
                        args.Level = depth + 1;

                        var suggested = transformInferenceFunction(currentData, args);

                        // With nothing left to apply the data would not change, so stop here.
                        if (suggested.Length == 0)
                        {
                            break;
                        }

                        // Guard against overflowing the bitmask.
                        // NOTE(review): an id of exactly 64 passes this check; confirm that matches
                        // the width of the bitmask used elsewhere.
                        var highestGroupId = suggested.Max(t => t.AtomicGroupId);
                        if (highestGroupId > 64)
                        {
                            break;
                        }

                        // The next level's atomic group ids start right after this level's highest id.
                        args.AtomicIdOffset = highestGroupId + 1;

                        // Apply this level's transforms to obtain the input of the next level.
                        currentData = AutoMlUtils.ApplyTransformSet(_env, currentData, suggested);

                        // Remember which of this level's transforms can be responsible for which columns.
                        columnOwners.Add(args.Level,
                                         AutoMlUtils.ComputeColumnResponsibilities(currentData, suggested));
                        collected.AddRange(suggested);
                    }

                    var allTransforms = collected.ToArray();
                    Func <PipelinePattern, long, bool> verifier = AutoMlUtils.ValidationWrapper(allTransforms, columnOwners);

                    // Persist the computed state so learning can be resumed later.
                    _availableTransforms = allTransforms;
                    _availableLearners   = learners;
                    _dependencyMapping   = columnOwners;
                    _transformedData     = currentData;

                    // Tell the AutoML engine what the search space looks like.
                    AutoMlEngine.SetSpace(_availableTransforms, _availableLearners, verifier,
                                          _trainData, _transformedData, _dependencyMapping, Metric.IsMaximizing);

                    channel.Done();
                }
            }