예제 #1
0
        /// <summary>
        /// Runs the experiment loop: repeatedly asks <c>PipelineSuggester</c> for the next pipeline,
        /// runs it through <c>_runner</c>, records the outcome in <c>_history</c> and reports progress.
        /// </summary>
        /// <remarks>
        /// The loop ends when no pipeline is suggested, when the metrics agent reports a run's score
        /// as perfect, when the history reaches <c>MaxModels</c> entries, when cancellation has been
        /// requested, or when <c>MaxExperimentTimeInSeconds</c> has elapsed. The limit checks are made
        /// only between iterations, so at least one iteration is always attempted.
        /// </remarks>
        /// <returns>The run details of every completed iteration, in execution order.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the history holds exactly three runs and none of them succeeded.
        /// </exception>
        public IList<TRunDetail> Execute()
        {
            var stopwatch = Stopwatch.StartNew();
            var iterationResults = new List<TRunDetail>();

            do
            {
                var iterationStopwatch = Stopwatch.StartNew();

                // Get the next pipeline. The stopwatch is stopped right after the suggestion call so
                // that the recorded inference time does not also include the model run below.
                var getPipelineStopwatch = Stopwatch.StartNew();
                var pipeline = PipelineSuggester.GetNextInferredPipeline(_context, _history, _datasetColumnInfo, _task,
                                                                         _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList);
                getPipelineStopwatch.Stop();

                var pipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds;

                // A null pipeline means no valid candidate is available; end the experiment.
                if (pipeline == null)
                {
                    break;
                }

                // Evaluate the pipeline. The iteration number passed to the runner is 1-based.
                _logger.Trace($"Evaluating pipeline {pipeline.ToString()}");
                (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail)
                    = _runner.Run(pipeline, _modelDirectory, _history.Count + 1);

                _history.Add(suggestedPipelineRunDetail);
                WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch);

                runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds;
                runDetail.PipelineInferenceTimeInSeconds = pipelineInferenceTimeInSeconds;

                ReportProgress(runDetail);
                iterationResults.Add(runDetail);

                // A perfect model cannot be improved upon; stop searching.
                if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score))
                {
                    break;
                }

                // If every one of the first three runs failed, surface the latest failure
                // instead of continuing to iterate.
                if (_history.Count == 3 && _history.All(r => !r.RunSucceeded))
                {
                    throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}");
                }
            } while (_history.Count < _experimentSettings.MaxModels &&
                     !_experimentSettings.CancellationToken.IsCancellationRequested &&
                     stopwatch.Elapsed.TotalSeconds < _experimentSettings.MaxExperimentTimeInSeconds);

            return iterationResults;
        }
예제 #2
0
        /// <summary>
        /// Determines whether the supplied metrics describe a perfect model according to the given agent.
        /// </summary>
        /// <typeparam name="TMetrics">Type of the metrics object the agent works with.</typeparam>
        /// <param name="metricsAgent">Agent used both to derive the score and to judge it.</param>
        /// <param name="metrics">Metrics that are converted to a score.</param>
        /// <returns>
        /// The result of <c>metricsAgent.IsModelPerfect</c> applied to the score that
        /// <c>metricsAgent.GetScore</c> produces for <paramref name="metrics"/>.
        /// </returns>
        private static bool IsPerfectModel<TMetrics>(IMetricsAgent<TMetrics> metricsAgent, TMetrics metrics)
            => metricsAgent.IsModelPerfect(metricsAgent.GetScore(metrics));
예제 #3
0
        /// <summary>
        /// Runs the experiment loop: for each iteration a fresh <c>MLContext</c> is created, the next
        /// pipeline is requested from <c>PipelineSuggester</c>, run through <c>_runner</c>, recorded in
        /// <c>_history</c> and reported as progress.
        /// </summary>
        /// <remarks>
        /// The loop ends when no pipeline is suggested, when the metrics agent reports a run's score
        /// as perfect, when the history reaches <c>MaxModels</c> entries, when cancellation has been
        /// requested, or when the experiment-time flag has been set. The limit checks are made only
        /// between iterations, so at least one iteration is always attempted. Cancellation exceptions
        /// raised during an iteration are logged as warnings and end the experiment with the results
        /// gathered so far.
        /// </remarks>
        /// <returns>The run details of every completed iteration, in execution order.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the history holds exactly three runs and none of them succeeded.
        /// </exception>
        public IList<TRunDetail> Execute()
        {
            // Largest due time, in milliseconds, accepted by the Timer constructor.
            const long maxTimerDueTimeInMilliseconds = 4294967294L;

            var iterationResults = new List<TRunDetail>();

            // Create a timer for the max duration of the experiment. When the given time has
            // elapsed, MaxExperimentTimeExpiredEvent is called to interrupt training
            // of the current model. The timer is not used if no experiment time is given.
            if (_experimentSettings.MaxExperimentTimeInSeconds > 0)
            {
                // Convert to milliseconds in 64-bit arithmetic: a 32-bit multiplication would wrap
                // for large settings and silently produce a much shorter timeout. The result is
                // clamped because the timer rejects larger due times.
                long dueTimeInMilliseconds = Math.Min(
                    _experimentSettings.MaxExperimentTimeInSeconds * 1000L,
                    maxTimerDueTimeInMilliseconds);

                _maxExperimentTimeTimer = new Timer(
                    new TimerCallback(MaxExperimentTimeExpiredEvent), null,
                    dueTimeInMilliseconds, Timeout.Infinite
                    );
            }
            // If the given max duration of the experiment is 0, only 1 model will be trained.
            // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, so it is
            // either 0 or >0.
            else
            {
                _experimentTimerExpired = true;
            }

            // Second timer, firing every second, that checks for the cancellation signal from the
            // main MLContext and propagates it to the active child MLContext if the main
            // MLContext is canceled.
            _mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000);

            // Pseudo random number generator to result in deterministic runs with the provided main MLContext's seed and to
            // maintain variability between training iterations.
            int? mainContextSeed = ((IHostEnvironmentInternal)_context.Model.GetEnvironment()).Seed;

            _newContextSeedGenerator = mainContextSeed.HasValue ? RandomUtils.Create(mainContextSeed.Value) : null;

            do
            {
                try
                {
                    var iterationStopwatch = Stopwatch.StartNew();

                    // Times the pipeline suggestion step (including creation of the per-run context).
                    var getPipelineStopwatch = Stopwatch.StartNew();

                    // A new MLContext is needed per model run. When max experiment time is reached, each used
                    // context is canceled to stop further model training. The cancellation of the main MLContext
                    // a user has instantiated is not desirable, thus additional MLContexts are used.
                    _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next());
                    _currentModelMLContext.Log += RelayCurrentContextLogsToLogger;
                    var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task,
                                                                             _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList);

                    // Stop here so the recorded inference time does not also include the model run below.
                    getPipelineStopwatch.Stop();

                    // A null pipeline means no valid candidate is available; end the experiment.
                    if (pipeline == null)
                    {
                        break;
                    }

                    // Evaluate the pipeline. The iteration number passed to the runner is 1-based.
                    _logger.Trace($"Evaluating pipeline {pipeline.ToString()}");
                    (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail)
                        = _runner.Run(pipeline, _modelDirectory, _history.Count + 1);

                    _history.Add(suggestedPipelineRunDetail);
                    WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch);

                    runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds;
                    runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds;

                    ReportProgress(runDetail);
                    iterationResults.Add(runDetail);

                    // A perfect model cannot be improved upon; stop searching.
                    if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score))
                    {
                        break;
                    }

                    // If every one of the first three runs failed, surface the latest failure
                    // instead of continuing to iterate.
                    if (_history.Count == 3 && _history.All(r => !r.RunSucceeded))
                    {
                        throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}");
                    }
                }
                catch (OperationCanceledException e)
                {
                    // This exception is thrown when the IHost/MLContext of the trainer is canceled due to
                    // reaching maximum experiment time. Simply catch this exception and return finished
                    // iteration results.
                    _logger.Warning(_operationCancelledMessage, e.Message);
                    return iterationResults;
                }
                catch (AggregateException e) when (e.InnerExceptions.All(exception => exception is OperationCanceledException))
                {
                    // For some trainers, like FastTree, training is done in parallel and can throw
                    // multiple OperationCanceledExceptions. These arrive wrapped in an AggregateException
                    // and miss the first catch block. The filter handles only that case; any other
                    // AggregateException propagates to the caller untouched.
                    _logger.Warning(_operationCancelledMessage, e.Message);
                    return iterationResults;
                }
            } while (_history.Count < _experimentSettings.MaxModels &&
                     !_experimentSettings.CancellationToken.IsCancellationRequested &&
                     !_experimentTimerExpired);

            return iterationResults;
        }