public IList<TRunDetail> Execute()
{
    var stopwatch = Stopwatch.StartNew();
    var iterationResults = new List<TRunDetail>();

    do
    {
        var iterationStopwatch = Stopwatch.StartNew();

        // Get the next pipeline.
        var getPipelineStopwatch = Stopwatch.StartNew();
        var pipeline = PipelineSuggester.GetNextInferredPipeline(_context, _history, _datasetColumnInfo, _task,
            _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerAllowList);
        var pipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds;

        // Break if no candidate is returned; this means no valid pipeline is available.
        if (pipeline == null)
        {
            break;
        }

        // Evaluate the pipeline.
        _logger.Trace($"Evaluating pipeline {pipeline}");
        (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail)
            = _runner.Run(pipeline, _modelDirectory, _history.Count + 1);

        _history.Add(suggestedPipelineRunDetail);
        WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch);

        runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds;
        runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds;

        ReportProgress(runDetail);
        iterationResults.Add(runDetail);

        // If the model is perfect, stop searching.
        if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score))
        {
            break;
        }

        // If all of the first three runs have failed, throw an exception.
        if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded))
        {
            throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}");
        }
    } while (_history.Count < _experimentSettings.MaxModels &&
             !_experimentSettings.CancellationToken.IsCancellationRequested &&
             stopwatch.Elapsed.TotalSeconds < _experimentSettings.MaxExperimentTimeInSeconds);

    return iterationResults;
}
public IList<TRunDetail> Execute()
{
    var iterationResults = new List<TRunDetail>();

    // Create a timer for the maximum duration of the experiment. When the given time has
    // elapsed, MaxExperimentTimeExpiredEvent is called to interrupt training of the current
    // model. The timer is not used if no experiment time is given, or if it is not a
    // positive number.
    if (_experimentSettings.MaxExperimentTimeInSeconds > 0)
    {
        _maxExperimentTimeTimer = new Timer(
            new TimerCallback(MaxExperimentTimeExpiredEvent), null,
            _experimentSettings.MaxExperimentTimeInSeconds * 1000, Timeout.Infinite
        );
    }
    // If the given max duration of the experiment is 0, only 1 model will be trained.
    // _experimentSettings.MaxExperimentTimeInSeconds is of type uint, so it is
    // either 0 or > 0.
    else
    {
        _experimentTimerExpired = true;
    }

    // Add a second timer to check for the cancellation signal from the main MLContext
    // to the active child MLContext. This timer propagates the cancellation signal
    // from the main MLContext to the child MLContexts if the main MLContext is canceled.
    _mainContextCanceledTimer = new Timer(new TimerCallback(MainContextCanceledEvent), null, 1000, 1000);

    // Pseudo-random number generator to make runs deterministic given the main MLContext's
    // seed, while maintaining variability between training iterations.
    int? mainContextSeed = ((IHostEnvironmentInternal)_context.Model.GetEnvironment()).Seed;
    _newContextSeedGenerator = (mainContextSeed.HasValue) ? RandomUtils.Create(mainContextSeed.Value) : null;

    do
    {
        try
        {
            var iterationStopwatch = Stopwatch.StartNew();

            // Get the next pipeline.
            var getPipelineStopwatch = Stopwatch.StartNew();

            // A new MLContext is needed per model run. When the max experiment time is reached,
            // each used context is canceled to stop further model training. Canceling the main
            // MLContext that the user instantiated is not desirable, so additional MLContexts
            // are used instead.
            _currentModelMLContext = _newContextSeedGenerator == null ? new MLContext() : new MLContext(_newContextSeedGenerator.Next());
            _currentModelMLContext.Log += RelayCurrentContextLogsToLogger;

            var pipeline = PipelineSuggester.GetNextInferredPipeline(_currentModelMLContext, _history, _datasetColumnInfo, _task,
                _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _logger, _trainerAllowList);

            // Break if no candidate is returned; this means no valid pipeline is available.
            if (pipeline == null)
            {
                break;
            }

            // Evaluate the pipeline.
            _logger.Trace($"Evaluating pipeline {pipeline}");
            (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail)
                = _runner.Run(pipeline, _modelDirectory, _history.Count + 1);

            _history.Add(suggestedPipelineRunDetail);
            WriteIterationLog(pipeline, suggestedPipelineRunDetail, iterationStopwatch);

            runDetail.RuntimeInSeconds = iterationStopwatch.Elapsed.TotalSeconds;
            runDetail.PipelineInferenceTimeInSeconds = getPipelineStopwatch.Elapsed.TotalSeconds;

            ReportProgress(runDetail);
            iterationResults.Add(runDetail);

            // If the model is perfect, stop searching.
            if (_metricsAgent.IsModelPerfect(suggestedPipelineRunDetail.Score))
            {
                break;
            }

            // If all of the first three runs have failed, throw an exception.
            if (_history.Count() == 3 && _history.All(r => !r.RunSucceeded))
            {
                throw new InvalidOperationException($"Training failed with the exception: {_history.Last().Exception}");
            }
        }
        catch (OperationCanceledException e)
        {
            // This exception is thrown when the IHost/MLContext of the trainer is canceled because
            // the maximum experiment time was reached. Catch this exception and return the finished
            // iteration results.
            _logger.Warning(_operationCancelledMessage, e.Message);
            return iterationResults;
        }
        catch (AggregateException e)
        {
            // This exception is thrown when the IHost/MLContext of the trainer is canceled because
            // the maximum experiment time was reached. Catch this exception and return the finished
            // iteration results. Some trainers, like FastTree, train in parallel and can therefore
            // throw multiple OperationCanceledExceptions. These surface as an AggregateException and
            // bypass the first catch block, so they are handled here.
            if (e.InnerExceptions.All(exception => exception is OperationCanceledException))
            {
                _logger.Warning(_operationCancelledMessage, e.Message);
                return iterationResults;
            }
            throw;
        }
    } while (_history.Count < _experimentSettings.MaxModels &&
             !_experimentSettings.CancellationToken.IsCancellationRequested &&
             !_experimentTimerExpired);

    return iterationResults;
}
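
Neither timer callback is shown above. Below is a minimal sketch of what they could look like, assuming the fields referenced in Execute() (_experimentTimerExpired, _currentModelMLContext, _experimentSettings) and assuming MLContext.CancelExecution() is the mechanism used to stop the active child context; the actual implementation may differ.

private void MaxExperimentTimeExpiredEvent(object state)
{
    // Sketch only: mark the experiment as expired so the do/while loop in Execute()
    // schedules no further iterations, then cancel the child context training the
    // current model so the in-flight run stops as well.
    _experimentTimerExpired = true;
    _currentModelMLContext?.CancelExecution();
}

private void MainContextCanceledEvent(object state)
{
    // Sketch only: polled every second to propagate cancellation from the main,
    // user-owned MLContext to the child context used for the current model.
    if (_experimentSettings.CancellationToken.IsCancellationRequested)
    {
        _currentModelMLContext?.CancelExecution();
    }
}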