/// <summary>
/// Proposes the next pipeline to try, given the results of the pipelines run so far.
/// </summary>
/// <param name="context">Context handed to the history conversion and to <c>GetNextInferredPipeline</c>.</param>
/// <param name="history">Previously scored pipelines; each entry is converted with
/// <c>SuggestedPipelineRunDetail.FromPipelineRunResult</c>. The conversion is a deferred query,
/// so it runs when <c>GetNextInferredPipeline</c> enumerates it.</param>
/// <param name="columns">Dataset column information, forwarded unchanged.</param>
/// <param name="task">Task kind, forwarded unchanged.</param>
/// <param name="isMaximizingMetric">Forwarded unchanged to <c>GetNextInferredPipeline</c>; defaults to <c>true</c>.</param>
/// <returns>
/// The result of <c>ToPipeline()</c> on the inferred pipeline, or <c>null</c> when
/// <c>GetNextInferredPipeline</c> returns <c>null</c>.
/// </returns>
public static Pipeline GetNextPipeline(
    MLContext context,
    IEnumerable<PipelineScore> history,
    DatasetColumnInfo[] columns,
    TaskKind task,
    bool isMaximizingMetric = true)
{
    var priorRuns =
        from scoredPipeline in history
        select SuggestedPipelineRunDetail.FromPipelineRunResult(context, scoredPipeline);

    // Caching before the trainer is always requested as CacheBeforeTrainer.Auto on this path.
    var candidate = GetNextInferredPipeline(
        context, priorRuns, columns, task, isMaximizingMetric, CacheBeforeTrainer.Auto);

    return candidate?.ToPipeline();
}
Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
{
    // Trains and scores `pipeline` once on the train/validation pair held by this runner,
    // then wraps the outcome in both result shapes that are returned to the caller.
    var outputModelFile = GetModelFileInfo(modelDirectory, iterationNum);
    var outcome = RunnerUtil.TrainAndScorePipeline(
        _context,
        pipeline,
        _trainData,
        _validData,
        _groupIdColumn,
        _labelColumn,
        _metricsAgent,
        _preprocessorTransform,
        outputModelFile,
        _modelInputSchema,
        _logger);

    // The run is flagged as successful exactly when no exception was captured.
    var succeeded = outcome.exception == null;
    var detail = new SuggestedPipelineRunDetail<TMetrics>(
        pipeline,
        outcome.score,
        succeeded,
        outcome.metrics,
        outcome.model,
        outcome.exception);

    return (detail, detail.ToIterationResult(_preFeaturizer));
}
Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
{
    // Trains and scores `pipeline` once per train/validation dataset pair (fold).
    // If any fold captured an exception, a failed run detail carrying the first such
    // exception is returned. Otherwise the result combines the averaged score/metrics
    // with the model taken from the best-scoring fold.
    var trainResults = new List<(ModelContainer model, TMetrics metrics, Exception exception, double score)>(_trainDatasets.Length);

    for (var i = 0; i < _trainDatasets.Length; i++)
    {
        // The third argument is i + 1 (presumably a 1-based fold number; confirm against
        // RunnerUtil.GetModelFileInfo).
        var modelFileInfo = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, i + 1);
        var trainResult = RunnerUtil.TrainAndScorePipeline(_context, pipeline, _trainDatasets[i], _validDatasets[i],
            _labelColumn, _metricsAgent, _preprocessorTransforms?.ElementAt(i), modelFileInfo, _modelInputSchema, _logger);
        trainResults.Add(trainResult);
    }

    var allRunsSucceeded = trainResults.All(r => r.exception == null);
    if (!allRunsSucceeded)
    {
        // Report the failure with a NaN score and no metrics/model.
        var firstException = trainResults.First(r => r.exception != null).exception;
        var errorRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, double.NaN, false, null, null, firstException);
        return (errorRunDetail, errorRunDetail.ToIterationResult(_preFeaturizer));
    }

    // Get the model from the best fold.
    var bestFoldIndex = BestResultUtil.GetIndexOfBestScore(trainResults.Select(r => r.score), _optimizingMetricInfo.IsMaximizing);
    // bestFoldIndex will be -1 if the optimization metric for all folds is NaN.
    // In this case, return the model from the first fold.
    if (bestFoldIndex == -1)
    {
        bestFoldIndex = 0;
    }
    var bestModel = trainResults[bestFoldIndex].model;

    // Get the average metrics across all folds. The metrics of the fold whose score is
    // closest to the average are handed to GetAverageMetrics alongside the per-fold metrics.
    var avgScore = GetAverageOfNonNaNScores(trainResults.Select(x => x.score));
    var indexClosestToAvg = GetIndexClosestToAverage(trainResults.Select(r => r.score), avgScore);
    var metricsClosestToAvg = trainResults[indexClosestToAvg].metrics;
    var avgMetrics = GetAverageMetrics(trainResults.Select(x => x.metrics), metricsClosestToAvg);

    // Build result objects.
    var suggestedPipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(pipeline, avgScore, allRunsSucceeded, avgMetrics, bestModel, null);
    var runDetail = suggestedPipelineRunDetail.ToIterationResult(_preFeaturizer);
    return (suggestedPipelineRunDetail, runDetail);
}
/// <summary>
/// Emits one tab-separated trace line for an iteration: the current <c>_history</c> count,
/// the run's score, the stopwatch's elapsed time, and the pipeline's string form.
/// </summary>
/// <param name="pipeline">Pipeline whose <c>ToString()</c> output ends the log line.</param>
/// <param name="runResult">Run detail supplying the <c>Score</c> column.</param>
/// <param name="stopwatch">Stopwatch supplying the <c>Elapsed</c> column.</param>
private void WriteIterationLog(SuggestedPipeline pipeline, SuggestedPipelineRunDetail runResult, Stopwatch stopwatch)
    => _logger.Trace($"{_history.Count}\t{runResult.Score}\t{stopwatch.Elapsed}\t{pipeline.ToString()}");