示例#1
0
        Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
        {
            // Performs one call to RunnerUtil.TrainAndScorePipeline for `pipeline` using this
            // runner's train/validation data, and returns a pair: the run detail built from the
            // outcome and the iteration result derived from it via ToIterationResult.
            //
            // pipeline:       candidate pipeline handed to TrainAndScorePipeline.
            // modelDirectory: forwarded to GetModelFileInfo to obtain the model file info.
            // iterationNum:   forwarded to GetModelFileInfo together with modelDirectory.
            var modelFile = GetModelFileInfo(modelDirectory, iterationNum);

            var outcome = RunnerUtil.TrainAndScorePipeline(
                _context, pipeline, _trainData, _validData, _groupIdColumn, _labelColumn,
                _metricsAgent, _preprocessorTransform, modelFile, _modelInputSchema, _logger);

            // The run is flagged as successful exactly when no exception was captured.
            var succeeded = outcome.exception == null;

            var pipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(
                pipeline, outcome.score, succeeded, outcome.metrics, outcome.model, outcome.exception);

            return (pipelineRunDetail, pipelineRunDetail.ToIterationResult(_preFeaturizer));
        }
        Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
        {
            // Calls RunnerUtil.TrainAndScorePipeline once per train/validation dataset pair
            // ("fold") and condenses the per-fold outcomes into a single run detail.
            //
            // - If any fold captured an exception, the result is a failed run detail carrying
            //   the first such exception, a NaN score, and no metrics or model.
            // - Otherwise the result carries the averaged score and metrics together with the
            //   model of the best-scoring fold.
            //
            // pipeline:       candidate pipeline handed to TrainAndScorePipeline for every fold.
            // modelDirectory: forwarded to RunnerUtil.GetModelFileInfo for every fold.
            // iterationNum:   forwarded to RunnerUtil.GetModelFileInfo for every fold.
            var foldOutcomes = new List<(ModelContainer model, TMetrics metrics, Exception exception, double score)>();

            for (var fold = 0; fold < _trainDatasets.Length; fold++)
            {
                // The fold number handed to GetModelFileInfo is 1-based.
                var foldModelFile = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, fold + 1);

                foldOutcomes.Add(RunnerUtil.TrainAndScorePipeline(
                    _context, pipeline, _trainDatasets[fold], _validDatasets[fold], _labelColumn, _metricsAgent,
                    _preprocessorTransforms?.ElementAt(fold), foldModelFile, _modelInputSchema, _logger));
            }

            // Every fold is run before failures are inspected; the first captured exception
            // (in fold order) is the one reported.
            var failedFolds       = foldOutcomes.Where(outcome => outcome.exception != null);
            var allFoldsSucceeded = !failedFolds.Any();

            if (!allFoldsSucceeded)
            {
                var failedRunDetail = new SuggestedPipelineRunDetail<TMetrics>(
                    pipeline, double.NaN, false, null, null, failedFolds.First().exception);

                return (failedRunDetail, failedRunDetail.ToIterationResult(_preFeaturizer));
            }

            IEnumerable<double> foldScores = foldOutcomes.Select(outcome => outcome.score);

            // Pick the model of the best fold. The index is -1 when the optimization metric
            // is NaN for all folds; in that case fall back to the first fold's model.
            var bestFold = BestResultUtil.GetIndexOfBestScore(foldScores, _optimizingMetricInfo.IsMaximizing);
            if (bestFold == -1)
            {
                bestFold = 0;
            }
            var modelOfBestFold = foldOutcomes[bestFold].model;

            // Average across folds. The metrics of the fold whose score lies closest to the
            // average score are passed to GetAverageMetrics alongside all fold metrics.
            var meanScore          = GetAverageOfNonNaNScores(foldScores);
            var foldNearestToMean  = GetIndexClosestToAverage(foldScores, meanScore);
            var metricsNearestMean = foldOutcomes[foldNearestToMean].metrics;
            var meanMetrics        = GetAverageMetrics(foldOutcomes.Select(outcome => outcome.metrics), metricsNearestMean);

            // Assemble the successful result pair.
            var pipelineRunDetail = new SuggestedPipelineRunDetail<TMetrics>(
                pipeline, meanScore, allFoldsSucceeded, meanMetrics, modelOfBestFold, null);

            return (pipelineRunDetail, pipelineRunDetail.ToIterationResult(_preFeaturizer));
        }
示例#3
0
        Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
        {
            // Calls RunnerUtil.TrainAndScorePipeline once per train/validation dataset pair
            // ("fold"), keeps every per-fold result, and returns a cross-validation run detail
            // paired with the iteration result derived from it via ToIterationResult.
            //
            // pipeline:       candidate pipeline handed to TrainAndScorePipeline for every fold.
            // modelDirectory: forwarded to RunnerUtil.GetModelFileInfo for every fold.
            // iterationNum:   forwarded to RunnerUtil.GetModelFileInfo for every fold.
            var foldResults = new List<SuggestedPipelineTrainResult<TMetrics>>();

            for (var fold = 0; fold < _trainDatasets.Length; fold++)
            {
                // The fold number handed to GetModelFileInfo is 1-based.
                var foldModelFile = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, fold + 1);

                var outcome = RunnerUtil.TrainAndScorePipeline(
                    _context, pipeline, _trainDatasets[fold], _validDatasets[fold], _labelColumn, _metricsAgent,
                    _preprocessorTransforms?[fold], foldModelFile, _modelInputSchema, _logger);

                var foldResult = new SuggestedPipelineTrainResult<TMetrics>(
                    outcome.model, outcome.metrics, outcome.exception, outcome.score);
                foldResults.Add(foldResult);
            }

            var meanScore = CalcAverageScore(foldResults.Select(result => result.Score));

            // The run as a whole succeeded only if no fold captured an exception.
            var allFoldsSucceeded = foldResults.All(result => result.Exception == null);

            var crossValRunDetail = new SuggestedPipelineCrossValRunDetail<TMetrics>(
                pipeline, meanScore, allFoldsSucceeded, foldResults);

            return (crossValRunDetail, crossValRunDetail.ToIterationResult(_preFeaturizer));
        }