Пример #1
0
        /// <summary>
        /// Proposes the next pipeline to evaluate, based on the scores of pipelines that have already been run.
        /// </summary>
        /// <param name="context">ML context used to convert the history and passed on to the pipeline search.</param>
        /// <param name="history">Scores of previously evaluated pipelines; each entry is converted to a <c>SuggestedPipelineRunDetail</c>.</param>
        /// <param name="columns">Dataset column information, forwarded unchanged to the pipeline search.</param>
        /// <param name="task">Kind of ML task, forwarded unchanged to the pipeline search.</param>
        /// <param name="isMaximizingMetric">Forwarded unchanged to the pipeline search (presumably whether a higher metric is better); defaults to <c>true</c>.</param>
        /// <returns>The next pipeline converted via <c>ToPipeline()</c>, or <c>null</c> when the search returns <c>null</c>.</returns>
        public static Pipeline GetNextPipeline(MLContext context,
                                               IEnumerable <PipelineScore> history,
                                               DatasetColumnInfo[] columns,
                                               TaskKind task,
                                               bool isMaximizingMetric = true)
        {
            // Deferred projection: entries are converted only when the search enumerates them.
            var convertedRuns = history.Select(
                score => SuggestedPipelineRunDetail.FromPipelineRunResult(context, score));

            // This entry point always requests the automatic caching mode.
            var candidate = GetNextInferredPipeline(
                context,
                convertedRuns,
                columns,
                task,
                isMaximizingMetric,
                CacheBeforeTrainer.Auto);

            return candidate?.ToPipeline();
        }
Пример #2
0
        Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
        {
            // Trains and scores `pipeline` once on this runner's train/validation data, then
            // returns the resulting run detail together with the iteration result derived from it.
            var outcome = RunnerUtil.TrainAndScorePipeline(
                _context,
                pipeline,
                _trainData,
                _validData,
                _groupIdColumn,
                _labelColumn,
                _metricsAgent,
                _preprocessorTransform,
                GetModelFileInfo(modelDirectory, iterationNum),
                _modelInputSchema,
                _logger);

            // The run is flagged as succeeded exactly when training reported no exception.
            var succeeded = outcome.exception == null;

            var detail = new SuggestedPipelineRunDetail <TMetrics>(
                pipeline,
                outcome.score,
                succeeded,
                outcome.metrics,
                outcome.model,
                outcome.exception);

            return (detail, detail.ToIterationResult(_preFeaturizer));
        }
        Run(SuggestedPipeline pipeline, DirectoryInfo modelDirectory, int iterationNum)
        {
            // Trains and scores `pipeline` once per train/validation dataset pair (fold) and
            // condenses the per-fold outcomes into one run detail plus its iteration result.
            var foldResults = new List <(ModelContainer model, TMetrics metrics, Exception exception, double score)>();

            for (var fold = 0; fold < _trainDatasets.Length; fold++)
            {
                // The fold number handed to GetModelFileInfo is 1-based.
                var foldModelFile = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, fold + 1);
                var foldOutcome   = RunnerUtil.TrainAndScorePipeline(
                    _context,
                    pipeline,
                    _trainDatasets[fold],
                    _validDatasets[fold],
                    _labelColumn,
                    _metricsAgent,
                    _preprocessorTransforms?.ElementAt(fold),
                    foldModelFile,
                    _modelInputSchema,
                    _logger);

                foldResults.Add(foldOutcome);
            }

            // A single failed fold fails the whole run; the first exception in fold order is reported.
            var firstFailure = foldResults.Select(r => r.exception).FirstOrDefault(e => e != null);
            if (firstFailure != null)
            {
                var failedDetail = new SuggestedPipelineRunDetail <TMetrics>(pipeline, double.NaN, false, null, null, firstFailure);
                return (failedDetail, failedDetail.ToIterationResult(_preFeaturizer));
            }

            // Deferred view over the fold scores, re-enumerated by each helper below.
            var foldScores = foldResults.Select(r => r.score);

            // Take the model from the best-scoring fold. The index is -1 when the optimization
            // metric of every fold is NaN; in that case fall back to the first fold's model.
            var bestFold = BestResultUtil.GetIndexOfBestScore(foldScores, _optimizingMetricInfo.IsMaximizing);
            if (bestFold == -1)
            {
                bestFold = 0;
            }
            var bestModel = foldResults[bestFold].model;

            // Average the metrics across all folds; the metrics of the fold whose score is
            // closest to the average score are passed along to the averaging helper.
            var meanScore     = GetAverageOfNonNaNScores(foldScores);
            var nearestToMean = GetIndexClosestToAverage(foldScores, meanScore);
            var meanMetrics   = GetAverageMetrics(foldResults.Select(r => r.metrics), foldResults[nearestToMean].metrics);

            // Reaching this point means no fold reported an exception, so the run is marked as succeeded.
            var summary = new SuggestedPipelineRunDetail <TMetrics>(pipeline, meanScore, true, meanMetrics, bestModel, null);

            return (summary, summary.ToIterationResult(_preFeaturizer));
        }
Пример #4
0
 /// <summary>
 /// Emits one tab-separated trace line for an iteration: the current history count,
 /// the run's score, the stopwatch's elapsed time, and the pipeline's string form.
 /// </summary>
 /// <param name="pipeline">Pipeline whose <c>ToString()</c> text ends the line.</param>
 /// <param name="runResult">Run detail whose <c>Score</c> is logged.</param>
 /// <param name="stopwatch">Stopwatch whose <c>Elapsed</c> value is logged.</param>
 private void WriteIterationLog(SuggestedPipeline pipeline, SuggestedPipelineRunDetail runResult, Stopwatch stopwatch)
 {
     // Compose the full message first, then hand it to the logger in a single call.
     var traceLine = $"{_history.Count}\t{runResult.Score}\t{stopwatch.Elapsed}\t{pipeline.ToString()}";

     _logger.Trace(traceLine);
 }