public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetricsInput input) { var eval = GetEvaluator(env, input.Kind); var perInst = EvaluateUtils.ConcatenatePerInstanceDataViews(env, eval, true, true, input.PerInstanceMetrics.Select( idv => RoleMappedData.CreateOpt(idv, new[] { RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Label, input.LabelColumn), RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Weight, input.WeightColumn.Value), RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Group, input.GroupColumn.Value) })).ToArray(), out var variableSizeVectorColumnNames); var warnings = input.Warnings != null ? new List <IDataView>(input.Warnings) : new List <IDataView>(); if (variableSizeVectorColumnNames.Length > 0) { var dvBldr = new ArrayDataViewBuilder(env); var warn = $"Detected columns of variable length: {string.Join(", ", variableSizeVectorColumnNames)}." + $" Consider setting collateMetrics- for meaningful per-Folds results."; dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, new DvText(warn)); warnings.Add(dvBldr.GetDataView()); } env.Assert(Utils.Size(perInst) == 1); var overall = eval.GetOverallResults(input.OverallMetrics); overall = EvaluateUtils.CombineFoldMetricsDataViews(env, overall, input.OverallMetrics.Length); IDataView conf = null; if (Utils.Size(input.ConfusionMatrix) > 0) { EvaluateUtils.ReconcileSlotNames <double>(env, input.ConfusionMatrix, MetricKinds.ColumnNames.Count, NumberType.R8); for (int i = 0; i < input.ConfusionMatrix.Length; i++) { var idv = input.ConfusionMatrix[i]; // Find the old Count column and drop it. for (int col = 0; col < idv.Schema.ColumnCount; col++) { if (idv.Schema.IsHidden(col) && idv.Schema.GetColumnName(col).Equals(MetricKinds.ColumnNames.Count)) { input.ConfusionMatrix[i] = new ChooseColumnsByIndexTransform(env, new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Index = new[] { col } }, idv); break; } } } conf = EvaluateUtils.ConcatenateOverallMetrics(env, input.ConfusionMatrix); } var warningsIdv = warnings.Count > 0 ? AppendRowsDataView.Create(env, warnings[0].Schema, warnings.ToArray()) : null; return(new CombinedOutput() { PerInstanceMetrics = perInst[0], OverallMetrics = overall, ConfusionMatrix = conf, Warnings = warningsIdv }); }
private void RunCore(IChannel ch, string cmd) { Host.AssertValue(ch); IPredictor inputPredictor = null; if (Args.ContinueTrain && !TrainUtils.TryLoadPredictor(ch, Host, Args.InputModelFile, out inputPredictor)) { ch.Warning("No input model file specified or model file did not contain a predictor. The model state cannot be initialized."); } ch.Trace("Constructing data pipeline"); IDataLoader loader = CreateRawLoader(); // If the per-instance results are requested and there is no name column, add a GenerateNumberTransform. var preXf = Args.PreTransform; if (!string.IsNullOrEmpty(Args.OutputDataFile)) { string name = TrainUtils.MatchNameOrDefaultOrNull(ch, loader.Schema, nameof(Args.NameColumn), Args.NameColumn, DefaultColumnNames.Name); if (name == null) { var args = new GenerateNumberTransform.Arguments(); args.Column = new[] { new GenerateNumberTransform.Column() { Name = DefaultColumnNames.Name }, }; args.UseCounter = true; var options = CmdParser.GetSettings(ch, args, new GenerateNumberTransform.Arguments()); preXf = preXf.Concat( new[] { new KeyValuePair <string, SubComponent <IDataTransform, SignatureDataTransform> >( "", new SubComponent <IDataTransform, SignatureDataTransform>( GenerateNumberTransform.LoadName, options)) }).ToArray(); } } loader = CompositeDataLoader.Create(Host, loader, preXf); ch.Trace("Binding label and features columns"); IDataView pipe = loader; var stratificationColumn = GetSplitColumn(ch, loader, ref pipe); var scorer = Args.Scorer; var evaluator = Args.Evaluator; Func <IDataView> validDataCreator = null; if (Args.ValidationFile != null) { validDataCreator = () => { // Fork the command. var impl = new CrossValidationCommand(this); return(impl.CreateRawLoader(dataFile: Args.ValidationFile)); }; } FoldHelper fold = new FoldHelper(Host, RegistrationName, pipe, stratificationColumn, Args, CreateRoleMappedData, ApplyAllTransformsToData, scorer, evaluator, validDataCreator, ApplyAllTransformsToData, inputPredictor, cmd, loader, !string.IsNullOrEmpty(Args.OutputDataFile)); var tasks = fold.GetCrossValidationTasks(); if (!evaluator.IsGood()) { evaluator = EvaluateUtils.GetEvaluatorType(ch, tasks[0].Result.ScoreSchema); } var eval = evaluator.CreateInstance(Host); // Print confusion matrix and fold results for each fold. for (int i = 0; i < tasks.Length; i++) { var dict = tasks[i].Result.Metrics; MetricWriter.PrintWarnings(ch, dict); eval.PrintFoldResults(ch, dict); } // Print the overall results. if (!TryGetOverallMetrics(tasks.Select(t => t.Result.Metrics).ToArray(), out var overallList)) { throw ch.Except("No overall metrics found"); } var overall = eval.GetOverallResults(overallList.ToArray()); MetricWriter.PrintOverallMetrics(Host, ch, Args.SummaryFilename, overall, Args.NumFolds); eval.PrintAdditionalMetrics(ch, tasks.Select(t => t.Result.Metrics).ToArray()); Dictionary <string, IDataView>[] metricValues = tasks.Select(t => t.Result.Metrics).ToArray(); SendTelemetryMetric(metricValues); // Save the per-instance results. if (!string.IsNullOrWhiteSpace(Args.OutputDataFile)) { var perInstance = EvaluateUtils.ConcatenatePerInstanceDataViews(Host, eval, Args.CollateMetrics, Args.OutputExampleFoldIndex, tasks.Select(t => t.Result.PerInstanceResults).ToArray(), out var variableSizeVectorColumnNames); if (variableSizeVectorColumnNames.Length > 0) { ch.Warning("Detected columns of variable length: {0}. Consider setting collateMetrics- for meaningful per-Folds results.", string.Join(", ", variableSizeVectorColumnNames)); } if (Args.CollateMetrics) { ch.Assert(perInstance.Length == 1); MetricWriter.SavePerInstance(Host, ch, Args.OutputDataFile, perInstance[0]); } else { int i = 0; foreach (var idv in perInstance) { MetricWriter.SavePerInstance(Host, ch, ConstructPerFoldName(Args.OutputDataFile, i), idv); i++; } } } }