/// <summary>
/// Expands a binary cross-validation macro node into the sub-graph that implements it:
/// one dataset splitter, one "Models.TrainTestBinaryEvaluator" node per fold, and array
/// converters that gather the per-fold models, warnings and metrics into the macro's outputs.
/// </summary>
/// <param name="env">Host environment used to create the experiment, the per-fold run contexts and the generated nodes.</param>
/// <param name="input">Macro arguments (fold count, stratification column, sub-graph nodes and their input/output variables). Checked for null via <c>env.CheckValue</c>.</param>
/// <param name="node">The macro node being expanded; provides the outer run context and the names of its input and output variables.</param>
/// <returns>A macro output whose <c>Nodes</c> property holds every node generated by the expansion.</returns>
public static CommonOutputs.MacroOutput<Output> CrossValidateBinary(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Same argument validation as the CrossValidate macro: fail before input is dereferenced.
    env.CheckValue(input, nameof(input));

    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    // Split the input data into folds.
    var exp = new Experiment(env);
    var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();
    cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson();
    cvSplit.NumFolds = input.NumFolds;
    cvSplit.StratificationColumn = input.StratificationColumn;
    var cvSplitOutput = exp.Add(cvSplit);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // One output variable per fold for each kind of result produced by the train/test macro.
    var predModelVars = new Var<PredictorModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold.
        var args = new TrainTestBinaryMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray())
        };
        // The sub-graph's data input and model output are referenced through their renamed variables.
        args.Inputs.Data = new Var<IDataView> { VarName = mapping[input.Inputs.Data.VarName] };
        args.Outputs.Model = new Var<PredictorModel> { VarName = mapping[input.Outputs.Model.VarName] };

        // Set the input bindings for the TrainTest entry point:
        // training and testing data are the k-th elements of the splitter's output arrays.
        var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        var inputMap = new Dictionary<ParameterBinding, VariableBinding>();

        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));

        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));

        // Set the output bindings: each output of the fold goes to a fresh variable
        // that is remembered so it can be collected into an array after the loop.
        var outputMap = new Dictionary<string, string>();

        var predModelVar = new Var<PredictorModel>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.PredictorModel), predModelVar.VarName);
        predModelVars[k] = predModelVar;

        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;

        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;

        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;

        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestBinaryMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;

        subGraphNodes.Add(EntryPointNode.Create(env, "Models.TrainTestBinaryEvaluator", args, node.Context, inputBindingMap, inputMap, outputMap));
    }

    // NOTE(review): presumably discards the splitter node already copied into subGraphNodes,
    // so that the GetNodes() call below yields only the converters - confirm against Experiment.Reset.
    exp.Reset();

    // Convert the per-fold predictor models into a single array bound to the macro's PredictorModel output.
    var outModels = new Legacy.Data.PredictorModelArrayConverter
    {
        Model = new ArrayVar<PredictorModel>(predModelVars)
    };
    var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();
    outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
    exp.Add(outModels, outModelsOutput);

    // Convert the per-fold warnings data views into an array bound to the Warnings output.
    var warnings = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(warningsVars)
    };
    var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    warningsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    exp.Add(warnings, warningsOutput);

    // Convert the per-fold overall metrics into an array bound to the OverallMetrics output.
    var overallMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(overallMetricsVars)
    };
    var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    overallMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    exp.Add(overallMetrics, overallMetricsOutput);

    // Convert the per-fold per-instance metrics into an array bound to the PerInstanceMetrics output.
    var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(instanceMetricsVars)
    };
    var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    instanceMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    exp.Add(instanceMetrics, instanceMetricsOutput);

    // Convert the per-fold confusion matrices into an array bound to the ConfusionMatrix output.
    var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(confusionMatrixVars)
    };
    var confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    confusionMatricesOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
    exp.Add(confusionMatrices, confusionMatricesOutput);

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    return new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes };
}
/// <summary>
/// Expands a cross-validation macro node into the sub-graph that implements it:
/// a dataset splitter, one "Models.TrainTestEvaluator" node per fold (optionally followed by a
/// model combiner when an input transform model is supplied), array converters that gather the
/// per-fold results, and a "Models.CrossValidationResultsCombiner" node that produces the
/// macro's warnings and metrics outputs.
/// </summary>
/// <param name="env">Host environment used to create the experiment, the per-fold run contexts and the generated nodes.</param>
/// <param name="input">Macro arguments (fold count, trainer kind, column names, optional transform model, sub-graph nodes and their input/output variables). Checked for null via <c>env.CheckValue</c>.</param>
/// <param name="node">The macro node being expanded; provides the catalog, the outer run context and the names of its input and output variables.</param>
/// <returns>A macro output whose <c>Nodes</c> property holds every node generated by the expansion.</returns>
public static CommonOutputs.MacroOutput<Output> CrossValidate(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.CheckValue(input, nameof(input));

    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    // The input transform model, if one was supplied.
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }

    // Split the input data into folds.
    var exp = new Experiment(env);
    var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();
    cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson();
    cvSplit.NumFolds = input.NumFolds;
    cvSplit.StratificationColumn = input.StratificationColumn;
    var cvSplitOutput = exp.Add(cvSplit);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

    // One output variable per fold for each kind of result produced by the train/test macro.
    var predModelVars = new Var<IPredictorModel>[input.NumFolds];
    var transformModelVars = new Var<ITransformModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold.
        var args = new TrainTestMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()),
            TransformModel = null,
            LabelColumn = input.LabelColumn,
            GroupColumn = input.GroupColumn,
            WeightColumn = input.WeightColumn,
            NameColumn = input.NameColumn
        };

        if (transformModelVarName != null)
        {
            args.TransformModel = new Var<ITransformModel> { VarName = transformModelVarName.VariableName };
        }

        args.Inputs.Data = new Var<IDataView> { VarName = mapping[input.Inputs.Data.VarName] };

        // The sub-graph outputs are only bound when they are declared and were actually renamed,
        // i.e. when some node of the sub-graph refers to them.
        string mappedPredictorModelName;
        if (input.Outputs.PredictorModel != null
            && mapping.TryGetValue(input.Outputs.PredictorModel.VarName, out mappedPredictorModelName))
        {
            args.Outputs.PredictorModel = new Var<IPredictorModel> { VarName = mappedPredictorModelName };
        }
        else
        {
            args.Outputs.PredictorModel = null;
        }

        string mappedTransformModelName;
        if (input.Outputs.TransformModel != null
            && mapping.TryGetValue(input.Outputs.TransformModel.VarName, out mappedTransformModelName))
        {
            args.Outputs.TransformModel = new Var<ITransformModel> { VarName = mappedTransformModelName };
        }
        else
        {
            args.Outputs.TransformModel = null;
        }

        // Set train/test trainer kind to match.
        args.Kind = input.Kind;

        // Set the input bindings for the TrainTest entry point:
        // training and testing data are the k-th elements of the splitter's output arrays.
        var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        var inputMap = new Dictionary<ParameterBinding, VariableBinding>();

        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));

        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));

        // Set the output bindings for the TrainTest entry point.
        var outputMap = new Dictionary<string, string>();
        var transformModelVar = new Var<ITransformModel>();
        var predModelVar = new Var<IPredictorModel>();
        if (input.Outputs.PredictorModel == null)
        {
            // No predictor output declared: the fold produces a transform model.
            outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
            transformModelVars[k] = transformModelVar;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                // Feed the input transform model and the fold's transform model into a ModelCombiner
                // and report the combiner's output as this fold's model instead.
                var modelCombine = new Legacy.Transforms.ModelCombiner
                {
                    Models = new ArrayVar<ITransformModel>(
                        new Var<ITransformModel>[]
                        {
                            new Var<ITransformModel> { VarName = transformModelVarName.VariableName },
                            transformModelVar
                        })
                };

                // NOTE(review): presumably clears nodes already copied into subGraphNodes so that
                // GetNodes() yields only the combiner - confirm against Experiment.Reset.
                exp.Reset();
                Legacy.Transforms.ModelCombiner.Output modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                transformModelVars[k] = modelCombineOutput.OutputModel;
            }
        }
        else
        {
            // A predictor output is declared: the fold produces a predictor model.
            outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
            predModelVars[k] = predModelVar;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                // Feed the input transform model and the fold's predictor into a
                // TwoHeterogeneousModelCombiner and report its output as this fold's model instead.
                var modelCombine = new Legacy.Transforms.TwoHeterogeneousModelCombiner
                {
                    TransformModel = { VarName = transformModelVarName.VariableName },
                    PredictorModel = predModelVar
                };

                // NOTE(review): same assumption about Experiment.Reset as in the branch above.
                exp.Reset();
                Legacy.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                predModelVars[k] = modelCombineOutput.PredictorModel;
            }
        }

        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;

        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;

        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;

        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;

        const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
        subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
    }

    exp.Reset();

    // Convert predictors from all folds into an array of predictors.
    if (input.Outputs.PredictorModel == null)
    {
        var outModels = new Legacy.Data.TransformModelArrayConverter
        {
            TransformModel = new ArrayVar<ITransformModel>(transformModelVars)
        };
        var outModelsOutput = new Legacy.Data.TransformModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
        exp.Add(outModels, outModelsOutput);
    }
    else
    {
        var outModels = new Legacy.Data.PredictorModelArrayConverter
        {
            Model = new ArrayVar<IPredictorModel>(predModelVars)
        };
        var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
        exp.Add(outModels, outModelsOutput);
    }

    // Convert warnings data views from all folds into an array of data views.
    // The converter outputs below keep their default variable names: they are intermediate
    // values consumed by the results combiner, not outputs of the macro itself.
    var warnings = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(warningsVars)
    };
    var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    exp.Add(warnings, warningsOutput);

    // Convert overall metrics data views from all folds into an array of data views.
    var overallMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(overallMetricsVars)
    };
    var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    exp.Add(overallMetrics, overallMetricsOutput);

    // Convert per instance data views from all folds into an array of data views.
    var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(instanceMetricsVars)
    };
    var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();
    exp.Add(instanceMetrics, instanceMetricsOutput);

    // Confusion matrices are only collected for binary and multi-class classifier trainers.
    Legacy.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
    if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
        input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
    {
        // Convert confusion matrix data views from all folds into an array of data views.
        var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
        {
            Data = new ArrayVar<IDataView>(confusionMatrixVars)
        };
        confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();
        exp.Add(confusionMatrices, confusionMatricesOutput);
    }

    var combineArgs = new CombineMetricsInput();
    combineArgs.Kind = input.Kind;
    combineArgs.LabelColumn = input.LabelColumn;
    combineArgs.WeightColumn = input.WeightColumn;
    combineArgs.GroupColumn = input.GroupColumn;
    combineArgs.NameColumn = input.NameColumn;

    // Set the input bindings for the CombineMetrics entry point.
    var combineInputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var combineInputMap = new Dictionary<ParameterBinding, VariableBinding>();

    var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings));
    combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List<ParameterBinding> { warningsArray });
    combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsOutput.OutputData.VarName));

    var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List<ParameterBinding> { overallArray });
    combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));

    var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List<ParameterBinding> { combinePerInstArray });
    combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));

    if (confusionMatricesOutput != null)
    {
        var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
        combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List<ParameterBinding> { combineConfArray });
        combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
    }

    // Set the output bindings for the CombineMetrics entry point: its results are written
    // directly to the macro node's own output variables.
    var combineOutputMap = new Dictionary<string, string>();

    var combineWarningVar = new Var<IDataView>();
    combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);

    var combineOverallMetric = new Var<IDataView>();
    combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);

    var combineInstanceMetric = new Var<IDataView>();
    combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);

    if (confusionMatricesOutput != null)
    {
        var combineConfusionMatrix = new Var<IDataView>();
        combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
        combineOutputMap.Add(nameof(Output.ConfusionMatrix), combineConfusionMatrix.VarName);
    }

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
    subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));

    return new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes };
}