/// <summary> /// Checks the given JSON object key-value pair is a valid EntryPoint output. /// Extracts out any variables that need to be populated and adds them to the /// EntryPoint context. /// </summary> private void CheckAndMarkOutputValue(KeyValuePair <string, JToken> pair) { if (!VariableBinding.IsBindingToken(pair.Value)) { throw _host.Except("Only variables allowed as outputs"); } // Output variable. var varBinding = VariableBinding.Create(_host, pair.Value.Value <string>()); if (!(varBinding is SimpleVariableBinding)) { throw _host.Except($"Output '{pair.Key}' can only be bound to a variable"); } var valueType = _outputHelper.GetFieldType(pair.Key); if (valueType == null) { throw _host.Except($"Unexpected output name: '{pair.Key}"); } if (!EntryPointVariable.IsValidType(valueType)) { throw _host.Except($"Output '{pair.Key}' has invalid type"); } _context.AddOutputVariable(varBinding.VariableName, valueType); _outputMap[pair.Key] = varBinding.VariableName; }
public void AddInputVariable(VariableBinding binding, Type type) { _ectx.AssertValue(binding); _ectx.AssertValue(type); if (binding is ArrayIndexVariableBinding) { type = Utils.MarshalInvoke(MakeArray <int>, type); } else if (binding is DictionaryKeyVariableBinding) { type = Utils.MarshalInvoke(MakeDictionary <int>, type); } EntryPointVariable v; if (!_vars.TryGetValue(binding.VariableName, out v)) { v = new EntryPointVariable(_ectx, binding.VariableName, type); _vars[binding.VariableName] = v; } else if (v.Type != type) { throw _ectx.Except($"Variable '{v.Name}' is used as {v.Type} and as {type}"); } v.MarkUsage(true); }
public object GetValueOrNull(VariableBinding binding) { _ectx.AssertValue(binding); EntryPointVariable v; if (!TryGetVariable(binding.VariableName, out v)) { return(null); } return(binding.GetVariableValueOrNull(v)); }
public void RenameInputVariable(string oldName, VariableBinding newBinding) { var toModify = new List <ParameterBinding>(); foreach (var kvp in _inputMap) { if (kvp.Value.VariableName == oldName) { toModify.Add(kvp.Key); } } foreach (var parameterBinding in toModify) { _inputMap[parameterBinding] = newBinding; } }
public static CommonOutputs.MacroOutput <Output> TrainTest( IHostEnvironment env, Arguments input, EntryPointNode node) { // Create default pipeline ID if one not given. input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N"); // Parse the subgraph. var subGraphRunContext = new RunContext(env); var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, node.Catalog); // Change the subgraph to use the training data as input. var varName = input.Inputs.Data.VarName; VariableBinding transformModelVarName = null; if (input.TransformModel != null) { transformModelVarName = node.GetInputVariable(nameof(input.TransformModel)); } if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable)) { throw env.Except($"Invalid variable name '{varName}'."); } var trainingVar = node.GetInputVariable(nameof(input.TrainingData)); foreach (var subGraphNode in subGraphNodes) { subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar); } subGraphRunContext.RemoveVariable(dataVariable); // Change the subgraph to use the model variable as output. varName = input.Outputs.Model.VarName; if (!subGraphRunContext.TryGetVariable(varName, out dataVariable)) { throw env.Except($"Invalid variable name '{varName}'."); } string outputVarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); foreach (var subGraphNode in subGraphNodes) { subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName); } subGraphRunContext.RemoveVariable(dataVariable); // Move the variables from the subcontext to the main context. node.Context.AddContextVariables(subGraphRunContext); // Change all the subgraph nodes to use the main context. foreach (var subGraphNode in subGraphNodes) { subGraphNode.SetContext(node.Context); } // Testing using test data set var testingVar = node.GetInputVariable(nameof(input.TestingData)); var exp = new Experiment(env); //combine the predictor model with any potential transfrom model passed from the outer graph if (transformModelVarName != null && transformModelVarName.VariableName != null) { var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner { TransformModel = { VarName = transformModelVarName.VariableName }, PredictorModel = { VarName = outputVarName } }; var modelCombineOutput = exp.Add(modelCombine); outputVarName = modelCombineOutput.PredictorModel.VarName; } // Add the scoring node for testing. var scoreNode = new ML.Transforms.DatasetScorer { Data = { VarName = testingVar.ToJson() }, PredictorModel = { VarName = outputVarName } }; var scoreNodeOutput = exp.Add(scoreNode); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); // Do not double-add previous nodes. exp.Reset(); // REVIEW: we need to extract the proper label column name here to pass to the evaluators. // This is where you would add code to do it. var settings = new MacroUtils.EvaluatorSettings { LabelColumn = DefaultColumnNames.Label }; string outVariableName; if (input.IncludeTrainingMetrics) { // Add the scoring node for training. var scoreNodeTraining = new ML.Transforms.DatasetScorer { Data = { VarName = trainingVar.ToJson() }, PredictorModel = { VarName = outputVarName } }; var scoreNodeTrainingOutput = exp.Add(scoreNodeTraining); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); // Do not double-add previous nodes. exp.Reset(); // Add the evaluator node for training. var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings); var evalNodeTraining = evalInputOutputTraining.Item1; var evalOutputTraining = evalInputOutputTraining.Item2; evalNodeTraining.Data.VarName = scoreNodeTrainingOutput.ScoredData.VarName; if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName)) { evalOutputTraining.Warnings.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outVariableName)) { evalOutputTraining.OverallMetrics.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outVariableName)) { evalOutputTraining.PerInstanceMetrics.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outVariableName) && evalOutputTraining is CommonOutputs.IClassificationEvaluatorOutput eoTraining) { eoTraining.ConfusionMatrix.VarName = outVariableName; } exp.Add(evalNodeTraining, evalOutputTraining); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); } // Do not double-add previous nodes. exp.Reset(); // Add the evaluator node for testing. var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings); var evalNode = evalInputOutput.Item1; var evalOutput = evalInputOutput.Item2; evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName; if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName)) { evalOutput.Warnings.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName)) { evalOutput.OverallMetrics.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName)) { evalOutput.PerInstanceMetrics.VarName = outVariableName; } if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName) && evalOutput is CommonOutputs.IClassificationEvaluatorOutput eo) { eo.ConfusionMatrix.VarName = outVariableName; } exp.Add(evalNode, evalOutput); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); // Marks as an atomic unit that can be run in // a distributed fashion. foreach (var subGraphNode in subGraphNodes) { subGraphNode.StageId = input.PipelineId; } return(new CommonOutputs.MacroOutput <Output>() { Nodes = subGraphNodes }); }
public static CommonOutputs.MacroOutput <Output> CrossValidate( IHostEnvironment env, Arguments input, EntryPointNode node) { env.CheckValue(input, nameof(input)); // This will be the final resulting list of nodes that is returned from the macro. var subGraphNodes = new List <EntryPointNode>(); //the input transform model VariableBinding transformModelVarName = null; if (input.TransformModel != null) { transformModelVarName = node.GetInputVariable(nameof(input.TransformModel)); } // Split the input data into folds. var exp = new Experiment(env); var cvSplit = new Models.CrossValidatorDatasetSplitter(); cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson(); cvSplit.NumFolds = input.NumFolds; cvSplit.StratificationColumn = input.StratificationColumn; var cvSplitOutput = exp.Add(cvSplit); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); var predModelVars = new Var <IPredictorModel> [input.NumFolds]; var transformModelVars = new Var <ITransformModel> [input.NumFolds]; var inputTransformModelVars = new Var <IPredictorModel> [input.NumFolds]; var warningsVars = new Var <IDataView> [input.NumFolds]; var overallMetricsVars = new Var <IDataView> [input.NumFolds]; var instanceMetricsVars = new Var <IDataView> [input.NumFolds]; var confusionMatrixVars = new Var <IDataView> [input.NumFolds]; // Instantiate the subgraph for each fold. for (int k = 0; k < input.NumFolds; k++) { // Parse the nodes in input.Nodes into a temporary run context. var context = new RunContext(env); var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog); // Rename all the variables such that they don't conflict with the ones in the outer run context. var mapping = new Dictionary <string, string>(); foreach (var entryPointNode in graph) { entryPointNode.RenameAllVariables(mapping); } // Instantiate a TrainTest entry point for this fold. var args = new TrainTestMacro.Arguments { Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()), TransformModel = null, LabelColumn = input.LabelColumn, GroupColumn = input.GroupColumn, WeightColumn = input.WeightColumn }; if (transformModelVarName != null) { args.TransformModel = new Var <ITransformModel> { VarName = transformModelVarName.VariableName } } ; args.Inputs.Data = new Var <IDataView> { VarName = mapping[input.Inputs.Data.VarName] }; if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName)) { args.Outputs.PredictorModel = new Var <IPredictorModel> { VarName = mapping[input.Outputs.PredictorModel.VarName] }; } else { args.Outputs.PredictorModel = null; } if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName)) { args.Outputs.TransformModel = new Var <ITransformModel> { VarName = mapping[input.Outputs.TransformModel.VarName] }; } else { args.Outputs.TransformModel = null; } // Set train/test trainer kind to match. args.Kind = input.Kind; // Set the input bindings for the TrainTest entry point. var inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); var inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var trainingData = new SimpleParameterBinding(nameof(args.TrainingData)); inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> { trainingData }); inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k)); var testingData = new SimpleParameterBinding(nameof(args.TestingData)); inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> { testingData }); inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k)); var outputMap = new Dictionary <string, string>(); var transformModelVar = new Var <ITransformModel>(); var predModelVar = new Var <IPredictorModel>(); if (input.Outputs.PredictorModel == null) { outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName); transformModelVars[k] = transformModelVar; ML.Transforms.ModelCombiner.Output modelCombineOutput = null; if (transformModelVarName != null && transformModelVarName.VariableName != null) { var modelCombine = new ML.Transforms.ModelCombiner { Models = new ArrayVar <ITransformModel>( new Var <ITransformModel>[] { new Var <ITransformModel> { VarName = transformModelVarName.VariableName }, transformModelVar } ) }; exp.Reset(); modelCombineOutput = exp.Add(modelCombine); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); transformModelVars[k] = modelCombineOutput.OutputModel; } } else { outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName); predModelVars[k] = predModelVar; ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null; if (transformModelVarName != null && transformModelVarName.VariableName != null) { var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner { TransformModel = { VarName = transformModelVarName.VariableName }, PredictorModel = predModelVar }; exp.Reset(); modelCombineOutput = exp.Add(modelCombine); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); predModelVars[k] = modelCombineOutput.PredictorModel; } } var warningVar = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName); warningsVars[k] = warningVar; var overallMetric = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName); overallMetricsVars[k] = overallMetric; var instanceMetric = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName); instanceMetricsVars[k] = instanceMetric; var confusionMatrix = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName); confusionMatrixVars[k] = confusionMatrix; const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator"; subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap)); } exp.Reset(); // Convert predictors from all folds into an array of predictors. if (input.Outputs.PredictorModel == null) { var outModels = new ML.Data.TransformModelArrayConverter { TransformModel = new ArrayVar <ITransformModel>(transformModelVars) }; var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output(); outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel)); exp.Add(outModels, outModelsOutput); } else { var outModels = new ML.Data.PredictorModelArrayConverter { Model = new ArrayVar <IPredictorModel>(predModelVars) }; var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output(); outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); exp.Add(outModels, outModelsOutput); } // Convert warnings data views from all folds into an array of data views. var warnings = new ML.Data.IDataViewArrayConverter { Data = new ArrayVar <IDataView>(warningsVars) }; var warningsOutput = new ML.Data.IDataViewArrayConverter.Output(); exp.Add(warnings, warningsOutput); // Convert overall metrics data views from all folds into an array of data views. var overallMetrics = new ML.Data.IDataViewArrayConverter { Data = new ArrayVar <IDataView>(overallMetricsVars) }; var overallMetricsOutput = new ML.Data.IDataViewArrayConverter.Output(); exp.Add(overallMetrics, overallMetricsOutput); // Convert per instance data views from all folds into an array of data views. var instanceMetrics = new ML.Data.IDataViewArrayConverter { Data = new ArrayVar <IDataView>(instanceMetricsVars) }; var instanceMetricsOutput = new ML.Data.IDataViewArrayConverter.Output(); exp.Add(instanceMetrics, instanceMetricsOutput); ML.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null; if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer || input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer) { // Convert confusion matrix data views from all folds into an array of data views. var confusionMatrices = new ML.Data.IDataViewArrayConverter { Data = new ArrayVar <IDataView>(confusionMatrixVars) }; confusionMatricesOutput = new ML.Data.IDataViewArrayConverter.Output(); exp.Add(confusionMatrices, confusionMatricesOutput); } var combineArgs = new CombineMetricsInput(); combineArgs.Kind = input.Kind; combineArgs.LabelColumn = input.LabelColumn; combineArgs.WeightColumn = input.WeightColumn; combineArgs.GroupColumn = input.GroupColumn; // Set the input bindings for the CombineMetrics entry point. var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >(); var combineInputMap = new Dictionary <ParameterBinding, VariableBinding>(); var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics)); combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> { overallArray }); combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName)); var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics)); combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> { combinePerInstArray }); combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName)); if (confusionMatricesOutput != null) { var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix)); combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> { combineConfArray }); combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName)); } var combineOutputMap = new Dictionary <string, string>(); var combineWarningVar = new Var <IDataView>(); combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings)); combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName); var combineOverallMetric = new Var <IDataView>(); combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics)); combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName); var combineInstanceMetric = new Var <IDataView>(); combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics)); combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName); if (confusionMatricesOutput != null) { var combineConfusionMatrix = new Var <IDataView>(); combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix)); combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName); } subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap)); return(new CommonOutputs.MacroOutput <Output>() { Nodes = subGraphNodes }); }
public static CommonOutputs.MacroOutput <Output> CrossValidate( IHostEnvironment env, Arguments input, EntryPointNode node) { env.CheckValue(input, nameof(input)); // This will be the final resulting list of nodes that is returned from the macro. var subGraphNodes = new List <EntryPointNode>(); //the input transform model VariableBinding transformModelVarName = null; if (input.TransformModel != null) { transformModelVarName = node.GetInputVariable(nameof(input.TransformModel)); } // Split the input data into folds. var splitArgs = new CVSplit.Input(); splitArgs.NumFolds = input.NumFolds; splitArgs.StratificationColumn = input.StratificationColumn; var inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); var inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var inputData = node.GetInputVariable(nameof(splitArgs.Data)); ParameterBinding paramBinding = new SimpleParameterBinding(nameof(splitArgs.Data)); inputBindingMap.Add(nameof(splitArgs.Data), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, inputData); var outputMap = new Dictionary <string, string>(); var splitOutputTrainData = new ArrayVar <IDataView>(); var splitOutputTestData = new ArrayVar <IDataView>(); outputMap.Add(nameof(CVSplit.Output.TrainData), splitOutputTrainData.VarName); outputMap.Add(nameof(CVSplit.Output.TestData), splitOutputTestData.VarName); var splitNode = EntryPointNode.Create(env, "Models.CrossValidatorDatasetSplitter", splitArgs, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(splitNode); var predModelVars = new Var <PredictorModel> [input.NumFolds]; var inputTransformModelVars = new Var <PredictorModel> [input.NumFolds]; var warningsVars = new Var <IDataView> [input.NumFolds]; var overallMetricsVars = new Var <IDataView> [input.NumFolds]; var instanceMetricsVars = new Var <IDataView> [input.NumFolds]; var confusionMatrixVars = new Var <IDataView> [input.NumFolds]; // Instantiate the subgraph for each fold. for (int k = 0; k < input.NumFolds; k++) { // Parse the nodes in input.Nodes into a temporary run context. var context = new RunContext(env); var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes); // Rename all the variables such that they don't conflict with the ones in the outer run context. var mapping = new Dictionary <string, string>(); foreach (var entryPointNode in graph) { entryPointNode.RenameAllVariables(mapping); } // Instantiate a TrainTest entry point for this fold. var args = new TrainTestMacro.Arguments { Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()), TransformModel = null, LabelColumn = input.LabelColumn, GroupColumn = input.GroupColumn, WeightColumn = input.WeightColumn, NameColumn = input.NameColumn }; if (transformModelVarName != null) { args.TransformModel = new Var <TransformModel> { VarName = transformModelVarName.VariableName } } ; args.Inputs.Data = new Var <IDataView> { VarName = mapping[input.Inputs.Data.VarName] }; args.Outputs.PredictorModel = new Var <PredictorModel> { VarName = mapping[input.Outputs.PredictorModel.VarName] }; // Set train/test trainer kind to match. args.Kind = input.Kind; // Set the input bindings for the TrainTest entry point. inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var trainingData = new SimpleParameterBinding(nameof(args.TrainingData)); inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> { trainingData }); inputMap.Add(trainingData, new ArrayIndexVariableBinding(splitOutputTrainData.VarName, k)); var testingData = new SimpleParameterBinding(nameof(args.TestingData)); inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> { testingData }); inputMap.Add(testingData, new ArrayIndexVariableBinding(splitOutputTestData.VarName, k)); outputMap = new Dictionary <string, string>(); var transformModelVar = new Var <TransformModel>(); var predModelVar = new Var <PredictorModel>(); outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName); predModelVars[k] = predModelVar; if (transformModelVarName != null && transformModelVarName.VariableName != null) { var combineModelsArgs = new ModelOperations.SimplePredictorModelInput(); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName); var inputPredictorModel = new SimpleVariableBinding(predModelVar.VarName); paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.TransformModel)); inputBindingMap.Add(nameof(combineModelsArgs.TransformModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, inputTransformModel); paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.PredictorModel)); inputBindingMap.Add(nameof(combineModelsArgs.PredictorModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, inputPredictorModel); outputMap = new Dictionary <string, string>(); var combineNodeOutputPredictorModel = new Var <PredictorModel>(); predModelVars[k] = combineNodeOutputPredictorModel; outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName); EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineModelsArgs, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(combineNode); } var warningVar = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName); warningsVars[k] = warningVar; var overallMetric = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName); overallMetricsVars[k] = overallMetric; var instanceMetric = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName); instanceMetricsVars[k] = instanceMetric; var confusionMatrix = new Var <IDataView>(); outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName); confusionMatrixVars[k] = confusionMatrix; const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator"; subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Context, inputBindingMap, inputMap, outputMap)); } // Convert the predictor models to an array of predictor models. MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, subGraphNodes, predModelVars, node.GetOutputVariableName(nameof(Output.PredictorModel))); // Convert the warnings, overall, per instance and confusion matrix data views into an array. var warningsArrayVar = new ArrayVar <IDataView>(); var overallArrayVar = new ArrayVar <IDataView>(); var instanceArrayVar = new ArrayVar <IDataView>(); ArrayVar <IDataView> confusionMatrixArrayVar = null; MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, warningsVars, warningsArrayVar.VarName); MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, overallMetricsVars, overallArrayVar.VarName); MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, instanceMetricsVars, instanceArrayVar.VarName); if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer || input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer) { confusionMatrixArrayVar = new ArrayVar <IDataView>(); MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, confusionMatrixVars, confusionMatrixArrayVar.VarName); } var combineArgs = new CombineMetricsInput(); combineArgs.Kind = input.Kind; combineArgs.LabelColumn = input.LabelColumn; combineArgs.WeightColumn = input.WeightColumn; combineArgs.GroupColumn = input.GroupColumn; combineArgs.NameColumn = input.NameColumn; // Set the input bindings for the CombineMetrics entry point. var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >(); var combineInputMap = new Dictionary <ParameterBinding, VariableBinding>(); var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings)); combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List <ParameterBinding> { warningsArray }); combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsArrayVar.VarName)); var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics)); combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> { overallArray }); combineInputMap.Add(overallArray, new SimpleVariableBinding(overallArrayVar.VarName)); var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics)); combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> { combinePerInstArray }); combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceArrayVar.VarName)); if (confusionMatrixArrayVar != null) { var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix)); combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> { combineConfArray }); combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatrixArrayVar.VarName)); } var combineOutputMap = new Dictionary <string, string>(); var combineWarningVar = new Var <IDataView>(); combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings)); combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName); var combineOverallMetric = new Var <IDataView>(); combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics)); combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName); var combineInstanceMetric = new Var <IDataView>(); combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics)); combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName); if (confusionMatrixArrayVar != null) { var combineConfusionMatrix = new Var <IDataView>(); combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix)); combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName); } var combineMetricsNode = EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap); subGraphNodes.Add(combineMetricsNode); return(new CommonOutputs.MacroOutput <Output>() { Nodes = subGraphNodes }); }
/// <summary> /// Checks the given JSON object key-value pair is a valid EntryPoint input and /// extracts out any variables that need to be populated. These variables will be /// added to the EntryPoint context. Input parameters that are not set to variables /// will be immediately set using the input builder instance. /// </summary> private void CheckAndSetInputValue(KeyValuePair <string, JToken> pair) { var inputName = _inputBuilder.GetFieldNameOrNull(pair.Key); if (VariableBinding.IsBindingToken(pair.Value)) { Type valueType = _inputBuilder.GetFieldTypeOrNull(pair.Key); if (valueType == null) { throw _host.Except($"Unexpected input name: '{pair.Key}'"); } if (!EntryPointVariable.IsValidType(valueType)) { throw _host.Except($"Unexpected input variable type: {valueType}"); } var varBinding = VariableBinding.Create(_host, pair.Value.Value <string>()); _context.AddInputVariable(varBinding, valueType); if (!_inputBindingMap.ContainsKey(inputName)) { _inputBindingMap[inputName] = new List <ParameterBinding>(); } var paramBinding = new SimpleParameterBinding(inputName); _inputBindingMap[inputName].Add(paramBinding); _inputMap[paramBinding] = varBinding; } else if (pair.Value is JArray && ((JArray)pair.Value).Any(tok => VariableBinding.IsBindingToken(tok))) { // REVIEW: EntryPoint arrays and dictionaries containing // variables must ONLY contain variables right now. if (!((JArray)pair.Value).All(tok => VariableBinding.IsBindingToken(tok))) { throw _host.Except($"Input {pair.Key} may ONLY contain variables."); } Type valueType = _inputBuilder.GetFieldTypeOrNull(pair.Key); if (valueType == null || !valueType.HasElementType) { throw _host.Except($"Unexpected input name: '{pair.Key}'"); } valueType = valueType.GetElementType(); int i = 0; foreach (var varName in (JArray)pair.Value) { var varBinding = VariableBinding.Create(_host, varName.Value <string>()); _context.AddInputVariable(varBinding, valueType); if (!_inputBindingMap.ContainsKey(inputName)) { _inputBindingMap[inputName] = new List <ParameterBinding>(); } var paramBinding = new ArrayIndexParameterBinding(inputName, i++); _inputBindingMap[inputName].Add(paramBinding); _inputMap[paramBinding] = varBinding; } } // REVIEW: Implement support for Dictionary of variable values. We need to differentiate // between a Dictionary and a Component here, and likely need to support nested components // all of which might have variables. Our current machinery only works at the 'Node' level. else { // This is not a variable. if (!_inputBuilder.TrySetValueJson(pair.Key, pair.Value)) { throw _host.Except($"Unexpected input: '{pair.Key}'"); } } }
public static CommonOutputs.MacroOutput <Output> TrainTest( IHostEnvironment env, Arguments input, EntryPointNode node) { // Create default pipeline ID if one not given. input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N"); // Parse the subgraph. var subGraphRunContext = new RunContext(env); var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, label: input.LabelColumn, input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null, input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null, input.NameColumn.IsExplicit ? input.NameColumn.Value : null); // Change the subgraph to use the training data as input. var varName = input.Inputs.Data.VarName; VariableBinding transformModelVarName = null; if (input.TransformModel != null) { transformModelVarName = node.GetInputVariable(nameof(input.TransformModel)); } if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable)) { throw env.Except($"Invalid variable name '{varName}'."); } var trainingVar = node.GetInputVariable(nameof(input.TrainingData)); foreach (var subGraphNode in subGraphNodes) { subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar); } subGraphRunContext.RemoveVariable(dataVariable); // Change the subgraph to use the model variable as output. varName = input.Outputs.PredictorModel.VarName; if (!subGraphRunContext.TryGetVariable(varName, out dataVariable)) { throw env.Except($"Invalid variable name '{varName}'."); } string predictorModelVarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); foreach (var subGraphNode in subGraphNodes) { subGraphNode.RenameOutputVariable(dataVariable.Name, predictorModelVarName); } subGraphRunContext.RemoveVariable(dataVariable); // Move the variables from the subcontext to the main context. node.Context.AddContextVariables(subGraphRunContext); // Change all the subgraph nodes to use the main context. foreach (var subGraphNode in subGraphNodes) { subGraphNode.SetContext(node.Context); } // Testing using test data set var testingVar = node.GetInputVariable(nameof(input.TestingData)); //var exp = new Experiment(env); Dictionary <string, List <ParameterBinding> > inputBindingMap; Dictionary <ParameterBinding, VariableBinding> inputMap; ParameterBinding paramBinding; Dictionary <string, string> outputMap; //combine the predictor model with any potential transfrom model passed from the outer graph if (transformModelVarName != null && transformModelVarName.VariableName != null) { var combineArgs = new ModelOperations.SimplePredictorModelInput(); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName); var inputPredictorModel = new SimpleVariableBinding(predictorModelVarName); paramBinding = new SimpleParameterBinding(nameof(combineArgs.TransformModel)); inputBindingMap.Add(nameof(combineArgs.TransformModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, inputTransformModel); paramBinding = new SimpleParameterBinding(nameof(combineArgs.PredictorModel)); inputBindingMap.Add(nameof(combineArgs.PredictorModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, inputPredictorModel); outputMap = new Dictionary <string, string>(); var combineNodeOutputPredictorModel = new Var <PredictorModel>(); predictorModelVarName = combineNodeOutputPredictorModel.VarName; outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName); EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineArgs, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(combineNode); } // Add the scoring node for testing. var args = new ScoreModel.Input(); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); paramBinding = new SimpleParameterBinding(nameof(args.Data)); inputBindingMap.Add(nameof(args.Data), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, testingVar); var scoreNodeInputPredictorModel = new SimpleVariableBinding(predictorModelVarName); paramBinding = new SimpleParameterBinding(nameof(args.PredictorModel)); inputBindingMap.Add(nameof(args.PredictorModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, scoreNodeInputPredictorModel); var scoreNodeOutputScoredData = new Var <IDataView>(); var scoreNodeOutputScoringTransform = new Var <TransformModel>(); outputMap = new Dictionary <string, string>(); outputMap.Add(nameof(ScoreModel.Output.ScoredData), scoreNodeOutputScoredData.VarName); outputMap.Add(nameof(ScoreModel.Output.ScoringTransform), scoreNodeOutputScoringTransform.VarName); EntryPointNode scoreNode = EntryPointNode.Create(env, "Transforms.DatasetScorer", args, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(scoreNode); var evalDataVarName = scoreNodeOutputScoredData.VarName; // REVIEW: add similar support for FeatureColumn. var settings = new MacroUtils.EvaluatorSettings { LabelColumn = input.LabelColumn, WeightColumn = input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null, GroupColumn = input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null, NameColumn = input.NameColumn.IsExplicit ? input.NameColumn.Value : null }; if (input.IncludeTrainingMetrics) { string evalTrainingDataVarName; args = new ScoreModel.Input(); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); paramBinding = new SimpleParameterBinding(nameof(args.Data)); inputBindingMap.Add(nameof(args.Data), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, trainingVar); scoreNodeInputPredictorModel = new SimpleVariableBinding(predictorModelVarName); paramBinding = new SimpleParameterBinding(nameof(args.PredictorModel)); inputBindingMap.Add(nameof(args.PredictorModel), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, scoreNodeInputPredictorModel); scoreNodeOutputScoredData = new Var <IDataView>(); scoreNodeOutputScoringTransform = new Var <TransformModel>(); outputMap = new Dictionary <string, string>(); outputMap.Add(nameof(ScoreModel.Output.ScoredData), scoreNodeOutputScoredData.VarName); outputMap.Add(nameof(ScoreModel.Output.ScoringTransform), scoreNodeOutputScoringTransform.VarName); scoreNode = EntryPointNode.Create(env, "Transforms.DatasetScorer", args, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(scoreNode); evalTrainingDataVarName = scoreNodeOutputScoredData.VarName; // Add the evaluator node for training. var evalTrainingArgs = MacroUtils.GetEvaluatorArgs(input.Kind, out var evalTrainingEntryPointName, settings); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var evalTrainingNodeInputData = new SimpleVariableBinding(evalTrainingDataVarName); paramBinding = new SimpleParameterBinding(nameof(evalTrainingArgs.Data)); inputBindingMap.Add(nameof(evalTrainingArgs.Data), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, evalTrainingNodeInputData); outputMap = new Dictionary <string, string>(); if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out var outTrainingVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.Warnings), outTrainingVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outTrainingVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.OverallMetrics), outTrainingVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outTrainingVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.PerInstanceMetrics), outTrainingVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outTrainingVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.ConfusionMatrix), outTrainingVariableName); } EntryPointNode evalTrainingNode = EntryPointNode.Create(env, evalTrainingEntryPointName, evalTrainingArgs, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(evalTrainingNode); } // Add the evaluator node for testing. var evalArgs = MacroUtils.GetEvaluatorArgs(input.Kind, out var evalEntryPointName, settings); inputBindingMap = new Dictionary <string, List <ParameterBinding> >(); inputMap = new Dictionary <ParameterBinding, VariableBinding>(); var evalNodeInputData = new SimpleVariableBinding(evalDataVarName); paramBinding = new SimpleParameterBinding(nameof(evalArgs.Data)); inputBindingMap.Add(nameof(evalArgs.Data), new List <ParameterBinding>() { paramBinding }); inputMap.Add(paramBinding, evalNodeInputData); outputMap = new Dictionary <string, string>(); if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out var outVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.Warnings), outVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.OverallMetrics), outVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.PerInstanceMetrics), outVariableName); } if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName)) { outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.ConfusionMatrix), outVariableName); } EntryPointNode evalNode = EntryPointNode.Create(env, evalEntryPointName, evalArgs, node.Context, inputBindingMap, inputMap, outputMap); subGraphNodes.Add(evalNode); // Marks as an atomic unit that can be run in // a distributed fashion. foreach (var subGraphNode in subGraphNodes) { subGraphNode.StageId = input.PipelineId; } return(new CommonOutputs.MacroOutput <Output>() { Nodes = subGraphNodes }); }