Exemplo n.º 1
0
        internal static void ConvertIPredictorModelsToArray(IHostEnvironment env, RunContext context, List <EntryPointNode> subGraphNodes,
                                                            Var <PredictorModel>[] predModelVars, string outputVarName)
        {
            var predictorArrayConverterArgs = new ArrayIPredictorModelInput();
            var inputBindingMap             = new Dictionary <string, List <ParameterBinding> >();
            var inputMap = new Dictionary <ParameterBinding, VariableBinding>();

            var argName = nameof(predictorArrayConverterArgs.Model);

            inputBindingMap.Add(argName, new List <ParameterBinding>());
            for (int i = 0; i < predModelVars.Length; i++)
            {
                var paramBinding = new ArrayIndexParameterBinding(argName, i);
                inputBindingMap[argName].Add(paramBinding);
                inputMap[paramBinding] = new SimpleVariableBinding(predModelVars[i].VarName);
            }
            var outputMap = new Dictionary <string, string>();
            var output    = new ArrayVar <PredictorModel>();

            outputMap.Add(nameof(MacroUtils.ArrayIPredictorModelOutput.OutputModel), outputVarName);
            var arrayConvertNode = EntryPointNode.Create(env, "Data.PredictorModelArrayConverter", predictorArrayConverterArgs,
                                                         context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(arrayConvertNode);
        }
Exemplo n.º 2
0
        public static EntryPointNode Create(
            IHostEnvironment env,
            string entryPointName,
            object arguments,
            ModuleCatalog catalog,
            RunContext context,
            Dictionary <string, List <ParameterBinding> > inputBindingMap,
            Dictionary <ParameterBinding, VariableBinding> inputMap,
            Dictionary <string, string> outputMap,
            bool checkpoint = false,
            string stageId  = "",
            float cost      = float.NaN)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonEmpty(entryPointName, nameof(entryPointName));
            env.CheckValue(arguments, nameof(arguments));
            env.CheckValue(catalog, nameof(catalog));
            env.CheckValue(context, nameof(context));
            env.CheckValue(inputBindingMap, nameof(inputBindingMap));
            env.CheckValue(inputMap, nameof(inputMap));
            env.CheckValue(outputMap, nameof(outputMap));
            ModuleCatalog.EntryPointInfo info;
            bool success = catalog.TryFindEntryPoint(entryPointName, out info);

            env.Assert(success);

            var inputBuilder = new InputBuilder(env, info.InputType, catalog);
            var outputHelper = new OutputHelper(env, info.OutputType);

            var entryPointNode = new EntryPointNode(env, catalog, context, context.GenerateId(entryPointName), entryPointName,
                                                    inputBuilder.GetJsonObject(arguments, inputBindingMap, inputMap),
                                                    outputHelper.GetJsonObject(outputMap), checkpoint, stageId, cost);

            return(entryPointNode);
        }
Exemplo n.º 3
0
        public EntryPointGraph(IHostEnvironment env, ModuleCatalog moduleCatalog, JArray nodes)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(RegistrationName);
            _host.CheckValue(moduleCatalog, nameof(moduleCatalog));
            _host.CheckValue(nodes, nameof(nodes));

            _context = new RunContext(_host);
            _nodes   = EntryPointNode.ValidateNodes(_host, _context, nodes, moduleCatalog);
        }
Exemplo n.º 4
0
        public void RunNode(EntryPointNode node)
        {
            _host.CheckValue(node, nameof(node));
            _host.Assert(_nodes.Contains(node));

            node.Run();
            if (node.IsMacro)
            {
                _nodes.AddRange(node.MacroNodes);
            }
        }
Exemplo n.º 5
0
        internal static void ConvertIdataViewsToArray(IHostEnvironment env, RunContext context, List <EntryPointNode> subGraphNodes,
                                                      Var <IDataView>[] vars, string outputVarName)
        {
            var dataviewArrayConverterArgs = new ArrayIDataViewInput();
            var inputBindingMap            = new Dictionary <string, List <ParameterBinding> >();
            var inputMap = new Dictionary <ParameterBinding, VariableBinding>();

            var argName = nameof(dataviewArrayConverterArgs.Data);

            inputBindingMap.Add(argName, new List <ParameterBinding>());
            for (int i = 0; i < vars.Length; i++)
            {
                var paramBinding = new ArrayIndexParameterBinding(argName, i);
                inputBindingMap[argName].Add(paramBinding);
                inputMap[paramBinding] = new SimpleVariableBinding(vars[i].VarName);
            }
            var outputMap = new Dictionary <string, string>();

            outputMap.Add(nameof(ArrayIDataViewOutput.OutputData), outputVarName);
            var arrayConvertNode = EntryPointNode.Create(env, "Data.IDataViewArrayConverter", dataviewArrayConverterArgs,
                                                         context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(arrayConvertNode);
        }
Exemplo n.º 6
0
        public static CommonOutputs.MacroOutput <Output> CrossValidateBinary(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            // Split the input data into folds.
            var exp     = new Experiment(env);
            var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();

            cvSplit.Data.VarName         = node.GetInputVariable("Data").ToJson();
            cvSplit.NumFolds             = input.NumFolds;
            cvSplit.StratificationColumn = input.StratificationColumn;
            var cvSplitOutput = exp.Add(cvSplit);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            var predModelVars       = new Var <PredictorModel> [input.NumFolds];
            var warningsVars        = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars  = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestBinaryMacro.Arguments
                {
                    Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray())
                };
                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };
                args.Outputs.Model = new Var <PredictorModel>
                {
                    VarName = mapping[input.Outputs.Model.VarName]
                };

                // Set the input bindings for the TrainTest entry point.
                var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData    = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));
                var outputMap    = new Dictionary <string, string>();
                var predModelVar = new Var <PredictorModel>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.PredictorModel), predModelVar.VarName);
                predModelVars[k] = predModelVar;
                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                subGraphNodes.Add(EntryPointNode.Create(env, "Models.TrainTestBinaryEvaluator", args, node.Context, inputBindingMap, inputMap, outputMap));
            }

            exp.Reset();

            var outModels = new Legacy.Data.PredictorModelArrayConverter
            {
                Model = new ArrayVar <PredictorModel>(predModelVars)
            };
            var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();

            outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
            exp.Add(outModels, outModelsOutput);

            var warnings = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(warningsVars)
            };
            var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            warningsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            exp.Add(warnings, warningsOutput);

            var overallMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(overallMetricsVars)
            };
            var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            overallMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            exp.Add(overallMetrics, overallMetricsOutput);

            var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(instanceMetricsVars)
            };
            var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            instanceMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            exp.Add(instanceMetrics, instanceMetricsOutput);

            var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(confusionMatrixVars)
            };
            var confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            confusionMatricesOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
            exp.Add(confusionMatrices, confusionMatricesOutput);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
Exemplo n.º 7
0
        public static CommonOutputs.MacroOutput <Output> PipelineSweep(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            env.Check(input.StateArguments != null || input.State is AutoInference.AutoMlMlState,
                      "Must have a valid AutoML State, or pass arguments to create one.");
            env.Check(input.BatchSize > 0, "Batch size must be > 0.");

            // If no current state, create object and set data.
            if (input.State == null)
            {
                input.State = input.StateArguments?.CreateComponent(env);

                if (input.State is AutoInference.AutoMlMlState inState)
                {
                    inState.SetTrainTestData(input.TrainingData, input.TestingData);
                }
                else
                {
                    throw env.Except($"Incompatible type. Expecting type {typeof(AutoInference.AutoMlMlState)}, received type {input.State?.GetType()}.");
                }

                var result = node.AddNewVariable("State", input.State);
                node.Context.AddInputVariable(result.Item2, typeof(IMlState));
            }
            var autoMlState = (AutoInference.AutoMlMlState)input.State;

            // The indicators are just so the macro knows those pipelines need to
            // be run before performing next expansion. If we add them as inputs
            // to the next iteration, the next iteration cannot run until they have
            // their values set. Thus, indicators are needed.
            var pipelineIndicators = new List <Var <IDataView> >();

            var expNodes = new List <EntryPointNode>();

            // Keep versions of the training and testing var names
            var training = new Var <IDataView> {
                VarName = node.GetInputVariable("TrainingData").VariableName
            };
            var testing = new Var <IDataView> {
                VarName = node.GetInputVariable("TestingData").VariableName
            };
            var amlsVarObj =
                new Var <IMlState>()
            {
                VarName = node.GetInputVariable(nameof(input.State)).VariableName
            };

            // Make sure search space is defined. If not, infer,
            // with default number of transform levels.
            if (!autoMlState.IsSearchSpaceDefined())
            {
                autoMlState.InferSearchSpace(numTransformLevels: 1);
            }

            // Extract performance summaries and assign to previous candidate pipelines.
            foreach (var pipeline in autoMlState.BatchCandidates)
            {
                if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId),
                                                out var v))
                {
                    pipeline.PerformanceSummary =
                        AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name);
                    autoMlState.AddEvaluated(pipeline);
                }
            }

            node.OutputMap.TryGetValue("Results", out string outDvName);
            var outDvVar = new Var <IDataView>()
            {
                VarName = outDvName
            };

            node.OutputMap.TryGetValue("State", out string outStateName);
            var outStateVar = new Var <IMlState>()
            {
                VarName = outStateName
            };

            // Get next set of candidates.
            var candidatePipelines = autoMlState.GetNextCandidates(input.BatchSize);

            // Check if termination condition was met, i.e. no more candidates were returned.
            // If so, end expansion and add a node to extract the sweep result.
            if (candidatePipelines == null || candidatePipelines.Length == 0)
            {
                // Add a node to extract the sweep result.
                return(new CommonOutputs.MacroOutput <Output>()
                {
                    Nodes = expNodes
                });
            }

            // Prep all returned candidates
            foreach (var p in candidatePipelines)
            {
                // Add train test experiments to current graph for candidate pipeline
                var subgraph        = new Experiment(env);
                var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph);

                // Change variable name to reference pipeline ID in output map, context and entrypoint output.
                var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
                var sgNode     = EntryPointNode.ValidateNodes(env, node.Context,
                                                              new JArray(subgraph.GetNodes().Last()), node.Catalog).Last();
                sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
                trainTestOutput.OverallMetrics.VarName = uniqueName;
                expNodes.Add(sgNode);

                // Store indicators, to pass to next iteration of macro.
                pipelineIndicators.Add(trainTestOutput.OverallMetrics);
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = expNodes
            });
        }
        public static CommonOutputs.MacroOutput <Output> TrainTestBinary(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Parse the subgraph.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

            // Change the subgraph to use the training data as input.
            var varName = input.Inputs.Data.VarName;
            EntryPointVariable variable;

            if (!subGraphRunContext.TryGetVariable(varName, out variable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            var trainingVar = node.GetInputVariable("TrainingData");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameInputVariable(variable.Name, trainingVar);
            }
            subGraphRunContext.RemoveVariable(variable);

            // Change the subgraph to use the model variable as output.
            varName = input.Outputs.Model.VarName;
            if (!subGraphRunContext.TryGetVariable(varName, out variable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            string outputVarName = node.GetOutputVariableName("PredictorModel");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameOutputVariable(variable.Name, outputVarName);
            }
            subGraphRunContext.RemoveVariable(variable);

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Change all the subgraph nodes to use the main context.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.SetContext(node.Context);
            }

            // Add the scoring node.
            var testingVar = node.GetInputVariable("TestingData");
            var exp        = new Experiment(env);
            var scoreNode  = new Legacy.Transforms.DatasetScorer();

            scoreNode.Data.VarName           = testingVar.ToJson();
            scoreNode.PredictorModel.VarName = outputVarName;
            var scoreNodeOutput = exp.Add(scoreNode);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            // Add the evaluator node.
            exp.Reset();
            var evalNode = new Legacy.Models.BinaryClassificationEvaluator();

            evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName;
            var    evalOutput = new Legacy.Models.BinaryClassificationEvaluator.Output();
            string outVariableName;

            if (node.OutputMap.TryGetValue("Warnings", out outVariableName))
            {
                evalOutput.Warnings.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("OverallMetrics", out outVariableName))
            {
                evalOutput.OverallMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("PerInstanceMetrics", out outVariableName))
            {
                evalOutput.PerInstanceMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("ConfusionMatrix", out outVariableName))
            {
                evalOutput.ConfusionMatrix.VarName = outVariableName;
            }
            exp.Add(evalNode, evalOutput);
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            var stageId = Guid.NewGuid().ToString("N");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.StageId = stageId;
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
        private static Tuple <List <EntryPointNode>, Var <IPredictorModel> > ProcessClass(IHostEnvironment env, int k, string label, Arguments input, EntryPointNode node)
        {
            var macroNodes = new List <EntryPointNode>();

            // Convert label into T,F based on k.
            var remapper = new Legacy.Transforms.LabelIndicator
            {
                ClassIndex = k,
                Column     = new[]
                {
                    new Legacy.Transforms.LabelIndicatorTransformColumn
                    {
                        ClassIndex = k,
                        Name       = label,
                        Source     = label
                    }
                },
                Data = { VarName = node.GetInputVariable(nameof(input.TrainingData)).ToJson() }
            };
            var exp             = new Experiment(env);
            var remapperOutNode = exp.Add(remapper);
            var subNodes        = EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes());

            macroNodes.AddRange(subNodes);

            // Parse the nodes in input.Nodes into a temporary run context.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

            // Rename all the variables such that they don't conflict with the ones in the outer run context.
            var  mapping     = new Dictionary <string, string>();
            bool foundOutput = false;
            Var <IPredictorModel> predModelVar = null;

            foreach (var entryPointNode in subGraphNodes)
            {
                // Rename variables in input/output maps, and in subgraph context.
                entryPointNode.RenameAllVariables(mapping);
                foreach (var kvp in mapping)
                {
                    subGraphRunContext.RenameContextVariable(kvp.Key, kvp.Value);
                }

                // Grab a hold of output model from this subgraph.
                if (entryPointNode.GetOutputVariableName("PredictorModel") is string mvn)
                {
                    predModelVar = new Var <IPredictorModel> {
                        VarName = mvn
                    };
                    foundOutput = true;
                }

                // Connect label remapper output to wherever training data was expected within the input graph.
                if (entryPointNode.GetInputVariable(nameof(input.TrainingData)) is VariableBinding vb)
                {
                    vb.Rename(remapperOutNode.OutputData.VarName);
                }

                // Change node to use the main context.
                entryPointNode.SetContext(node.Context);
            }

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Make sure we found the output variable for this model.
            if (!foundOutput)
            {
                throw new Exception("Invalid input graph. Does not output predictor model.");
            }

            // Add training subgraph to our context.
            macroNodes.AddRange(subGraphNodes);

            return(new Tuple <List <EntryPointNode>, Var <IPredictorModel> >(macroNodes, predModelVar));
        }
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var <IPredictorModel> [numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List <EntryPointNode>();

            // Instantiate the subgraph for each label value.
            for (int k = 0; k < numClasses; k++)
            {
                var result = ProcessClass(env, k, label, input, node);
                predModelVars[k] = result.Item2;
                macroNodes.AddRange(result.Item1);
            }

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var macroExperiment = new Experiment(env);
            var combinerNode    = new Legacy.Models.OvaModelCombiner
            {
                ModelArray   = new ArrayVar <IPredictorModel>(predModelVars),
                TrainingData = new Var <IDataView> {
                    VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName
                },
                Caching           = (Legacy.Models.CachingOptions)input.Caching,
                FeatureColumn     = input.FeatureColumn,
                NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
                LabelColumn       = input.LabelColumn,
                UseProbabilities  = input.UseProbabilities
            };

            // Get output model variable.
            if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
            {
                throw new Exception("Cannot find OVA model output.");
            }

            // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
            var combinerOutput = new Legacy.Models.OvaModelCombiner.Output {
                PredictorModel = new Var <IPredictorModel> {
                    VarName = outVariableName
                }
            };

            // Add to experiment (must be done AFTER we assign variable name to output).
            macroExperiment.Add(combinerNode, combinerOutput);

            // Add nodes to main experiment.
            var nodes    = macroExperiment.GetNodes();
            var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);

            macroNodes.AddRange(expNodes);

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = macroNodes
            });
        }
        public static CommonOutputs.MacroOutput <Output> CrossValidate(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            env.CheckValue(input, nameof(input));

            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            //the input transform model
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            // Split the input data into folds.
            var splitArgs = new CVSplit.Input();

            splitArgs.NumFolds             = input.NumFolds;
            splitArgs.StratificationColumn = input.StratificationColumn;
            var inputBindingMap           = new Dictionary <string, List <ParameterBinding> >();
            var inputMap                  = new Dictionary <ParameterBinding, VariableBinding>();
            var inputData                 = node.GetInputVariable(nameof(splitArgs.Data));
            ParameterBinding paramBinding = new SimpleParameterBinding(nameof(splitArgs.Data));

            inputBindingMap.Add(nameof(splitArgs.Data), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, inputData);
            var outputMap            = new Dictionary <string, string>();
            var splitOutputTrainData = new ArrayVar <IDataView>();
            var splitOutputTestData  = new ArrayVar <IDataView>();

            outputMap.Add(nameof(CVSplit.Output.TrainData), splitOutputTrainData.VarName);
            outputMap.Add(nameof(CVSplit.Output.TestData), splitOutputTestData.VarName);
            var splitNode = EntryPointNode.Create(env, "Models.CrossValidatorDatasetSplitter", splitArgs,
                                                  node.Context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(splitNode);

            var predModelVars           = new Var <PredictorModel> [input.NumFolds];
            var inputTransformModelVars = new Var <PredictorModel> [input.NumFolds];
            var warningsVars            = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars      = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars     = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars     = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestMacro.Arguments
                {
                    Nodes          = new JArray(graph.Select(n => n.ToJson()).ToArray()),
                    TransformModel = null,
                    LabelColumn    = input.LabelColumn,
                    GroupColumn    = input.GroupColumn,
                    WeightColumn   = input.WeightColumn,
                    NameColumn     = input.NameColumn
                };

                if (transformModelVarName != null)
                {
                    args.TransformModel = new Var <TransformModel> {
                        VarName = transformModelVarName.VariableName
                    }
                }
                ;

                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };
                args.Outputs.PredictorModel = new Var <PredictorModel>
                {
                    VarName = mapping[input.Outputs.PredictorModel.VarName]
                };

                // Set train/test trainer kind to match.
                args.Kind = input.Kind;

                // Set the input bindings for the TrainTest entry point.
                inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(splitOutputTrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(splitOutputTestData.VarName, k));
                outputMap = new Dictionary <string, string>();
                var transformModelVar = new Var <TransformModel>();
                var predModelVar      = new Var <PredictorModel>();
                outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
                predModelVars[k] = predModelVar;
                if (transformModelVarName != null && transformModelVarName.VariableName != null)
                {
                    var combineModelsArgs = new ModelOperations.SimplePredictorModelInput();
                    inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                    inputMap        = new Dictionary <ParameterBinding, VariableBinding>();

                    var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName);
                    var inputPredictorModel = new SimpleVariableBinding(predModelVar.VarName);
                    paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.TransformModel));
                    inputBindingMap.Add(nameof(combineModelsArgs.TransformModel), new List <ParameterBinding>()
                    {
                        paramBinding
                    });
                    inputMap.Add(paramBinding, inputTransformModel);
                    paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.PredictorModel));
                    inputBindingMap.Add(nameof(combineModelsArgs.PredictorModel), new List <ParameterBinding>()
                    {
                        paramBinding
                    });
                    inputMap.Add(paramBinding, inputPredictorModel);
                    outputMap = new Dictionary <string, string>();

                    var combineNodeOutputPredictorModel = new Var <PredictorModel>();
                    predModelVars[k] = combineNodeOutputPredictorModel;
                    outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName);
                    EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineModelsArgs,
                                                                       node.Context, inputBindingMap, inputMap, outputMap);
                    subGraphNodes.Add(combineNode);
                }

                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
                subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Context, inputBindingMap, inputMap, outputMap));
            }

            // Convert the predictor models to an array of predictor models.
            MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, subGraphNodes, predModelVars, node.GetOutputVariableName(nameof(Output.PredictorModel)));

            // Convert the warnings, overall, per instance and confusion matrix data views into an array.
            var warningsArrayVar = new ArrayVar <IDataView>();
            var overallArrayVar  = new ArrayVar <IDataView>();
            var instanceArrayVar = new ArrayVar <IDataView>();
            ArrayVar <IDataView> confusionMatrixArrayVar = null;

            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, warningsVars, warningsArrayVar.VarName);
            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, overallMetricsVars, overallArrayVar.VarName);
            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, instanceMetricsVars, instanceArrayVar.VarName);
            if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
                input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
            {
                confusionMatrixArrayVar = new ArrayVar <IDataView>();
                MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, confusionMatrixVars, confusionMatrixArrayVar.VarName);
            }

            var combineArgs = new CombineMetricsInput();

            combineArgs.Kind         = input.Kind;
            combineArgs.LabelColumn  = input.LabelColumn;
            combineArgs.WeightColumn = input.WeightColumn;
            combineArgs.GroupColumn  = input.GroupColumn;
            combineArgs.NameColumn   = input.NameColumn;

            // Set the input bindings for the CombineMetrics entry point.
            var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var combineInputMap        = new Dictionary <ParameterBinding, VariableBinding>();

            var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings));

            combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List <ParameterBinding> {
                warningsArray
            });
            combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsArrayVar.VarName));
            var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> {
                overallArray
            });
            combineInputMap.Add(overallArray, new SimpleVariableBinding(overallArrayVar.VarName));
            var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> {
                combinePerInstArray
            });
            combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceArrayVar.VarName));
            if (confusionMatrixArrayVar != null)
            {
                var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
                combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> {
                    combineConfArray
                });
                combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatrixArrayVar.VarName));
            }

            var combineOutputMap  = new Dictionary <string, string>();
            var combineWarningVar = new Var <IDataView>();

            combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
            var combineOverallMetric = new Var <IDataView>();

            combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
            var combineInstanceMetric = new Var <IDataView>();

            combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
            if (confusionMatrixArrayVar != null)
            {
                var combineConfusionMatrix = new Var <IDataView>();
                combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
                combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
            }
            var combineMetricsNode = EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner",
                                                           combineArgs, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap);

            subGraphNodes.Add(combineMetricsNode);
            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
Exemplo n.º 12
0
        private static Var <PredictorModel> ProcessClass(IHostEnvironment env, List <EntryPointNode> macroNodes, int k, string label, Arguments input, EntryPointNode node)
        {
            Contracts.AssertValue(macroNodes);

            // Convert label into T,F based on k.
            var labelIndicatorArgs = new LabelIndicatorTransform.Arguments();

            labelIndicatorArgs.ClassIndex = k;
            labelIndicatorArgs.Column     = new[] { new LabelIndicatorTransform.Column()
                                                    {
                                                        Name = label, Source = label
                                                    } };

            var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
            var paramBinding    = new SimpleParameterBinding(nameof(labelIndicatorArgs.Data));

            inputBindingMap.Add(nameof(labelIndicatorArgs.Data), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

            var outputMap        = new Dictionary <string, string>();
            var remappedLabelVar = new Var <IDataView>();

            outputMap.Add(nameof(CommonOutputs.TransformOutput.OutputData), remappedLabelVar.VarName);
            var labelIndicatorNode = EntryPointNode.Create(env, "Transforms.LabelIndicator", labelIndicatorArgs, node.Context,
                                                           inputBindingMap, inputMap, outputMap);

            macroNodes.Add(labelIndicatorNode);

            // Parse the nodes in input.Nodes into a temporary run context.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

            // Rename all the variables such that they don't conflict with the ones in the outer run context.
            var  mapping     = new Dictionary <string, string>();
            bool foundOutput = false;
            Var <PredictorModel> predModelVar = null;

            foreach (var entryPointNode in subGraphNodes)
            {
                // Rename variables in input/output maps, and in subgraph context.
                entryPointNode.RenameAllVariables(mapping);
                foreach (var kvp in mapping)
                {
                    subGraphRunContext.RenameContextVariable(kvp.Key, kvp.Value);
                }

                // Grab a hold of output model from this subgraph.
                if (entryPointNode.GetOutputVariableName("PredictorModel") is string mvn)
                {
                    predModelVar = new Var <PredictorModel> {
                        VarName = mvn
                    };
                    foundOutput = true;
                }

                // Connect label remapper output to wherever training data was expected within the input graph.
                if (entryPointNode.GetInputVariable(nameof(input.TrainingData)) is VariableBinding vb)
                {
                    vb.Rename(remappedLabelVar.VarName);
                }

                // Change node to use the main context.
                entryPointNode.SetContext(node.Context);
            }

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Make sure we found the output variable for this model.
            if (!foundOutput)
            {
                throw new Exception("Invalid input graph. Does not output predictor model.");
            }

            // Add training subgraph to our context.
            macroNodes.AddRange(subGraphNodes);
            return(predModelVar);
        }
Exemplo n.º 13
0
 public void AddNode(EntryPointNode node)
 {
     _host.CheckValue(node, nameof(node));
     node.SetContext(_context);
     _nodes.Add(node);
 }
Exemplo n.º 14
0
        public static CommonOutputs.MacroOutput <Output> TrainTest(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Create default pipeline ID if one not given.
            input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N");

            // Parse the subgraph.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, label: input.LabelColumn,
                                                                  input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
                                                                  input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
                                                                  input.NameColumn.IsExplicit ? input.NameColumn.Value : null);

            // Change the subgraph to use the training data as input.
            var             varName = input.Inputs.Data.VarName;
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            var trainingVar = node.GetInputVariable(nameof(input.TrainingData));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Change the subgraph to use the model variable as output.
            varName = input.Outputs.PredictorModel.VarName;
            if (!subGraphRunContext.TryGetVariable(varName, out dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }

            string predictorModelVarName = node.GetOutputVariableName(nameof(Output.PredictorModel));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameOutputVariable(dataVariable.Name, predictorModelVarName);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Change all the subgraph nodes to use the main context.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.SetContext(node.Context);
            }

            // Testing using test data set
            var testingVar = node.GetInputVariable(nameof(input.TestingData));
            //var exp = new Experiment(env);

            Dictionary <string, List <ParameterBinding> >  inputBindingMap;
            Dictionary <ParameterBinding, VariableBinding> inputMap;
            ParameterBinding            paramBinding;
            Dictionary <string, string> outputMap;

            //combine the predictor model with any potential transfrom model passed from the outer graph
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var combineArgs = new ModelOperations.SimplePredictorModelInput();
                inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                inputMap        = new Dictionary <ParameterBinding, VariableBinding>();

                var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName);
                var inputPredictorModel = new SimpleVariableBinding(predictorModelVarName);
                paramBinding = new SimpleParameterBinding(nameof(combineArgs.TransformModel));
                inputBindingMap.Add(nameof(combineArgs.TransformModel), new List <ParameterBinding>()
                {
                    paramBinding
                });
                inputMap.Add(paramBinding, inputTransformModel);
                paramBinding = new SimpleParameterBinding(nameof(combineArgs.PredictorModel));
                inputBindingMap.Add(nameof(combineArgs.PredictorModel), new List <ParameterBinding>()
                {
                    paramBinding
                });
                inputMap.Add(paramBinding, inputPredictorModel);
                outputMap = new Dictionary <string, string>();

                var combineNodeOutputPredictorModel = new Var <PredictorModel>();
                predictorModelVarName = combineNodeOutputPredictorModel.VarName;
                outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName);
                EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineArgs,
                                                                   node.Context, inputBindingMap, inputMap, outputMap);
                subGraphNodes.Add(combineNode);
            }

            // Add the scoring node for testing.
            var args = new ScoreModel.Input();

            inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
            paramBinding    = new SimpleParameterBinding(nameof(args.Data));
            inputBindingMap.Add(nameof(args.Data), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, testingVar);
            var scoreNodeInputPredictorModel = new SimpleVariableBinding(predictorModelVarName);

            paramBinding = new SimpleParameterBinding(nameof(args.PredictorModel));
            inputBindingMap.Add(nameof(args.PredictorModel), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, scoreNodeInputPredictorModel);

            var scoreNodeOutputScoredData       = new Var <IDataView>();
            var scoreNodeOutputScoringTransform = new Var <TransformModel>();

            outputMap = new Dictionary <string, string>();
            outputMap.Add(nameof(ScoreModel.Output.ScoredData), scoreNodeOutputScoredData.VarName);
            outputMap.Add(nameof(ScoreModel.Output.ScoringTransform), scoreNodeOutputScoringTransform.VarName);

            EntryPointNode scoreNode = EntryPointNode.Create(env, "Transforms.DatasetScorer", args,
                                                             node.Context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(scoreNode);
            var evalDataVarName = scoreNodeOutputScoredData.VarName;

            // REVIEW: add similar support for FeatureColumn.
            var settings = new MacroUtils.EvaluatorSettings
            {
                LabelColumn  = input.LabelColumn,
                WeightColumn = input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
                GroupColumn  = input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
                NameColumn   = input.NameColumn.IsExplicit ? input.NameColumn.Value : null
            };

            if (input.IncludeTrainingMetrics)
            {
                string evalTrainingDataVarName;
                args            = new ScoreModel.Input();
                inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                paramBinding    = new SimpleParameterBinding(nameof(args.Data));
                inputBindingMap.Add(nameof(args.Data), new List <ParameterBinding>()
                {
                    paramBinding
                });
                inputMap.Add(paramBinding, trainingVar);
                scoreNodeInputPredictorModel = new SimpleVariableBinding(predictorModelVarName);
                paramBinding = new SimpleParameterBinding(nameof(args.PredictorModel));
                inputBindingMap.Add(nameof(args.PredictorModel), new List <ParameterBinding>()
                {
                    paramBinding
                });
                inputMap.Add(paramBinding, scoreNodeInputPredictorModel);

                scoreNodeOutputScoredData       = new Var <IDataView>();
                scoreNodeOutputScoringTransform = new Var <TransformModel>();
                outputMap = new Dictionary <string, string>();
                outputMap.Add(nameof(ScoreModel.Output.ScoredData), scoreNodeOutputScoredData.VarName);
                outputMap.Add(nameof(ScoreModel.Output.ScoringTransform), scoreNodeOutputScoringTransform.VarName);

                scoreNode = EntryPointNode.Create(env, "Transforms.DatasetScorer", args,
                                                  node.Context, inputBindingMap, inputMap, outputMap);
                subGraphNodes.Add(scoreNode);
                evalTrainingDataVarName = scoreNodeOutputScoredData.VarName;

                // Add the evaluator node for training.
                var evalTrainingArgs = MacroUtils.GetEvaluatorArgs(input.Kind, out var evalTrainingEntryPointName, settings);
                inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var evalTrainingNodeInputData = new SimpleVariableBinding(evalTrainingDataVarName);
                paramBinding = new SimpleParameterBinding(nameof(evalTrainingArgs.Data));
                inputBindingMap.Add(nameof(evalTrainingArgs.Data), new List <ParameterBinding>()
                {
                    paramBinding
                });
                inputMap.Add(paramBinding, evalTrainingNodeInputData);

                outputMap = new Dictionary <string, string>();
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out var outTrainingVariableName))
                {
                    outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.Warnings), outTrainingVariableName);
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outTrainingVariableName))
                {
                    outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.OverallMetrics), outTrainingVariableName);
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outTrainingVariableName))
                {
                    outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.PerInstanceMetrics), outTrainingVariableName);
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outTrainingVariableName))
                {
                    outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.ConfusionMatrix), outTrainingVariableName);
                }
                EntryPointNode evalTrainingNode = EntryPointNode.Create(env, evalTrainingEntryPointName, evalTrainingArgs, node.Context, inputBindingMap, inputMap, outputMap);
                subGraphNodes.Add(evalTrainingNode);
            }

            // Add the evaluator node for testing.
            var evalArgs = MacroUtils.GetEvaluatorArgs(input.Kind, out var evalEntryPointName, settings);

            inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
            var evalNodeInputData = new SimpleVariableBinding(evalDataVarName);

            paramBinding = new SimpleParameterBinding(nameof(evalArgs.Data));
            inputBindingMap.Add(nameof(evalArgs.Data), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, evalNodeInputData);

            outputMap = new Dictionary <string, string>();
            if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out var outVariableName))
            {
                outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.Warnings), outVariableName);
            }
            if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName))
            {
                outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.OverallMetrics), outVariableName);
            }
            if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName))
            {
                outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.PerInstanceMetrics), outVariableName);
            }
            if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName))
            {
                outputMap.Add(nameof(CommonOutputs.ClassificationEvaluateOutput.ConfusionMatrix), outVariableName);
            }
            EntryPointNode evalNode = EntryPointNode.Create(env, evalEntryPointName, evalArgs, node.Context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(evalNode);

            // Marks as an atomic unit that can be run in
            // a distributed fashion.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.StageId = input.PipelineId;
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
Exemplo n.º 15
0
        public static CommonOutputs.MacroOutput <Output> CrossValidate(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            env.CheckValue(input, nameof(input));

            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            //the input transform model
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            // Split the input data into folds.
            var exp     = new Experiment(env);
            var cvSplit = new Models.CrossValidatorDatasetSplitter();

            cvSplit.Data.VarName         = node.GetInputVariable("Data").ToJson();
            cvSplit.NumFolds             = input.NumFolds;
            cvSplit.StratificationColumn = input.StratificationColumn;
            var cvSplitOutput = exp.Add(cvSplit);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

            var predModelVars           = new Var <IPredictorModel> [input.NumFolds];
            var transformModelVars      = new Var <ITransformModel> [input.NumFolds];
            var inputTransformModelVars = new Var <IPredictorModel> [input.NumFolds];
            var warningsVars            = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars      = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars     = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars     = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestMacro.Arguments
                {
                    Nodes          = new JArray(graph.Select(n => n.ToJson()).ToArray()),
                    TransformModel = null,
                    LabelColumn    = input.LabelColumn,
                    GroupColumn    = input.GroupColumn,
                    WeightColumn   = input.WeightColumn
                };

                if (transformModelVarName != null)
                {
                    args.TransformModel = new Var <ITransformModel> {
                        VarName = transformModelVarName.VariableName
                    }
                }
                ;

                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };

                if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName))
                {
                    args.Outputs.PredictorModel = new Var <IPredictorModel>
                    {
                        VarName = mapping[input.Outputs.PredictorModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.PredictorModel = null;
                }

                if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName))
                {
                    args.Outputs.TransformModel = new Var <ITransformModel>
                    {
                        VarName = mapping[input.Outputs.TransformModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.TransformModel = null;
                }

                // Set train/test trainer kind to match.
                args.Kind = input.Kind;

                // Set the input bindings for the TrainTest entry point.
                var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData    = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));
                var outputMap         = new Dictionary <string, string>();
                var transformModelVar = new Var <ITransformModel>();
                var predModelVar      = new Var <IPredictorModel>();
                if (input.Outputs.PredictorModel == null)
                {
                    outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
                    transformModelVars[k] = transformModelVar;
                    ML.Transforms.ModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new ML.Transforms.ModelCombiner
                        {
                            Models = new ArrayVar <ITransformModel>(
                                new Var <ITransformModel>[] {
                                new Var <ITransformModel> {
                                    VarName = transformModelVarName.VariableName
                                },
                                transformModelVar
                            }
                                )
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        transformModelVars[k] = modelCombineOutput.OutputModel;
                    }
                }
                else
                {
                    outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
                    predModelVars[k] = predModelVar;
                    ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
                        {
                            TransformModel = { VarName = transformModelVarName.VariableName },
                            PredictorModel = predModelVar
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        predModelVars[k] = modelCombineOutput.PredictorModel;
                    }
                }

                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
                subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
            }

            exp.Reset();

            // Convert predictors from all folds into an array of predictors.

            if (input.Outputs.PredictorModel == null)
            {
                var outModels = new ML.Data.TransformModelArrayConverter
                {
                    TransformModel = new ArrayVar <ITransformModel>(transformModelVars)
                };
                var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
                exp.Add(outModels, outModelsOutput);
            }
            else
            {
                var outModels = new ML.Data.PredictorModelArrayConverter
                {
                    Model = new ArrayVar <IPredictorModel>(predModelVars)
                };
                var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
                exp.Add(outModels, outModelsOutput);
            }

            // Convert warnings data views from all folds into an array of data views.
            var warnings = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(warningsVars)
            };
            var warningsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(warnings, warningsOutput);

            // Convert overall metrics data views from all folds into an array of data views.
            var overallMetrics = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(overallMetricsVars)
            };
            var overallMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(overallMetrics, overallMetricsOutput);

            // Convert per instance data views from all folds into an array of data views.
            var instanceMetrics = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(instanceMetricsVars)
            };
            var instanceMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(instanceMetrics, instanceMetricsOutput);

            ML.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
            if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
                input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
            {
                // Convert confusion matrix data views from all folds into an array of data views.
                var confusionMatrices = new ML.Data.IDataViewArrayConverter
                {
                    Data = new ArrayVar <IDataView>(confusionMatrixVars)
                };
                confusionMatricesOutput = new ML.Data.IDataViewArrayConverter.Output();
                exp.Add(confusionMatrices, confusionMatricesOutput);
            }

            var combineArgs = new CombineMetricsInput();

            combineArgs.Kind         = input.Kind;
            combineArgs.LabelColumn  = input.LabelColumn;
            combineArgs.WeightColumn = input.WeightColumn;
            combineArgs.GroupColumn  = input.GroupColumn;

            // Set the input bindings for the CombineMetrics entry point.
            var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var combineInputMap        = new Dictionary <ParameterBinding, VariableBinding>();
            var overallArray           = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> {
                overallArray
            });
            combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));
            var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> {
                combinePerInstArray
            });
            combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));
            if (confusionMatricesOutput != null)
            {
                var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
                combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> {
                    combineConfArray
                });
                combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
            }

            var combineOutputMap  = new Dictionary <string, string>();
            var combineWarningVar = new Var <IDataView>();

            combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
            var combineOverallMetric = new Var <IDataView>();

            combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
            var combineInstanceMetric = new Var <IDataView>();

            combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
            if (confusionMatricesOutput != null)
            {
                var combineConfusionMatrix = new Var <IDataView>();
                combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
                combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
            }
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
            subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));
            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
Exemplo n.º 16
0
        public static CommonOutputs.MacroOutput <Output> TrainTest(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Create default pipeline ID if one not given.
            input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N");

            // Parse the subgraph.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, node.Catalog);

            // Change the subgraph to use the training data as input.
            var             varName = input.Inputs.Data.VarName;
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            var trainingVar = node.GetInputVariable(nameof(input.TrainingData));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Change the subgraph to use the model variable as output.
            varName = input.Outputs.Model.VarName;
            if (!subGraphRunContext.TryGetVariable(varName, out dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            string outputVarName = node.GetOutputVariableName(nameof(Output.PredictorModel));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Change all the subgraph nodes to use the main context.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.SetContext(node.Context);
            }

            // Testing using test data set
            var testingVar = node.GetInputVariable(nameof(input.TestingData));
            var exp        = new Experiment(env);

            //combine the predictor model with any potential transfrom model passed from the outer graph
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
                {
                    TransformModel = { VarName = transformModelVarName.VariableName },
                    PredictorModel = { VarName = outputVarName }
                };

                var modelCombineOutput = exp.Add(modelCombine);
                outputVarName = modelCombineOutput.PredictorModel.VarName;
            }

            // Add the scoring node for testing.
            var scoreNode = new ML.Transforms.DatasetScorer
            {
                Data           = { VarName = testingVar.ToJson() },
                PredictorModel = { VarName = outputVarName }
            };
            var scoreNodeOutput = exp.Add(scoreNode);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

            // Do not double-add previous nodes.
            exp.Reset();

            // REVIEW: we need to extract the proper label column name here to pass to the evaluators.
            // This is where you would add code to do it.
            var settings = new MacroUtils.EvaluatorSettings
            {
                LabelColumn = DefaultColumnNames.Label
            };

            string outVariableName;

            if (input.IncludeTrainingMetrics)
            {
                // Add the scoring node for training.
                var scoreNodeTraining = new ML.Transforms.DatasetScorer
                {
                    Data           = { VarName = trainingVar.ToJson() },
                    PredictorModel = { VarName = outputVarName }
                };
                var scoreNodeTrainingOutput = exp.Add(scoreNodeTraining);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

                // Do not double-add previous nodes.
                exp.Reset();

                // Add the evaluator node for training.
                var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
                var evalNodeTraining        = evalInputOutputTraining.Item1;
                var evalOutputTraining      = evalInputOutputTraining.Item2;
                evalNodeTraining.Data.VarName = scoreNodeTrainingOutput.ScoredData.VarName;

                if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName))
                {
                    evalOutputTraining.Warnings.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outVariableName))
                {
                    evalOutputTraining.OverallMetrics.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outVariableName))
                {
                    evalOutputTraining.PerInstanceMetrics.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outVariableName) &&
                    evalOutputTraining is CommonOutputs.IClassificationEvaluatorOutput eoTraining)
                {
                    eoTraining.ConfusionMatrix.VarName = outVariableName;
                }

                exp.Add(evalNodeTraining, evalOutputTraining);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
            }

            // Do not double-add previous nodes.
            exp.Reset();

            // Add the evaluator node for testing.
            var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
            var evalNode        = evalInputOutput.Item1;
            var evalOutput      = evalInputOutput.Item2;

            evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName;

            if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName))
            {
                evalOutput.Warnings.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName))
            {
                evalOutput.OverallMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName))
            {
                evalOutput.PerInstanceMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName) &&
                evalOutput is CommonOutputs.IClassificationEvaluatorOutput eo)
            {
                eo.ConfusionMatrix.VarName = outVariableName;
            }

            exp.Add(evalNode, evalOutput);
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

            // Marks as an atomic unit that can be run in
            // a distributed fashion.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.StageId = input.PipelineId;
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
Exemplo n.º 17
0
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var <PredictorModel> [numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List <EntryPointNode>();

            // Instantiate the subgraph for each label value.
            for (int k = 0; k < numClasses; k++)
            {
                predModelVars[k] = ProcessClass(env, macroNodes, k, label, input, node);
            }

            // Convert the predictor models to an array of predictor models.
            var modelsArray = new Var <PredictorModel[]>();

            MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, macroNodes, predModelVars, modelsArray.VarName);

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var combineArgs = new ModelOperations.CombineOvaPredictorModelsInput();

            combineArgs.Caching           = input.Caching;
            combineArgs.FeatureColumn     = input.FeatureColumn;
            combineArgs.LabelColumn       = input.LabelColumn;
            combineArgs.NormalizeFeatures = input.NormalizeFeatures;
            combineArgs.UseProbabilities  = input.UseProbabilities;

            var inputBindingMap            = new Dictionary <string, List <ParameterBinding> >();
            var inputMap                   = new Dictionary <ParameterBinding, VariableBinding>();
            var combineNodeModelArrayInput = new SimpleVariableBinding(modelsArray.VarName);
            var paramBinding               = new SimpleParameterBinding(nameof(combineArgs.ModelArray));

            inputBindingMap.Add(nameof(combineArgs.ModelArray), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, combineNodeModelArrayInput);
            paramBinding = new SimpleParameterBinding(nameof(combineArgs.TrainingData));
            inputBindingMap.Add(nameof(combineArgs.TrainingData), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

            var outputMap = new Dictionary <string, string>();

            outputMap.Add(nameof(Output.PredictorModel), node.GetOutputVariableName(nameof(Output.PredictorModel)));
            var combineModelsNode = EntryPointNode.Create(env, "Models.OvaModelCombiner",
                                                          combineArgs, node.Context, inputBindingMap, inputMap, outputMap);

            macroNodes.Add(combineModelsNode);

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = macroNodes
            });
        }