예제 #1
0
        public static CommonOutputs.MacroOutput <Output> CrossValidateBinary(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            // Split the input data into folds.
            var exp     = new Experiment(env);
            var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();

            cvSplit.Data.VarName         = node.GetInputVariable("Data").ToJson();
            cvSplit.NumFolds             = input.NumFolds;
            cvSplit.StratificationColumn = input.StratificationColumn;
            var cvSplitOutput = exp.Add(cvSplit);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            var predModelVars       = new Var <PredictorModel> [input.NumFolds];
            var warningsVars        = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars  = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestBinaryMacro.Arguments
                {
                    Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray())
                };
                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };
                args.Outputs.Model = new Var <PredictorModel>
                {
                    VarName = mapping[input.Outputs.Model.VarName]
                };

                // Set the input bindings for the TrainTest entry point.
                var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData    = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));
                var outputMap    = new Dictionary <string, string>();
                var predModelVar = new Var <PredictorModel>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.PredictorModel), predModelVar.VarName);
                predModelVars[k] = predModelVar;
                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestBinaryMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                subGraphNodes.Add(EntryPointNode.Create(env, "Models.TrainTestBinaryEvaluator", args, node.Context, inputBindingMap, inputMap, outputMap));
            }

            exp.Reset();

            var outModels = new Legacy.Data.PredictorModelArrayConverter
            {
                Model = new ArrayVar <PredictorModel>(predModelVars)
            };
            var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();

            outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
            exp.Add(outModels, outModelsOutput);

            var warnings = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(warningsVars)
            };
            var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            warningsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            exp.Add(warnings, warningsOutput);

            var overallMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(overallMetricsVars)
            };
            var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            overallMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            exp.Add(overallMetrics, overallMetricsOutput);

            var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(instanceMetricsVars)
            };
            var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            instanceMetricsOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            exp.Add(instanceMetrics, instanceMetricsOutput);

            var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(confusionMatrixVars)
            };
            var confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            confusionMatricesOutput.OutputData.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
            exp.Add(confusionMatrices, confusionMatricesOutput);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
예제 #2
0
        public static CommonOutputs.MacroOutput <Output> CrossValidate(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            env.CheckValue(input, nameof(input));

            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            //the input transform model
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            // Split the input data into folds.
            var exp     = new Experiment(env);
            var cvSplit = new Legacy.Models.CrossValidatorDatasetSplitter();

            cvSplit.Data.VarName         = node.GetInputVariable("Data").ToJson();
            cvSplit.NumFolds             = input.NumFolds;
            cvSplit.StratificationColumn = input.StratificationColumn;
            var cvSplitOutput = exp.Add(cvSplit);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

            var predModelVars           = new Var <IPredictorModel> [input.NumFolds];
            var transformModelVars      = new Var <ITransformModel> [input.NumFolds];
            var inputTransformModelVars = new Var <IPredictorModel> [input.NumFolds];
            var warningsVars            = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars      = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars     = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars     = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestMacro.Arguments
                {
                    Nodes          = new JArray(graph.Select(n => n.ToJson()).ToArray()),
                    TransformModel = null,
                    LabelColumn    = input.LabelColumn,
                    GroupColumn    = input.GroupColumn,
                    WeightColumn   = input.WeightColumn,
                    NameColumn     = input.NameColumn
                };

                if (transformModelVarName != null)
                {
                    args.TransformModel = new Var <ITransformModel> {
                        VarName = transformModelVarName.VariableName
                    }
                }
                ;

                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };

                if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName))
                {
                    args.Outputs.PredictorModel = new Var <IPredictorModel>
                    {
                        VarName = mapping[input.Outputs.PredictorModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.PredictorModel = null;
                }

                if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName))
                {
                    args.Outputs.TransformModel = new Var <ITransformModel>
                    {
                        VarName = mapping[input.Outputs.TransformModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.TransformModel = null;
                }

                // Set train/test trainer kind to match.
                args.Kind = input.Kind;

                // Set the input bindings for the TrainTest entry point.
                var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData    = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));
                var outputMap         = new Dictionary <string, string>();
                var transformModelVar = new Var <ITransformModel>();
                var predModelVar      = new Var <IPredictorModel>();
                if (input.Outputs.PredictorModel == null)
                {
                    outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
                    transformModelVars[k] = transformModelVar;
                    Legacy.Transforms.ModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new Legacy.Transforms.ModelCombiner
                        {
                            Models = new ArrayVar <ITransformModel>(
                                new Var <ITransformModel>[] {
                                new Var <ITransformModel> {
                                    VarName = transformModelVarName.VariableName
                                },
                                transformModelVar
                            }
                                )
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        transformModelVars[k] = modelCombineOutput.OutputModel;
                    }
                }
                else
                {
                    outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
                    predModelVars[k] = predModelVar;
                    Legacy.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new Legacy.Transforms.TwoHeterogeneousModelCombiner
                        {
                            TransformModel = { VarName = transformModelVarName.VariableName },
                            PredictorModel = predModelVar
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        predModelVars[k] = modelCombineOutput.PredictorModel;
                    }
                }

                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
                subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
            }

            exp.Reset();

            // Convert predictors from all folds into an array of predictors.

            if (input.Outputs.PredictorModel == null)
            {
                var outModels = new Legacy.Data.TransformModelArrayConverter
                {
                    TransformModel = new ArrayVar <ITransformModel>(transformModelVars)
                };
                var outModelsOutput = new Legacy.Data.TransformModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
                exp.Add(outModels, outModelsOutput);
            }
            else
            {
                var outModels = new Legacy.Data.PredictorModelArrayConverter
                {
                    Model = new ArrayVar <IPredictorModel>(predModelVars)
                };
                var outModelsOutput = new Legacy.Data.PredictorModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
                exp.Add(outModels, outModelsOutput);
            }

            // Convert warnings data views from all folds into an array of data views.
            var warnings = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(warningsVars)
            };
            var warningsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            exp.Add(warnings, warningsOutput);

            // Convert overall metrics data views from all folds into an array of data views.
            var overallMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(overallMetricsVars)
            };
            var overallMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            exp.Add(overallMetrics, overallMetricsOutput);

            // Convert per instance data views from all folds into an array of data views.
            var instanceMetrics = new Legacy.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(instanceMetricsVars)
            };
            var instanceMetricsOutput = new Legacy.Data.IDataViewArrayConverter.Output();

            exp.Add(instanceMetrics, instanceMetricsOutput);

            Legacy.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
            if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
                input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
            {
                // Convert confusion matrix data views from all folds into an array of data views.
                var confusionMatrices = new Legacy.Data.IDataViewArrayConverter
                {
                    Data = new ArrayVar <IDataView>(confusionMatrixVars)
                };
                confusionMatricesOutput = new Legacy.Data.IDataViewArrayConverter.Output();
                exp.Add(confusionMatrices, confusionMatricesOutput);
            }

            var combineArgs = new CombineMetricsInput();

            combineArgs.Kind         = input.Kind;
            combineArgs.LabelColumn  = input.LabelColumn;
            combineArgs.WeightColumn = input.WeightColumn;
            combineArgs.GroupColumn  = input.GroupColumn;
            combineArgs.NameColumn   = input.NameColumn;

            // Set the input bindings for the CombineMetrics entry point.
            var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var combineInputMap        = new Dictionary <ParameterBinding, VariableBinding>();

            var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings));

            combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List <ParameterBinding> {
                warningsArray
            });
            combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsOutput.OutputData.VarName));
            var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> {
                overallArray
            });
            combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));
            var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> {
                combinePerInstArray
            });
            combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));
            if (confusionMatricesOutput != null)
            {
                var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
                combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> {
                    combineConfArray
                });
                combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
            }

            var combineOutputMap  = new Dictionary <string, string>();
            var combineWarningVar = new Var <IDataView>();

            combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
            var combineOverallMetric = new Var <IDataView>();

            combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
            var combineInstanceMetric = new Var <IDataView>();

            combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
            if (confusionMatricesOutput != null)
            {
                var combineConfusionMatrix = new Var <IDataView>();
                combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
                combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
            }
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
            subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));
            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }