/// <summary>
/// Builds a one-versus-all experiment over the iris data set whose subgraph is an
/// averaged perceptron binary learner (with <c>UseProbabilities</c> enabled on the
/// OVA node), scores and evaluates the training data, and checks that the single
/// overall-metrics row reports a macro accuracy of 0.71 (two decimal places).
/// </summary>
public void TestOvaMacroWithUncalibratedLearner()
{
    var irisPath = GetDataPath(@"iris.txt");
    var mlContext = new MLContext(42);

    // Subgraph handed to OVA: a single binary learner node.
    var ovaSubGraph = mlContext.CreateExperiment();
    var perceptron = new Legacy.Trainers.AveragedPerceptronBinaryClassifier { Shuffle = false };
    ovaSubGraph.Add(perceptron);

    // Main pipeline: load -> OVA train -> score -> evaluate.
    var pipeline = mlContext.CreateExperiment();

    var loader = new Legacy.Data.TextLoader(irisPath);
    var labelColumn = new TextLoaderColumn
    {
        Name = "Label",
        Source = new[] { new TextLoaderRange(0) }
    };
    var featuresColumn = new TextLoaderColumn
    {
        Name = "Features",
        Source = new[] { new TextLoaderRange(1, 4) }
    };
    loader.Arguments.Column = new TextLoaderColumn[] { labelColumn, featuresColumn };
    var loaded = pipeline.Add(loader);

    var trained = pipeline.Add(new Legacy.Models.OneVersusAll
    {
        TrainingData = loaded.Data,
        Nodes = ovaSubGraph,
        UseProbabilities = true,
    });

    var scored = pipeline.Add(new Legacy.Transforms.DatasetScorer
    {
        Data = loaded.Data,
        PredictorModel = trained.PredictorModel
    });

    var evaluated = pipeline.Add(new Legacy.Models.ClassificationEvaluator
    {
        Data = scored.ScoredData
    });

    pipeline.Compile();
    pipeline.SetInput(loader.InputFile, new SimpleFileHandle(mlContext, irisPath, false, false));
    pipeline.Run();

    var metrics = pipeline.GetOutput(evaluated.OverallMetrics);
    Assert.True(metrics.Schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accuracyIndex));

    using (var cursor = metrics.GetRowCursor(col => col == accuracyIndex))
    {
        var readAccuracy = cursor.GetGetter<double>(accuracyIndex);

        // Exactly one metrics row is expected.
        Assert.True(cursor.MoveNext());
        double accuracy = 0;
        readAccuracy(ref accuracy);
        Assert.Equal(0.71, accuracy, 2);
        Assert.False(cursor.MoveNext());
    }
}
/// <summary>
/// Expands a binary train/test macro node into concrete graph nodes: the user
/// subgraph (rewired to read the macro's "TrainingData" input and to write the
/// macro's "PredictorModel" output), a scorer over "TestingData", and a binary
/// classification evaluator whose outputs are bound to whichever macro outputs
/// are present in <c>node.OutputMap</c>.
/// </summary>
/// <param name="env">Host environment used to build contexts, experiments and exceptions.</param>
/// <param name="input">Macro arguments carrying the subgraph nodes and its input/output variable names.</param>
/// <param name="node">The macro node being expanded; supplies the variable bindings and the main context.</param>
/// <returns>A macro output whose <c>Nodes</c> are the expanded nodes, all tagged with one freshly generated stage id.</returns>
/// <remarks>Throws via <c>env.Except</c> when a subgraph input/output variable name is not found in the subgraph context.</remarks>
public static CommonOutputs.MacroOutput<Output> TrainTestBinary(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Parse the subgraph inside its own run context.
    var subContext = new RunContext(env);
    var expandedNodes = EntryPointNode.ValidateNodes(env, subContext, input.Nodes);

    // Looks a variable up in the subgraph context, failing loudly on unknown names.
    EntryPointVariable Resolve(string name)
    {
        if (subContext.TryGetVariable(name, out var found))
        {
            return found;
        }

        throw env.Except($"Invalid variable name '{name}'.");
    }

    // Redirect the subgraph's data input to the macro's training data.
    var subGraphInput = Resolve(input.Inputs.Data.VarName);
    var trainingVar = node.GetInputVariable("TrainingData");
    foreach (var expanded in expandedNodes)
    {
        expanded.RenameInputVariable(subGraphInput.Name, trainingVar);
    }
    subContext.RemoveVariable(subGraphInput);

    // Redirect the subgraph's model output to the macro's predictor model output.
    var subGraphModel = Resolve(input.Outputs.Model.VarName);
    string modelVarName = node.GetOutputVariableName("PredictorModel");
    foreach (var expanded in expandedNodes)
    {
        expanded.RenameOutputVariable(subGraphModel.Name, modelVarName);
    }
    subContext.RemoveVariable(subGraphModel);

    // Fold the remaining subgraph variables into the main context and
    // point every subgraph node at that context.
    node.Context.AddContextVariables(subContext);
    foreach (var expanded in expandedNodes)
    {
        expanded.SetContext(node.Context);
    }

    // Scoring node over the testing data.
    var testingVar = node.GetInputVariable("TestingData");
    var builder = new Experiment(env);
    var scorer = new Legacy.Transforms.DatasetScorer
    {
        Data = { VarName = testingVar.ToJson() },
        PredictorModel = { VarName = modelVarName }
    };
    var scorerOutput = builder.Add(scorer);
    expandedNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, builder.GetNodes()));

    // Evaluator node; reset first so the scorer is not added a second time.
    builder.Reset();
    var evaluator = new Legacy.Models.BinaryClassificationEvaluator
    {
        Data = { VarName = scorerOutput.ScoredData.VarName }
    };
    var evaluatorOutput = new Legacy.Models.BinaryClassificationEvaluator.Output();

    // Bind only the evaluator outputs that the macro node actually maps.
    string mappedName;
    if (node.OutputMap.TryGetValue("Warnings", out mappedName))
    {
        evaluatorOutput.Warnings.VarName = mappedName;
    }
    if (node.OutputMap.TryGetValue("OverallMetrics", out mappedName))
    {
        evaluatorOutput.OverallMetrics.VarName = mappedName;
    }
    if (node.OutputMap.TryGetValue("PerInstanceMetrics", out mappedName))
    {
        evaluatorOutput.PerInstanceMetrics.VarName = mappedName;
    }
    if (node.OutputMap.TryGetValue("ConfusionMatrix", out mappedName))
    {
        evaluatorOutput.ConfusionMatrix.VarName = mappedName;
    }

    builder.Add(evaluator, evaluatorOutput);
    expandedNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, builder.GetNodes()));

    // Tag every produced node with the same stage id.
    var sharedStageId = Guid.NewGuid().ToString("N");
    foreach (var expanded in expandedNodes)
    {
        expanded.StageId = sharedStageId;
    }

    return new CommonOutputs.MacroOutput<Output> { Nodes = expandedNodes };
}
/// <summary>
/// Runs a simple experiment on the tiny adult data set: load, one-hot encode
/// "Categories", concatenate into "Features", train an SDCA binary classifier
/// with hinge loss, score and evaluate the training data, then checks that the
/// single overall-metrics row reports an AUC of 0.93 (two decimal places).
/// </summary>
public void TestSimpleTrainExperiment()
{
    var adultPath = GetDataPath("adult.tiny.with-schema.txt");
    var mlContext = new MLContext();
    var pipeline = mlContext.CreateExperiment();

    var loader = new Legacy.Data.TextLoader(adultPath);
    var loaded = pipeline.Add(loader);

    var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer { Data = loaded.Data };
    oneHot.AddColumn("Categories");
    var encoded = pipeline.Add(oneHot);

    var concatenator = new Legacy.Transforms.ColumnConcatenator { Data = encoded.OutputData };
    concatenator.AddColumn("Features", "Categories", "NumericFeatures");
    var featurized = pipeline.Add(concatenator);

    var trained = pipeline.Add(new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
    {
        TrainingData = featurized.OutputData,
        LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f },
        NumThreads = 1,
        Shuffle = false
    });

    var scored = pipeline.Add(new Legacy.Transforms.DatasetScorer
    {
        Data = featurized.OutputData,
        PredictorModel = trained.PredictorModel
    });

    var evaluated = pipeline.Add(new Legacy.Models.BinaryClassificationEvaluator
    {
        Data = scored.ScoredData
    });

    pipeline.Compile();
    pipeline.SetInput(loader.InputFile, new SimpleFileHandle(mlContext, adultPath, false, false));
    pipeline.Run();

    var metrics = pipeline.GetOutput(evaluated.OverallMetrics);
    Assert.True(metrics.Schema.TryGetColumnIndex("AUC", out int aucIndex));

    using (var cursor = metrics.GetRowCursor(col => col == aucIndex))
    {
        var readAuc = cursor.GetGetter<double>(aucIndex);

        // Exactly one metrics row is expected.
        Assert.True(cursor.MoveNext());
        double aucValue = 0;
        readAuc(ref aucValue);
        Assert.Equal(0.93, aucValue, 2);
        Assert.False(cursor.MoveNext());
    }
}
/// <summary>
/// Expands a train/test macro node into concrete graph nodes: the user subgraph
/// (rewired to read the macro's training data and to write the macro's model
/// output), a scorer or dataset transformer over the testing data, optionally the
/// same over the training data when <c>input.IncludeTrainingMetrics</c> is set,
/// and the matching evaluator node(s) for <c>input.Kind</c>.
/// </summary>
/// <param name="env">Host environment used to build contexts, experiments and exceptions.</param>
/// <param name="input">
/// Macro arguments. <c>PipelineId</c> is assigned a new GUID here when it is null.
/// </param>
/// <param name="node">The macro node being expanded; supplies the variable bindings, output map and main context.</param>
/// <returns>A macro output whose <c>Nodes</c> are the expanded nodes, all tagged with <c>input.PipelineId</c> as stage id.</returns>
/// <remarks>Throws via <c>env.Except</c> when a subgraph input/output variable name is not found in the subgraph context.</remarks>
public static CommonOutputs.MacroOutput<Output> TrainTest(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Create default pipeline ID if one not given.
    input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N");

    // Parse the subgraph.
    var subGraphRunContext = new RunContext(env);
    var subGraphNodes = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, label: input.LabelColumn,
        input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
        input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
        input.NameColumn.IsExplicit ? input.NameColumn.Value : null);

    // Change the subgraph to use the training data as input.
    var varName = input.Inputs.Data.VarName;
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }

    if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }

    var trainingVar = node.GetInputVariable(nameof(input.TrainingData));
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar);
    }
    subGraphRunContext.RemoveVariable(dataVariable);

    // Change the subgraph to use the model variable as output. The subgraph
    // produces either a predictor model or, when that is absent, a transform model.
    varName = input.Outputs.PredictorModel == null
        ? input.Outputs.TransformModel.VarName
        : input.Outputs.PredictorModel.VarName;
    if (!subGraphRunContext.TryGetVariable(varName, out dataVariable))
    {
        throw env.Except($"Invalid variable name '{varName}'.");
    }

    string outputVarName = input.Outputs.PredictorModel == null
        ? node.GetOutputVariableName(nameof(Output.TransformModel))
        : node.GetOutputVariableName(nameof(Output.PredictorModel));
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName);
    }
    subGraphRunContext.RemoveVariable(dataVariable);

    // Move the variables from the subcontext to the main context.
    node.Context.AddContextVariables(subGraphRunContext);

    // Change all the subgraph nodes to use the main context.
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.SetContext(node.Context);
    }

    // Testing using test data set.
    var testingVar = node.GetInputVariable(nameof(input.TestingData));
    var exp = new Experiment(env);

    Legacy.Transforms.DatasetScorer.Output scoreNodeOutput = null;
    Legacy.Models.DatasetTransformer.Output datasetTransformNodeOutput = null;
    if (input.Outputs.PredictorModel == null)
    {
        // Combine the transform model with any potential transform model passed from the outer graph.
        if (transformModelVarName != null && transformModelVarName.VariableName != null)
        {
            var modelCombine = new ML.Legacy.Transforms.ModelCombiner
            {
                Models = new ArrayVar<TransformModel>(
                    new Var<TransformModel>[]
                    {
                        new Var<TransformModel> { VarName = transformModelVarName.VariableName },
                        new Var<TransformModel> { VarName = outputVarName }
                    })
            };

            var modelCombineOutput = exp.Add(modelCombine);
            outputVarName = modelCombineOutput.OutputModel.VarName;
        }

        var datasetTransformerNode = new Legacy.Models.DatasetTransformer
        {
            Data = { VarName = testingVar.ToJson() },
            TransformModel = { VarName = outputVarName }
        };

        datasetTransformNodeOutput = exp.Add(datasetTransformerNode);
    }
    else
    {
        // Combine the predictor model with any potential transform model passed from the outer graph.
        if (transformModelVarName != null && transformModelVarName.VariableName != null)
        {
            var modelCombine = new Legacy.Transforms.TwoHeterogeneousModelCombiner
            {
                TransformModel = { VarName = transformModelVarName.VariableName },
                PredictorModel = { VarName = outputVarName }
            };

            var modelCombineOutput = exp.Add(modelCombine);
            outputVarName = modelCombineOutput.PredictorModel.VarName;
        }

        // Add the scoring node for testing.
        var scoreNode = new Legacy.Transforms.DatasetScorer
        {
            Data = { VarName = testingVar.ToJson() },
            PredictorModel = { VarName = outputVarName }
        };

        scoreNodeOutput = exp.Add(scoreNode);
    }

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // Do not double-add previous nodes.
    exp.Reset();

    // REVIEW: add similar support for NameColumn and FeatureColumn.
    var settings = new MacroUtils.EvaluatorSettings
    {
        LabelColumn = input.LabelColumn,
        WeightColumn = input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
        GroupColumn = input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
        NameColumn = input.NameColumn.IsExplicit ? input.NameColumn.Value : null
    };

    string outVariableName;

    if (input.IncludeTrainingMetrics)
    {
        Legacy.Transforms.DatasetScorer.Output scoreNodeTrainingOutput = null;
        Legacy.Models.DatasetTransformer.Output datasetTransformNodeTrainingOutput = null;
        if (input.Outputs.PredictorModel == null)
        {
            // Training metrics must be computed over the training data, exactly as the
            // predictor-model branch below does. Feeding the testing data here would make
            // the training evaluator consume the same transformed data as the test evaluator.
            var datasetTransformerNode = new Legacy.Models.DatasetTransformer
            {
                Data = { VarName = trainingVar.ToJson() },
                TransformModel = { VarName = outputVarName }
            };

            datasetTransformNodeTrainingOutput = exp.Add(datasetTransformerNode);
        }
        else
        {
            // Add the scoring node for training.
            var scoreNodeTraining = new Legacy.Transforms.DatasetScorer
            {
                Data = { VarName = trainingVar.ToJson() },
                PredictorModel = { VarName = outputVarName }
            };
            scoreNodeTrainingOutput = exp.Add(scoreNodeTraining);
        }

        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

        // Do not double-add previous nodes.
        exp.Reset();

        // Add the evaluator node for training.
        var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
        var evalNodeTraining = evalInputOutputTraining.Item1;
        var evalOutputTraining = evalInputOutputTraining.Item2;
        evalNodeTraining.Data.VarName = input.Outputs.PredictorModel == null
            ? datasetTransformNodeTrainingOutput.OutputData.VarName
            : scoreNodeTrainingOutput.ScoredData.VarName;

        // Bind only the training evaluator outputs that the macro node actually maps.
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName))
        {
            evalOutputTraining.Warnings.VarName = outVariableName;
        }
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outVariableName))
        {
            evalOutputTraining.OverallMetrics.VarName = outVariableName;
        }
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outVariableName))
        {
            evalOutputTraining.PerInstanceMetrics.VarName = outVariableName;
        }
        // A confusion matrix is only bound when the evaluator output is a classification evaluator output.
        if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outVariableName)
            && evalOutputTraining is CommonOutputs.IClassificationEvaluatorOutput eoTraining)
        {
            eoTraining.ConfusionMatrix.VarName = outVariableName;
        }

        exp.Add(evalNodeTraining, evalOutputTraining);
        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));
    }

    // Do not double-add previous nodes.
    exp.Reset();

    // Add the evaluator node for testing.
    var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
    var evalNode = evalInputOutput.Item1;
    var evalOutput = evalInputOutput.Item2;
    evalNode.Data.VarName = input.Outputs.PredictorModel == null
        ? datasetTransformNodeOutput.OutputData.VarName
        : scoreNodeOutput.ScoredData.VarName;

    // Bind only the testing evaluator outputs that the macro node actually maps.
    if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName))
    {
        evalOutput.Warnings.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName))
    {
        evalOutput.OverallMetrics.VarName = outVariableName;
    }
    if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName))
    {
        evalOutput.PerInstanceMetrics.VarName = outVariableName;
    }
    // A confusion matrix is only bound when the evaluator output is a classification evaluator output.
    if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName)
        && evalOutput is CommonOutputs.IClassificationEvaluatorOutput eo)
    {
        eo.ConfusionMatrix.VarName = outVariableName;
    }

    exp.Add(evalNode, evalOutput);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

    // Marks as an atomic unit that can be run in
    // a distributed fashion.
    foreach (var subGraphNode in subGraphNodes)
    {
        subGraphNode.StageId = input.PipelineId;
    }

    return new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes };
}