示例#1
0
        public void TestOvaMacroWithUncalibratedLearner()
        {
            var dataPath = GetDataPath(@"iris.txt");
            var env      = new MLContext(42);
            // Specify subgraph for OVA
            var subGraph     = env.CreateExperiment();
            var learnerInput = new Legacy.Trainers.AveragedPerceptronBinaryClassifier {
                Shuffle = false
            };
            var learnerOutput = subGraph.Add(learnerInput);
            // Create pipeline with OVA and multiclass scoring.
            var experiment  = env.CreateExperiment();
            var importInput = new Legacy.Data.TextLoader(dataPath);

            importInput.Arguments.Column = new TextLoaderColumn[]
            {
                new TextLoaderColumn {
                    Name = "Label", Source = new[] { new TextLoaderRange(0) }
                },
                new TextLoaderColumn {
                    Name = "Features", Source = new[] { new TextLoaderRange(1, 4) }
                }
            };
            var importOutput = experiment.Add(importInput);
            var oneVersusAll = new Legacy.Models.OneVersusAll
            {
                TrainingData     = importOutput.Data,
                Nodes            = subGraph,
                UseProbabilities = true,
            };
            var ovaOutput  = experiment.Add(oneVersusAll);
            var scoreInput = new Legacy.Transforms.DatasetScorer
            {
                Data           = importOutput.Data,
                PredictorModel = ovaOutput.PredictorModel
            };
            var scoreOutput = experiment.Add(scoreInput);
            var evalInput   = new Legacy.Models.ClassificationEvaluator
            {
                Data = scoreOutput.ScoredData
            };
            var evalOutput = experiment.Add(evalInput);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();

            var data   = experiment.GetOutput(evalOutput.OverallMetrics);
            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol);

            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == accCol))
            {
                var getter = cursor.GetGetter <double>(accCol);
                b = cursor.MoveNext();
                Assert.True(b);
                double acc = 0;
                getter(ref acc);
                Assert.Equal(0.71, acc, 2);
                b = cursor.MoveNext();
                Assert.False(b);
            }
        }
        public static CommonOutputs.MacroOutput <Output> TrainTestBinary(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Parse the subgraph.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes);

            // Change the subgraph to use the training data as input.
            var varName = input.Inputs.Data.VarName;
            EntryPointVariable variable;

            if (!subGraphRunContext.TryGetVariable(varName, out variable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            var trainingVar = node.GetInputVariable("TrainingData");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameInputVariable(variable.Name, trainingVar);
            }
            subGraphRunContext.RemoveVariable(variable);

            // Change the subgraph to use the model variable as output.
            varName = input.Outputs.Model.VarName;
            if (!subGraphRunContext.TryGetVariable(varName, out variable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            string outputVarName = node.GetOutputVariableName("PredictorModel");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameOutputVariable(variable.Name, outputVarName);
            }
            subGraphRunContext.RemoveVariable(variable);

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Change all the subgraph nodes to use the main context.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.SetContext(node.Context);
            }

            // Add the scoring node.
            var testingVar = node.GetInputVariable("TestingData");
            var exp        = new Experiment(env);
            var scoreNode  = new Legacy.Transforms.DatasetScorer();

            scoreNode.Data.VarName           = testingVar.ToJson();
            scoreNode.PredictorModel.VarName = outputVarName;
            var scoreNodeOutput = exp.Add(scoreNode);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            // Add the evaluator node.
            exp.Reset();
            var evalNode = new Legacy.Models.BinaryClassificationEvaluator();

            evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName;
            var    evalOutput = new Legacy.Models.BinaryClassificationEvaluator.Output();
            string outVariableName;

            if (node.OutputMap.TryGetValue("Warnings", out outVariableName))
            {
                evalOutput.Warnings.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("OverallMetrics", out outVariableName))
            {
                evalOutput.OverallMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("PerInstanceMetrics", out outVariableName))
            {
                evalOutput.PerInstanceMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue("ConfusionMatrix", out outVariableName))
            {
                evalOutput.ConfusionMatrix.VarName = outVariableName;
            }
            exp.Add(evalNode, evalOutput);
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            var stageId = Guid.NewGuid().ToString("N");

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.StageId = stageId;
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
示例#3
0
        public void TestSimpleTrainExperiment()
        {
            var dataPath   = GetDataPath("adult.tiny.with-schema.txt");
            var env        = new MLContext();
            var experiment = env.CreateExperiment();

            var importInput  = new Legacy.Data.TextLoader(dataPath);
            var importOutput = experiment.Add(importInput);

            var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer
            {
                Data = importOutput.Data
            };

            catInput.AddColumn("Categories");
            var catOutput = experiment.Add(catInput);

            var concatInput = new Legacy.Transforms.ColumnConcatenator
            {
                Data = catOutput.OutputData
            };

            concatInput.AddColumn("Features", "Categories", "NumericFeatures");
            var concatOutput = experiment.Add(concatInput);

            var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
            {
                TrainingData = concatOutput.OutputData,
                LossFunction = new HingeLossSDCAClassificationLossFunction()
                {
                    Margin = 1.1f
                },
                NumThreads = 1,
                Shuffle    = false
            };
            var sdcaOutput = experiment.Add(sdcaInput);

            var scoreInput = new Legacy.Transforms.DatasetScorer
            {
                Data           = concatOutput.OutputData,
                PredictorModel = sdcaOutput.PredictorModel
            };
            var scoreOutput = experiment.Add(scoreInput);

            var evalInput = new Legacy.Models.BinaryClassificationEvaluator
            {
                Data = scoreOutput.ScoredData
            };
            var evalOutput = experiment.Add(evalInput);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();
            var data = experiment.GetOutput(evalOutput.OverallMetrics);

            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex("AUC", out int aucCol);

            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == aucCol))
            {
                var getter = cursor.GetGetter <double>(aucCol);
                b = cursor.MoveNext();
                Assert.True(b);
                double auc = 0;
                getter(ref auc);
                Assert.Equal(0.93, auc, 2);
                b = cursor.MoveNext();
                Assert.False(b);
            }
        }
示例#4
0
        public static CommonOutputs.MacroOutput <Output> TrainTest(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Create default pipeline ID if one not given.
            input.PipelineId = input.PipelineId ?? Guid.NewGuid().ToString("N");

            // Parse the subgraph.
            var subGraphRunContext = new RunContext(env);
            var subGraphNodes      = EntryPointNode.ValidateNodes(env, subGraphRunContext, input.Nodes, label: input.LabelColumn,
                                                                  input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
                                                                  input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
                                                                  input.NameColumn.IsExplicit ? input.NameColumn.Value : null);

            // Change the subgraph to use the training data as input.
            var             varName = input.Inputs.Data.VarName;
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            if (!subGraphRunContext.TryGetVariable(varName, out var dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }
            var trainingVar = node.GetInputVariable(nameof(input.TrainingData));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameInputVariable(dataVariable.Name, trainingVar);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Change the subgraph to use the model variable as output.
            varName = input.Outputs.PredictorModel == null ? input.Outputs.TransformModel.VarName : input.Outputs.PredictorModel.VarName;
            if (!subGraphRunContext.TryGetVariable(varName, out dataVariable))
            {
                throw env.Except($"Invalid variable name '{varName}'.");
            }

            string outputVarName = input.Outputs.PredictorModel == null?node.GetOutputVariableName(nameof(Output.TransformModel)) :
                                       node.GetOutputVariableName(nameof(Output.PredictorModel));

            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName);
            }
            subGraphRunContext.RemoveVariable(dataVariable);

            // Move the variables from the subcontext to the main context.
            node.Context.AddContextVariables(subGraphRunContext);

            // Change all the subgraph nodes to use the main context.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.SetContext(node.Context);
            }

            // Testing using test data set
            var testingVar = node.GetInputVariable(nameof(input.TestingData));
            var exp        = new Experiment(env);

            Legacy.Transforms.DatasetScorer.Output  scoreNodeOutput            = null;
            Legacy.Models.DatasetTransformer.Output datasetTransformNodeOutput = null;
            if (input.Outputs.PredictorModel == null)
            {
                //combine the predictor model with any potential transfrom model passed from the outer graph
                if (transformModelVarName != null && transformModelVarName.VariableName != null)
                {
                    var modelCombine = new ML.Legacy.Transforms.ModelCombiner
                    {
                        Models = new ArrayVar <TransformModel>(
                            new Var <TransformModel>[] {
                            new Var <TransformModel> {
                                VarName = transformModelVarName.VariableName
                            },
                            new Var <TransformModel> {
                                VarName = outputVarName
                            }
                        }
                            )
                    };

                    var modelCombineOutput = exp.Add(modelCombine);
                    outputVarName = modelCombineOutput.OutputModel.VarName;
                }

                var datasetTransformerNode = new Legacy.Models.DatasetTransformer
                {
                    Data           = { VarName = testingVar.ToJson() },
                    TransformModel = { VarName = outputVarName }
                };

                datasetTransformNodeOutput = exp.Add(datasetTransformerNode);
            }
            else
            {
                //combine the predictor model with any potential transfrom model passed from the outer graph
                if (transformModelVarName != null && transformModelVarName.VariableName != null)
                {
                    var modelCombine = new Legacy.Transforms.TwoHeterogeneousModelCombiner
                    {
                        TransformModel = { VarName = transformModelVarName.VariableName },
                        PredictorModel = { VarName = outputVarName }
                    };

                    var modelCombineOutput = exp.Add(modelCombine);
                    outputVarName = modelCombineOutput.PredictorModel.VarName;
                }

                // Add the scoring node for testing.
                var scoreNode = new Legacy.Transforms.DatasetScorer
                {
                    Data           = { VarName = testingVar.ToJson() },
                    PredictorModel = { VarName = outputVarName }
                };

                scoreNodeOutput = exp.Add(scoreNode);
            }

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            // Do not double-add previous nodes.
            exp.Reset();

            // REVIEW: add similar support for NameColumn and FeatureColumn.
            var settings = new MacroUtils.EvaluatorSettings
            {
                LabelColumn  = input.LabelColumn,
                WeightColumn = input.WeightColumn.IsExplicit ? input.WeightColumn.Value : null,
                GroupColumn  = input.GroupColumn.IsExplicit ? input.GroupColumn.Value : null,
                NameColumn   = input.NameColumn.IsExplicit ? input.NameColumn.Value : null
            };

            string outVariableName;

            if (input.IncludeTrainingMetrics)
            {
                Legacy.Transforms.DatasetScorer.Output  scoreNodeTrainingOutput            = null;
                Legacy.Models.DatasetTransformer.Output datasetTransformNodeTrainingOutput = null;
                if (input.Outputs.PredictorModel == null)
                {
                    var datasetTransformerNode = new Legacy.Models.DatasetTransformer
                    {
                        Data           = { VarName = testingVar.ToJson() },
                        TransformModel = { VarName = outputVarName }
                    };

                    datasetTransformNodeTrainingOutput = exp.Add(datasetTransformerNode);
                }
                else
                {
                    // Add the scoring node for training.
                    var scoreNodeTraining = new Legacy.Transforms.DatasetScorer
                    {
                        Data           = { VarName = trainingVar.ToJson() },
                        PredictorModel = { VarName = outputVarName }
                    };
                    scoreNodeTrainingOutput = exp.Add(scoreNodeTraining);
                }

                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

                // Do not double-add previous nodes.
                exp.Reset();

                // Add the evaluator node for training.
                var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
                var evalNodeTraining        = evalInputOutputTraining.Item1;
                var evalOutputTraining      = evalInputOutputTraining.Item2;
                evalNodeTraining.Data.VarName = input.Outputs.PredictorModel == null ? datasetTransformNodeTrainingOutput.OutputData.VarName :
                                                scoreNodeTrainingOutput.ScoredData.VarName;

                if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName))
                {
                    evalOutputTraining.Warnings.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingOverallMetrics), out outVariableName))
                {
                    evalOutputTraining.OverallMetrics.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingPerInstanceMetrics), out outVariableName))
                {
                    evalOutputTraining.PerInstanceMetrics.VarName = outVariableName;
                }
                if (node.OutputMap.TryGetValue(nameof(Output.TrainingConfusionMatrix), out outVariableName) &&
                    evalOutputTraining is CommonOutputs.IClassificationEvaluatorOutput eoTraining)
                {
                    eoTraining.ConfusionMatrix.VarName = outVariableName;
                }

                exp.Add(evalNodeTraining, evalOutputTraining);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));
            }

            // Do not double-add previous nodes.
            exp.Reset();

            // Add the evaluator node for testing.
            var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings);
            var evalNode        = evalInputOutput.Item1;
            var evalOutput      = evalInputOutput.Item2;

            evalNode.Data.VarName = input.Outputs.PredictorModel == null ? datasetTransformNodeOutput.OutputData.VarName : scoreNodeOutput.ScoredData.VarName;

            if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName))
            {
                evalOutput.Warnings.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.OverallMetrics), out outVariableName))
            {
                evalOutput.OverallMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.PerInstanceMetrics), out outVariableName))
            {
                evalOutput.PerInstanceMetrics.VarName = outVariableName;
            }
            if (node.OutputMap.TryGetValue(nameof(Output.ConfusionMatrix), out outVariableName) &&
                evalOutput is CommonOutputs.IClassificationEvaluatorOutput eo)
            {
                eo.ConfusionMatrix.VarName = outVariableName;
            }

            exp.Add(evalNode, evalOutput);
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes()));

            // Marks as an atomic unit that can be run in
            // a distributed fashion.
            foreach (var subGraphNode in subGraphNodes)
            {
                subGraphNode.StageId = input.PipelineId;
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }