/// <summary>
/// Suggests an SDCA-based learner recipe: the multiclass SDCA trainer when the
/// predictor signature is multiclass, otherwise the binary linear (SDCA) trainer
/// with an entry-point pipeline node attached.
/// </summary>
protected override IEnumerable<SuggestedRecipe> ApplyCore(Type predictorType, TransformInference.SuggestedTransform[] transforms)
{
    var suggested = new SuggestedRecipe.SuggestedLearner();
    bool isMultiClass = predictorType == typeof(SignatureMultiClassClassifierTrainer);
    if (isMultiClass)
    {
        suggested.LoadableClassInfo =
            ComponentCatalog.GetLoadableClassInfo<SignatureTrainer>(Learners.SdcaMultiClassTrainer.LoadNameValue);
    }
    else
    {
        suggested.LoadableClassInfo =
            ComponentCatalog.GetLoadableClassInfo<SignatureTrainer>(Learners.LinearClassificationTrainer.LoadNameValue);
        // Only the binary path gets an entry-point pipeline node, mirroring the original behavior.
        var entryPointInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier();
        suggested.PipelineNode = new TrainerPipelineNode(entryPointInput);
    }
    suggested.Settings = "";
    yield return new SuggestedRecipe(ToString(), transforms, new[] { suggested });
}
/// <summary>
/// Builds a one-versus-all multiclass pipeline over the iris data set using a
/// binary SDCA sub-graph, scores it, and checks the macro accuracy.
/// </summary>
public void TestOvaMacro()
{
    var irisPath = GetDataPath(@"iris.txt");
    var context = new MLContext(42);

    // Sub-graph holding the binary learner that OVA replicates once per class.
    var ovaSubGraph = context.CreateExperiment();
    var binaryLearner = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier { NumThreads = 1 };
    var binaryLearnerOutput = ovaSubGraph.Add(binaryLearner);

    // Main pipeline: load, train OVA, score, evaluate.
    var pipeline = context.CreateExperiment();
    var loader = new Legacy.Data.TextLoader(irisPath);
    loader.Arguments.Column = new TextLoaderColumn[]
    {
        new TextLoaderColumn { Name = "Label", Source = new[] { new TextLoaderRange(0) } },
        new TextLoaderColumn { Name = "Features", Source = new[] { new TextLoaderRange(1, 4) } }
    };
    var loaderOutput = pipeline.Add(loader);

    var ova = new Legacy.Models.OneVersusAll
    {
        TrainingData = loaderOutput.Data,
        Nodes = ovaSubGraph,
        UseProbabilities = true,
    };
    var ovaOutput = pipeline.Add(ova);

    var scorer = new Legacy.Transforms.DatasetScorer
    {
        Data = loaderOutput.Data,
        PredictorModel = ovaOutput.PredictorModel
    };
    var scorerOutput = pipeline.Add(scorer);

    var evaluator = new Legacy.Models.ClassificationEvaluator { Data = scorerOutput.ScoredData };
    var evaluatorOutput = pipeline.Add(evaluator);

    pipeline.Compile();
    pipeline.SetInput(loader.InputFile, new SimpleFileHandle(context, irisPath, false, false));
    pipeline.Run();

    var metrics = pipeline.GetOutput(evaluatorOutput.OverallMetrics);
    var metricsSchema = metrics.Schema;
    var found = metricsSchema.TryGetColumnIndex(MultiClassClassifierEvaluator.AccuracyMacro, out int accCol);
    Assert.True(found);

    using (var cursor = metrics.GetRowCursor(col => col == accCol))
    {
        var accGetter = cursor.GetGetter<double>(accCol);
        found = cursor.MoveNext();
        Assert.True(found);
        double macroAccuracy = 0;
        accGetter(ref macroAccuracy);
        Assert.Equal(0.96, macroAccuracy, 2);
        // Exactly one overall-metrics row is expected.
        found = cursor.MoveNext();
        Assert.False(found);
    }
}
/// <summary>
/// Cross-validates a binary SDCA pipeline on the breast-cancer data set with a
/// stratification column, then verifies the fold-index labels, the per-fold AUC
/// values, their average, and the reported standard deviation.
/// </summary>
public void TestCrossValidationMacroWithStratification()
{
    var dataPath = GetDataPath(@"breast-cancer.txt");
    var env = new MLContext(42);

    // Sub-graph to cross-validate: no-op transform followed by SDCA, combined
    // into a single predictor model so the macro can emit one model per fold.
    var subGraph = env.CreateExperiment();
    var nop = new Legacy.Transforms.NoOperation();
    var nopOutput = subGraph.Add(nop);
    var learnerInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
    {
        TrainingData = nopOutput.OutputData,
        NumThreads = 1
    };
    var learnerOutput = subGraph.Add(learnerInput);
    var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
    {
        TransformModels = new ArrayVar<TransformModel>(nopOutput.Model),
        PredictorModel = learnerOutput.PredictorModel
    };
    var modelCombineOutput = subGraph.Add(modelCombine);

    // Outer pipeline: load the data and run the cross-validation macro,
    // stratified on the "Strat" column.
    var experiment = env.CreateExperiment();
    var importInput = new Legacy.Data.TextLoader(dataPath);
    importInput.Arguments.Column = new Legacy.Data.TextLoaderColumn[]
    {
        new Legacy.Data.TextLoaderColumn { Name = "Label", Source = new[] { new Legacy.Data.TextLoaderRange(0) } },
        new Legacy.Data.TextLoaderColumn { Name = "Strat", Source = new[] { new Legacy.Data.TextLoaderRange(1) } },
        new Legacy.Data.TextLoaderColumn { Name = "Features", Source = new[] { new Legacy.Data.TextLoaderRange(2, 9) } }
    };
    var importOutput = experiment.Add(importInput);
    var crossValidate = new Legacy.Models.CrossValidator
    {
        Data = importOutput.Data,
        Nodes = subGraph,
        TransformModel = null,
        StratificationColumn = "Strat"
    };
    crossValidate.Inputs.Data = nop.Data;
    crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel;
    var crossValidateOutput = experiment.Add(crossValidate);

    experiment.Compile();
    experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
    experiment.Run();

    var data = experiment.GetOutput(crossValidateOutput.OverallMetrics);
    var schema = data.Schema;
    var b = schema.TryGetColumnIndex("AUC", out int metricCol);
    Assert.True(b);
    b = schema.TryGetColumnIndex("Fold Index", out int foldCol);
    Assert.True(b);
    using (var cursor = data.GetRowCursor(col => col == metricCol || col == foldCol))
    {
        var getter = cursor.GetGetter<double>(metricCol);
        var foldGetter = cursor.GetGetter<ReadOnlyMemory<char>>(foldCol);
        ReadOnlyMemory<char> fold = default;

        // Get the average.
        b = cursor.MoveNext();
        Assert.True(b);
        double avg = 0;
        getter(ref avg);
        foldGetter(ref fold);
        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Average", fold));

        // Get the standard deviation.
        b = cursor.MoveNext();
        Assert.True(b);
        double stdev = 0;
        getter(ref stdev);
        foldGetter(ref fold);
        Assert.True(ReadOnlyMemoryUtils.EqualsStr("Standard Deviation", fold));
        Assert.Equal(0.00488, stdev, 5);

        // Accumulate the two per-fold AUC values.
        double sum = 0;
        double val = 0;
        for (int f = 0; f < 2; f++)
        {
            b = cursor.MoveNext();
            Assert.True(b);
            getter(ref val);
            foldGetter(ref fold);
            sum += val;
            Assert.True(ReadOnlyMemoryUtils.EqualsStr("Fold " + f, fold));
        }
        // Compare with a precision tolerance rather than exact double equality
        // (consistent with the other numeric asserts in this file): the reported
        // average and the recomputed mean may differ in the last bits due to
        // floating-point rounding/summation order.
        Assert.Equal(avg, sum / 2, 6);

        b = cursor.MoveNext();
        Assert.False(b);
    }
}
/// <summary>
/// Trains binary SDCA with a hinge loss on the tiny adult data set through the
/// legacy experiment graph, scores it, and checks the resulting AUC.
/// </summary>
public void TestSimpleTrainExperiment()
{
    var adultPath = GetDataPath("adult.tiny.with-schema.txt");
    var context = new MLContext();
    var graph = context.CreateExperiment();

    // Load, one-hot encode the categorical column, and assemble the feature vector.
    var loader = new Legacy.Data.TextLoader(adultPath);
    var loaderOutput = graph.Add(loader);

    var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer { Data = loaderOutput.Data };
    oneHot.AddColumn("Categories");
    var oneHotOutput = graph.Add(oneHot);

    var concat = new Legacy.Transforms.ColumnConcatenator { Data = oneHotOutput.OutputData };
    concat.AddColumn("Features", "Categories", "NumericFeatures");
    var concatOutput = graph.Add(concat);

    // Train, score, and evaluate.
    var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
    {
        TrainingData = concatOutput.OutputData,
        LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f },
        NumThreads = 1,
        Shuffle = false
    };
    var trainerOutput = graph.Add(trainer);

    var scorer = new Legacy.Transforms.DatasetScorer
    {
        Data = concatOutput.OutputData,
        PredictorModel = trainerOutput.PredictorModel
    };
    var scorerOutput = graph.Add(scorer);

    var evaluator = new Legacy.Models.BinaryClassificationEvaluator { Data = scorerOutput.ScoredData };
    var evaluatorOutput = graph.Add(evaluator);

    graph.Compile();
    graph.SetInput(loader.InputFile, new SimpleFileHandle(context, adultPath, false, false));
    graph.Run();

    var metrics = graph.GetOutput(evaluatorOutput.OverallMetrics);
    var found = metrics.Schema.TryGetColumnIndex("AUC", out int aucCol);
    Assert.True(found);

    using (var cursor = metrics.GetRowCursor(col => col == aucCol))
    {
        var aucGetter = cursor.GetGetter<double>(aucCol);
        found = cursor.MoveNext();
        Assert.True(found);
        double auc = 0;
        aucGetter(ref auc);
        Assert.Equal(0.93, auc, 2);
        // Exactly one overall-metrics row is expected.
        found = cursor.MoveNext();
        Assert.False(found);
    }
}
/// <summary>
/// Runs the train/test binary-evaluation macro with a one-hot + concat + SDCA
/// sub-graph on the tiny adult data set and checks the resulting AUC.
/// </summary>
public void TestTrainTestMacro()
{
    var adultPath = GetDataPath("adult.tiny.with-schema.txt");
    using (var env = new ConsoleEnvironment())
    {
        // Sub-graph executed by the macro: one-hot encode, concatenate features,
        // train SDCA, then combine the transforms and predictor into one model.
        var trainSubGraph = env.CreateExperiment();
        var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
        oneHot.AddColumn("Categories");
        var oneHotOutput = trainSubGraph.Add(oneHot);

        var concat = new Legacy.Transforms.ColumnConcatenator { Data = oneHotOutput.OutputData };
        concat.AddColumn("Features", "Categories", "NumericFeatures");
        var concatOutput = trainSubGraph.Add(concat);

        var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
        {
            TrainingData = concatOutput.OutputData,
            LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f },
            NumThreads = 1,
            Shuffle = false
        };
        var trainerOutput = trainSubGraph.Add(trainer);

        var combiner = new Legacy.Transforms.ManyHeterogeneousModelCombiner
        {
            TransformModels = new ArrayVar<ITransformModel>(oneHotOutput.Model, concatOutput.Model),
            PredictorModel = trainerOutput.PredictorModel
        };
        var combinerOutput = trainSubGraph.Add(combiner);

        // Outer pipeline: load the data and feed the same split to the macro as
        // both training and testing data.
        var pipeline = env.CreateExperiment();
        var loader = new Legacy.Data.TextLoader(adultPath);
        var loaderOutput = pipeline.Add(loader);

        var trainTest = new Legacy.Models.TrainTestBinaryEvaluator
        {
            TrainingData = loaderOutput.Data,
            TestingData = loaderOutput.Data,
            Nodes = trainSubGraph
        };
        trainTest.Inputs.Data = oneHot.Data;
        trainTest.Outputs.Model = combinerOutput.PredictorModel;
        var trainTestOutput = pipeline.Add(trainTest);

        pipeline.Compile();
        pipeline.SetInput(loader.InputFile, new SimpleFileHandle(env, adultPath, false, false));
        pipeline.Run();

        var metrics = pipeline.GetOutput(trainTestOutput.OverallMetrics);
        var found = metrics.Schema.TryGetColumnIndex("AUC", out int aucCol);
        Assert.True(found);

        using (var cursor = metrics.GetRowCursor(col => col == aucCol))
        {
            var aucGetter = cursor.GetGetter<double>(aucCol);
            found = cursor.MoveNext();
            Assert.True(found);
            double auc = 0;
            aucGetter(ref auc);
            Assert.Equal(0.93, auc, 2);
            // Exactly one overall-metrics row is expected.
            found = cursor.MoveNext();
            Assert.False(found);
        }
    }
}