public void TestSimpleTrainExperiment() { var dataPath = GetDataPath(@"adult.tiny.with-schema.txt"); using (var env = new TlcEnvironment()) { var experiment = env.CreateExperiment(); var importInput = new ML.Data.TextLoader(dataPath); var importOutput = experiment.Add(importInput); var catInput = new ML.Transforms.CategoricalOneHotVectorizer { Data = importOutput.Data }; catInput.AddColumn("Categories"); var catOutput = experiment.Add(catInput); var concatInput = new ML.Transforms.ColumnConcatenator { Data = catOutput.OutputData }; concatInput.AddColumn("Features", "Categories", "NumericFeatures"); var concatOutput = experiment.Add(concatInput); var sdcaInput = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier { TrainingData = concatOutput.OutputData, LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f }, NumThreads = 1, Shuffle = false }; var sdcaOutput = experiment.Add(sdcaInput); var scoreInput = new ML.Transforms.DatasetScorer { Data = concatOutput.OutputData, PredictorModel = sdcaOutput.PredictorModel }; var scoreOutput = experiment.Add(scoreInput); var evalInput = new ML.Models.BinaryClassificationEvaluator { Data = scoreOutput.ScoredData }; var evalOutput = experiment.Add(evalInput); experiment.Compile(); experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); experiment.Run(); var data = experiment.GetOutput(evalOutput.OverallMetrics); var schema = data.Schema; var b = schema.TryGetColumnIndex("AUC", out int aucCol); Assert.True(b); using (var cursor = data.GetRowCursor(col => col == aucCol)) { var getter = cursor.GetGetter <double>(aucCol); b = cursor.MoveNext(); Assert.True(b); double auc = 0; getter(ref auc); Assert.Equal(0.93, auc, 2); b = cursor.MoveNext(); Assert.False(b); } } }
public void TestCrossValidationMacroWithStratification() { var dataPath = GetDataPath(@"breast-cancer.txt"); using (var env = new TlcEnvironment()) { var subGraph = env.CreateExperiment(); var nop = new ML.Transforms.NoOperation(); var nopOutput = subGraph.Add(nop); var learnerInput = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier { TrainingData = nopOutput.OutputData, NumThreads = 1 }; var learnerOutput = subGraph.Add(learnerInput); var modelCombine = new ML.Transforms.ManyHeterogeneousModelCombiner { TransformModels = new ArrayVar <ITransformModel>(nopOutput.Model), PredictorModel = learnerOutput.PredictorModel }; var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); var importInput = new ML.Data.TextLoader(dataPath); importInput.Arguments.Column = new ML.Data.TextLoaderColumn[] { new ML.Data.TextLoaderColumn { Name = "Label", Source = new[] { new ML.Data.TextLoaderRange(0) } }, new ML.Data.TextLoaderColumn { Name = "Strat", Source = new[] { new ML.Data.TextLoaderRange(1) } }, new ML.Data.TextLoaderColumn { Name = "Features", Source = new[] { new ML.Data.TextLoaderRange(2, 9) } } }; var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator { Data = importOutput.Data, Nodes = subGraph, TransformModel = null, StratificationColumn = "Strat" }; crossValidate.Inputs.Data = nop.Data; crossValidate.Outputs.Model = modelCombineOutput.PredictorModel; var crossValidateOutput = experiment.Add(crossValidate); experiment.Compile(); experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); experiment.Run(); var data = experiment.GetOutput(crossValidateOutput.OverallMetrics[0]); var schema = data.Schema; var b = schema.TryGetColumnIndex("AUC", out int metricCol); Assert.True(b); using (var cursor = data.GetRowCursor(col => col == metricCol)) { var getter = cursor.GetGetter <double>(metricCol); b = cursor.MoveNext(); Assert.True(b); double val = 0; getter(ref val); Assert.Equal(0.99, val, 2); b = cursor.MoveNext(); Assert.False(b); } } }