/// <summary>
/// Assembles a single experiment graph out of a TextLoader, a CategoricalOneHotVectorizer,
/// a ColumnConcatenator, an SDCA binary classifier (hinge loss), a DatasetScorer and a
/// BinaryClassificationEvaluator, runs it, and checks the "AUC" column of the overall metrics.
/// </summary>
public void TestSimpleTrainExperiment()
{
    var inputPath = GetDataPath("adult.tiny.with-schema.txt");
    var mlContext = new MLContext();
    var graph = mlContext.CreateExperiment();

    // Loader node.
    var loader = new Legacy.Data.TextLoader(inputPath);
    var loaded = graph.Add(loader);

    // Categorical transform over the "Categories" column.
    var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
    oneHot.Data = loaded.Data;
    oneHot.AddColumn("Categories");
    var oneHotResult = graph.Add(oneHot);

    // Concatenate "Categories" and "NumericFeatures" into "Features".
    var concat = new Legacy.Transforms.ColumnConcatenator();
    concat.Data = oneHotResult.OutputData;
    concat.AddColumn("Features", "Categories", "NumericFeatures");
    var concatResult = graph.Add(concat);

    // Trainer: one thread and no shuffling, as configured by the original test.
    var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier();
    trainer.TrainingData = concatResult.OutputData;
    trainer.LossFunction = new HingeLossSDCAClassificationLossFunction { Margin = 1.1f };
    trainer.NumThreads = 1;
    trainer.Shuffle = false;
    var trained = graph.Add(trainer);

    // Score the same data the trainer consumed.
    var scorer = new Legacy.Transforms.DatasetScorer();
    scorer.Data = concatResult.OutputData;
    scorer.PredictorModel = trained.PredictorModel;
    var scored = graph.Add(scorer);

    var evaluator = new Legacy.Models.BinaryClassificationEvaluator();
    evaluator.Data = scored.ScoredData;
    var evaluated = graph.Add(evaluator);

    graph.Compile();
    graph.SetInput(loader.InputFile, new SimpleFileHandle(mlContext, inputPath, false, false));
    graph.Run();

    var metrics = graph.GetOutput(evaluated.OverallMetrics);
    bool hasAuc = metrics.Schema.TryGetColumnIndex("AUC", out int aucCol);
    Assert.True(hasAuc);

    using (var rows = metrics.GetRowCursor(columnIndex => columnIndex == aucCol))
    {
        var readAuc = rows.GetGetter<double>(aucCol);

        // Exactly one metrics row is expected.
        Assert.True(rows.MoveNext());

        double auc = 0;
        readAuc(ref auc);
        Assert.Equal(0.93, auc, 2);

        Assert.False(rows.MoveNext());
    }
}
/// <summary>
/// Builds a sub-graph (CategoricalOneHotVectorizer, ColumnConcatenator, SDCA binary classifier,
/// ManyHeterogeneousModelCombiner), hands it to a TrainTestBinaryEvaluator that uses the loaded
/// data for both training and testing, runs the outer experiment, and checks the "AUC" column
/// of the overall metrics.
/// </summary>
public void TestTrainTestMacro()
{
    var inputPath = GetDataPath("adult.tiny.with-schema.txt");

    using (var environment = new ConsoleEnvironment())
    {
        // Sub-graph that is passed to the macro through its Nodes property.
        var pipeline = environment.CreateExperiment();

        var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
        oneHot.AddColumn("Categories");
        var oneHotResult = pipeline.Add(oneHot);

        var concat = new Legacy.Transforms.ColumnConcatenator();
        concat.Data = oneHotResult.OutputData;
        concat.AddColumn("Features", "Categories", "NumericFeatures");
        var concatResult = pipeline.Add(concat);

        var trainer = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier();
        trainer.TrainingData = concatResult.OutputData;
        trainer.LossFunction = new HingeLossSDCAClassificationLossFunction { Margin = 1.1f };
        trainer.NumThreads = 1;
        trainer.Shuffle = false;
        var trained = pipeline.Add(trainer);

        // Fold both transform models and the predictor into one predictor model.
        var combiner = new Legacy.Transforms.ManyHeterogeneousModelCombiner();
        combiner.TransformModels = new ArrayVar<ITransformModel>(oneHotResult.Model, concatResult.Model);
        combiner.PredictorModel = trained.PredictorModel;
        var combined = pipeline.Add(combiner);

        // Outer graph: loader feeding the train/test macro.
        var outer = environment.CreateExperiment();
        var loader = new Legacy.Data.TextLoader(inputPath);
        var loaded = outer.Add(loader);

        var macro = new Legacy.Models.TrainTestBinaryEvaluator();
        macro.TrainingData = loaded.Data;
        macro.TestingData = loaded.Data;
        macro.Nodes = pipeline;
        // NOTE(review): presumably these bind the sub-graph's entry variable and its
        // resulting model to the macro - confirm against the macro's documentation.
        macro.Inputs.Data = oneHot.Data;
        macro.Outputs.Model = combined.PredictorModel;
        var macroResult = outer.Add(macro);

        outer.Compile();
        outer.SetInput(loader.InputFile, new SimpleFileHandle(environment, inputPath, false, false));
        outer.Run();

        var metrics = outer.GetOutput(macroResult.OverallMetrics);
        bool hasAuc = metrics.Schema.TryGetColumnIndex("AUC", out int aucCol);
        Assert.True(hasAuc);

        using (var rows = metrics.GetRowCursor(columnIndex => columnIndex == aucCol))
        {
            var readAuc = rows.GetGetter<double>(aucCol);

            // Exactly one metrics row is expected.
            Assert.True(rows.MoveNext());

            double auc = 0;
            readAuc(ref auc);
            Assert.Equal(0.93, auc, 2);

            Assert.False(rows.MoveNext());
        }
    }
}
/// <summary>
/// Builds a sub-graph (CategoricalOneHotVectorizer, ColumnConcatenator, logistic regression
/// binary classifier, ManyHeterogeneousModelCombiner), hands it to a BinaryCrossValidator,
/// runs the outer experiment, and checks the "AUC" column of the first overall-metrics output.
/// </summary>
public void TestCrossValidationBinaryMacro()
{
    var inputPath = GetDataPath("adult.tiny.with-schema.txt");

    using (var environment = new ConsoleEnvironment())
    {
        // Sub-graph that is passed to the cross-validator through its Nodes property.
        var pipeline = environment.CreateExperiment();

        var oneHot = new Legacy.Transforms.CategoricalOneHotVectorizer();
        oneHot.AddColumn("Categories");
        var oneHotResult = pipeline.Add(oneHot);

        var concat = new Legacy.Transforms.ColumnConcatenator();
        concat.Data = oneHotResult.OutputData;
        concat.AddColumn("Features", "Categories", "NumericFeatures");
        var concatResult = pipeline.Add(concat);

        var trainer = new Legacy.Trainers.LogisticRegressionBinaryClassifier();
        trainer.TrainingData = concatResult.OutputData;
        trainer.NumThreads = 1;
        var trained = pipeline.Add(trainer);

        // Fold both transform models and the predictor into one predictor model.
        var combiner = new Legacy.Transforms.ManyHeterogeneousModelCombiner();
        combiner.TransformModels = new ArrayVar<ITransformModel>(oneHotResult.Model, concatResult.Model);
        combiner.PredictorModel = trained.PredictorModel;
        var combined = pipeline.Add(combiner);

        // Outer graph: loader feeding the cross-validation macro.
        var outer = environment.CreateExperiment();
        var loader = new Legacy.Data.TextLoader(inputPath);
        var loaded = outer.Add(loader);

        var crossValidator = new Legacy.Models.BinaryCrossValidator();
        crossValidator.Data = loaded.Data;
        crossValidator.Nodes = pipeline;
        // NOTE(review): presumably these bind the sub-graph's entry variable and its
        // resulting model to the macro - confirm against the macro's documentation.
        crossValidator.Inputs.Data = oneHot.Data;
        crossValidator.Outputs.Model = combined.PredictorModel;
        var crossValidated = outer.Add(crossValidator);

        outer.Compile();
        loader.SetInput(environment, outer);
        outer.Run();

        // Only the first element of OverallMetrics is inspected.
        var metrics = outer.GetOutput(crossValidated.OverallMetrics[0]);
        bool hasAuc = metrics.Schema.TryGetColumnIndex("AUC", out int aucCol);
        Assert.True(hasAuc);

        using (var rows = metrics.GetRowCursor(columnIndex => columnIndex == aucCol))
        {
            var readAuc = rows.GetGetter<double>(aucCol);

            // Exactly one metrics row is expected.
            Assert.True(rows.MoveNext());

            double auc = 0;
            readAuc(ref auc);
            Assert.Equal(0.87, auc, 1);

            Assert.False(rows.MoveNext());
        }
    }
}
/// <summary>
/// Builds and runs an experiment that loads the file at <paramref name="dataPath"/>, concatenates
/// and one-hot encodes its columns into "Features", trains a
/// StochasticDualCoordinateAscentRegressor on them, combines the transform models with the
/// predictor, and returns the scoring transform produced by a Scorer node.
/// </summary>
/// <param name="dataPath">
/// Path of the file given to the text loader. The loader is configured for comma-separated
/// values with a header row and the 21 columns listed below ("Label" is source column 2).
/// </param>
/// <returns>The <c>ScoringTransform</c> output of the scorer node after the experiment has run.</returns>
private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
{
    // Builds one loader column that reads a single source column; keeps the schema table compact.
    TextLoaderColumn MakeColumn(string name, int sourceIndex, Legacy.Data.DataKind kind)
    {
        return new TextLoaderColumn()
        {
            Name = name,
            Source = new[] { new TextLoaderRange(sourceIndex) },
            Type = kind
        };
    }

    Experiment experiment = s_environment.CreateExperiment();

    var importData = new Legacy.Data.TextLoader(dataPath)
    {
        Arguments = new TextLoaderArguments
        {
            Separator = new[] { ',' },
            HasHeader = true,
            Column = new[]
            {
                MakeColumn("Id", 0, Legacy.Data.DataKind.Text),
                MakeColumn("Date", 1, Legacy.Data.DataKind.Text),
                MakeColumn("Label", 2, Legacy.Data.DataKind.Num),
                MakeColumn("Bedrooms", 3, Legacy.Data.DataKind.Num),
                MakeColumn("Bathrooms", 4, Legacy.Data.DataKind.Num),
                MakeColumn("SqftLiving", 5, Legacy.Data.DataKind.Num),
                MakeColumn("SqftLot", 6, Legacy.Data.DataKind.Num),
                MakeColumn("Floors", 7, Legacy.Data.DataKind.Num),
                MakeColumn("Waterfront", 8, Legacy.Data.DataKind.Num),
                MakeColumn("View", 9, Legacy.Data.DataKind.Num),
                MakeColumn("Condition", 10, Legacy.Data.DataKind.Num),
                MakeColumn("Grade", 11, Legacy.Data.DataKind.Num),
                MakeColumn("SqftAbove", 12, Legacy.Data.DataKind.Num),
                MakeColumn("SqftBasement", 13, Legacy.Data.DataKind.Num),
                MakeColumn("YearBuilt", 14, Legacy.Data.DataKind.Num),
                MakeColumn("YearRenovated", 15, Legacy.Data.DataKind.Num),
                MakeColumn("Zipcode", 16, Legacy.Data.DataKind.Num),
                MakeColumn("Lat", 17, Legacy.Data.DataKind.Num),
                MakeColumn("Long", 18, Legacy.Data.DataKind.Num),
                MakeColumn("SqftLiving15", 19, Legacy.Data.DataKind.Num),
                MakeColumn("SqftLot15", 20, Legacy.Data.DataKind.Num),
            }
        }
    };

    Legacy.Data.TextLoader.Output imported = experiment.Add(importData);

    // Columns concatenated as-is into "NumericalFeatures".
    var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator();
    numericalConcatenate.Data = imported.Data;
    numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
    Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

    // Columns concatenated into "CategoryFeatures", which is then passed through the
    // CategoricalOneHotVectorizer below.
    var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator();
    categoryConcatenate.Data = numericalConcatenated.OutputData;
    categoryConcatenate.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
    Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

    var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer();
    categorize.AddColumn("CategoryFeatures");
    categorize.Data = categoryConcatenated.OutputData;
    Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

    // Final "Features" column consumed by the trainer.
    var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator();
    featuresConcatenate.Data = categorized.OutputData;
    featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
    Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

    var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor();
    learner.TrainingData = featuresConcatenated.OutputData;
    learner.NumThreads = 1;
    Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

    // Combine every transform model, in pipeline order, with the trained predictor.
    var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner();
    combineModels.TransformModels = new ArrayVar<ITransformModel>(numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model);
    combineModels.PredictorModel = learnerOutput.PredictorModel;
    Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

    var scorer = new Legacy.Transforms.Scorer { PredictorModel = combinedModels.PredictorModel };
    var scorerOutput = experiment.Add(scorer);

    experiment.Compile();
    experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
    experiment.Run();

    return experiment.GetOutput(scorerOutput.ScoringTransform);
}