public void TestSimpleTrainExperiment()
        {
            var dataPath   = GetDataPath("adult.tiny.with-schema.txt");
            var env        = new MLContext();
            var experiment = env.CreateExperiment();

            var importInput  = new Legacy.Data.TextLoader(dataPath);
            var importOutput = experiment.Add(importInput);

            var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer
            {
                Data = importOutput.Data
            };

            catInput.AddColumn("Categories");
            var catOutput = experiment.Add(catInput);

            var concatInput = new Legacy.Transforms.ColumnConcatenator
            {
                Data = catOutput.OutputData
            };

            concatInput.AddColumn("Features", "Categories", "NumericFeatures");
            var concatOutput = experiment.Add(concatInput);

            var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
            {
                TrainingData = concatOutput.OutputData,
                LossFunction = new HingeLossSDCAClassificationLossFunction()
                {
                    Margin = 1.1f
                },
                NumThreads = 1,
                Shuffle    = false
            };
            var sdcaOutput = experiment.Add(sdcaInput);

            var scoreInput = new Legacy.Transforms.DatasetScorer
            {
                Data           = concatOutput.OutputData,
                PredictorModel = sdcaOutput.PredictorModel
            };
            var scoreOutput = experiment.Add(scoreInput);

            var evalInput = new Legacy.Models.BinaryClassificationEvaluator
            {
                Data = scoreOutput.ScoredData
            };
            var evalOutput = experiment.Add(evalInput);

            experiment.Compile();
            experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
            experiment.Run();
            var data = experiment.GetOutput(evalOutput.OverallMetrics);

            var schema = data.Schema;
            var b      = schema.TryGetColumnIndex("AUC", out int aucCol);

            Assert.True(b);
            using (var cursor = data.GetRowCursor(col => col == aucCol))
            {
                var getter = cursor.GetGetter <double>(aucCol);
                b = cursor.MoveNext();
                Assert.True(b);
                double auc = 0;
                getter(ref auc);
                Assert.Equal(0.93, auc, 2);
                b = cursor.MoveNext();
                Assert.False(b);
            }
        }
Beispiel #2
0
        public void TestTrainTestMacro()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                var subGraph = env.CreateExperiment();

                var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer();
                catInput.AddColumn("Categories");
                var catOutput = subGraph.Add(catInput);

                var concatInput = new Legacy.Transforms.ColumnConcatenator
                {
                    Data = catOutput.OutputData
                };
                concatInput.AddColumn("Features", "Categories", "NumericFeatures");
                var concatOutput = subGraph.Add(concatInput);

                var sdcaInput = new Legacy.Trainers.StochasticDualCoordinateAscentBinaryClassifier
                {
                    TrainingData = concatOutput.OutputData,
                    LossFunction = new HingeLossSDCAClassificationLossFunction()
                    {
                        Margin = 1.1f
                    },
                    NumThreads = 1,
                    Shuffle    = false
                };
                var sdcaOutput = subGraph.Add(sdcaInput);

                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(catOutput.Model, concatOutput.Model),
                    PredictorModel  = sdcaOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment = env.CreateExperiment();

                var importInput  = new Legacy.Data.TextLoader(dataPath);
                var importOutput = experiment.Add(importInput);

                var trainTestInput = new Legacy.Models.TrainTestBinaryEvaluator
                {
                    TrainingData = importOutput.Data,
                    TestingData  = importOutput.Data,
                    Nodes        = subGraph
                };
                trainTestInput.Inputs.Data   = catInput.Data;
                trainTestInput.Outputs.Model = modelCombineOutput.PredictorModel;
                var trainTestOutput = experiment.Add(trainTestInput);

                experiment.Compile();
                experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false));
                experiment.Run();
                var data = experiment.GetOutput(trainTestOutput.OverallMetrics);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("AUC", out int aucCol);
                Assert.True(b);
                using (var cursor = data.GetRowCursor(col => col == aucCol))
                {
                    var getter = cursor.GetGetter <double>(aucCol);
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double auc = 0;
                    getter(ref auc);
                    Assert.Equal(0.93, auc, 2);
                    b = cursor.MoveNext();
                    Assert.False(b);
                }
            }
        }
Beispiel #3
0
        public void TestCrossValidationBinaryMacro()
        {
            var dataPath = GetDataPath("adult.tiny.with-schema.txt");

            using (var env = new ConsoleEnvironment())
            {
                var subGraph = env.CreateExperiment();

                var catInput = new Legacy.Transforms.CategoricalOneHotVectorizer();
                catInput.AddColumn("Categories");
                var catOutput = subGraph.Add(catInput);

                var concatInput = new Legacy.Transforms.ColumnConcatenator
                {
                    Data = catOutput.OutputData
                };
                concatInput.AddColumn("Features", "Categories", "NumericFeatures");
                var concatOutput = subGraph.Add(concatInput);

                var lrInput = new Legacy.Trainers.LogisticRegressionBinaryClassifier
                {
                    TrainingData = concatOutput.OutputData,
                    NumThreads   = 1
                };
                var lrOutput = subGraph.Add(lrInput);

                var modelCombine = new Legacy.Transforms.ManyHeterogeneousModelCombiner
                {
                    TransformModels = new ArrayVar <ITransformModel>(catOutput.Model, concatOutput.Model),
                    PredictorModel  = lrOutput.PredictorModel
                };
                var modelCombineOutput = subGraph.Add(modelCombine);

                var experiment = env.CreateExperiment();

                var importInput  = new Legacy.Data.TextLoader(dataPath);
                var importOutput = experiment.Add(importInput);

                var crossValidateBinary = new Legacy.Models.BinaryCrossValidator
                {
                    Data  = importOutput.Data,
                    Nodes = subGraph
                };
                crossValidateBinary.Inputs.Data   = catInput.Data;
                crossValidateBinary.Outputs.Model = modelCombineOutput.PredictorModel;
                var crossValidateOutput = experiment.Add(crossValidateBinary);

                experiment.Compile();
                importInput.SetInput(env, experiment);
                experiment.Run();
                var data = experiment.GetOutput(crossValidateOutput.OverallMetrics[0]);

                var schema = data.Schema;
                var b      = schema.TryGetColumnIndex("AUC", out int aucCol);
                Assert.True(b);
                using (var cursor = data.GetRowCursor(col => col == aucCol))
                {
                    var getter = cursor.GetGetter <double>(aucCol);
                    b = cursor.MoveNext();
                    Assert.True(b);
                    double auc = 0;
                    getter(ref auc);
                    Assert.Equal(0.87, auc, 1);
                    b = cursor.MoveNext();
                    Assert.False(b);
                }
            }
        }
Beispiel #4
0
        private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
        {
            Experiment experiment = s_environment.CreateExperiment();
            var        importData = new Legacy.Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { ',' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn()
                        {
                            Name   = "Id",
                            Source = new [] { new TextLoaderRange(0) },
                            Type   = Legacy.Data.DataKind.Text
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Date",
                            Source = new [] { new TextLoaderRange(1) },
                            Type   = Legacy.Data.DataKind.Text
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Label",
                            Source = new [] { new TextLoaderRange(2) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Bedrooms",
                            Source = new [] { new TextLoaderRange(3) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Bathrooms",
                            Source = new [] { new TextLoaderRange(4) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftLiving",
                            Source = new [] { new TextLoaderRange(5) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftLot",
                            Source = new [] { new TextLoaderRange(6) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Floors",
                            Source = new [] { new TextLoaderRange(7) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Waterfront",
                            Source = new [] { new TextLoaderRange(8) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "View",
                            Source = new [] { new TextLoaderRange(9) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Condition",
                            Source = new [] { new TextLoaderRange(10) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Grade",
                            Source = new [] { new TextLoaderRange(11) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftAbove",
                            Source = new [] { new TextLoaderRange(12) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftBasement",
                            Source = new [] { new TextLoaderRange(13) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "YearBuilt",
                            Source = new [] { new TextLoaderRange(14) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "YearRenovated",
                            Source = new [] { new TextLoaderRange(15) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Zipcode",
                            Source = new [] { new TextLoaderRange(16) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Lat",
                            Source = new [] { new TextLoaderRange(17) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "Long",
                            Source = new [] { new TextLoaderRange(18) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftLiving15",
                            Source = new [] { new TextLoaderRange(19) },
                            Type   = Legacy.Data.DataKind.Num
                        },

                        new TextLoaderColumn()
                        {
                            Name   = "SqftLot15",
                            Source = new [] { new TextLoaderRange(20) },
                            Type   = Legacy.Data.DataKind.Num
                        },
                    }
                }

                //new Data.CustomTextLoader();
                // importData.CustomSchema = dataSchema;
                //
            };

            Legacy.Data.TextLoader.Output imported = experiment.Add(importData);
            var numericalConcatenate = new Legacy.Transforms.ColumnConcatenator();

            numericalConcatenate.Data = imported.Data;
            numericalConcatenate.AddColumn("NumericalFeatures", "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15");
            Legacy.Transforms.ColumnConcatenator.Output numericalConcatenated = experiment.Add(numericalConcatenate);

            var categoryConcatenate = new Legacy.Transforms.ColumnConcatenator();

            categoryConcatenate.Data = numericalConcatenated.OutputData;
            categoryConcatenate.AddColumn("CategoryFeatures", "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode");
            Legacy.Transforms.ColumnConcatenator.Output categoryConcatenated = experiment.Add(categoryConcatenate);

            var categorize = new Legacy.Transforms.CategoricalOneHotVectorizer();

            categorize.AddColumn("CategoryFeatures");
            categorize.Data = categoryConcatenated.OutputData;
            Legacy.Transforms.CategoricalOneHotVectorizer.Output categorized = experiment.Add(categorize);

            var featuresConcatenate = new Legacy.Transforms.ColumnConcatenator();

            featuresConcatenate.Data = categorized.OutputData;
            featuresConcatenate.AddColumn("Features", "NumericalFeatures", "CategoryFeatures");
            Legacy.Transforms.ColumnConcatenator.Output featuresConcatenated = experiment.Add(featuresConcatenate);

            var learner = new Legacy.Trainers.StochasticDualCoordinateAscentRegressor();

            learner.TrainingData = featuresConcatenated.OutputData;
            learner.NumThreads   = 1;
            Legacy.Trainers.StochasticDualCoordinateAscentRegressor.Output learnerOutput = experiment.Add(learner);

            var combineModels = new Legacy.Transforms.ManyHeterogeneousModelCombiner();

            combineModels.TransformModels = new ArrayVar <ITransformModel>(numericalConcatenated.Model, categoryConcatenated.Model, categorized.Model, featuresConcatenated.Model);
            combineModels.PredictorModel  = learnerOutput.PredictorModel;
            Legacy.Transforms.ManyHeterogeneousModelCombiner.Output combinedModels = experiment.Add(combineModels);

            var scorer = new Legacy.Transforms.Scorer
            {
                PredictorModel = combinedModels.PredictorModel
            };

            var scorerOutput = experiment.Add(scorer);

            experiment.Compile();
            experiment.SetInput(importData.InputFile, new SimpleFileHandle(s_environment, dataPath, false, false));
            experiment.Run();

            return(experiment.GetOutput(scorerOutput.ScoringTransform));
        }