Exemplo n.º 1
0
        /// <summary>
        /// Trains a FastTree binary classifier on the Titanic training file, saves the
        /// trained model to disk, predicts survival for one sample passenger, and then
        /// prints accuracy, AUC and F1 score computed against a second data file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <returns>A task that completes once the whole train/predict/evaluate run has finished.</returns>
        static async Task Main(string[] args)
        {
            // Build every path with Path.Combine so the directory separator matches the
            // host OS; a hard-coded '\' only resolves to the data folder on Windows.
            string dataDirectory    = System.IO.Path.Combine(".", "data");
            string trainingDataPath = System.IO.Path.Combine(dataDirectory, "titanic.training.csv");
            string testDataPath     = System.IO.Path.Combine(dataDirectory, "titanic.csv");
            string modelPath        = System.IO.Path.Combine(dataDirectory, "titanic.model");

            // Create a learning pipeline
            var pipeline = new LearningPipeline();

            // Load training data and add it to the pipeline
            var trainingData = new TextLoader(trainingDataPath).CreateFrom<TitanicData>(useHeader: true, separator: ',');
            pipeline.Add(trainingData);

            // Transform any text feature to numeric values
            // NOTE(review): "Fare" is one-hot encoded as a category here (it is supplied as the
            // string "7.75" below) — confirm that is intended rather than a numeric feature.
            pipeline.Add(new CategoricalOneHotVectorizer(
                             "Sex",
                             "Ticket",
                             "Fare",
                             "Cabin",
                             "Embarked"));

            // Put all features into a single "Features" vector column
            pipeline.Add(new ColumnConcatenator(
                             "Features",
                             "Pclass",
                             "Sex",
                             "Age",
                             "SibSp",
                             "Parch",
                             "Ticket",
                             "Fare",
                             "Cabin",
                             "Embarked"));

            // Add a learning algorithm to the pipeline.
            // This is a classification scenario (Did this passenger survive?)
            pipeline.Add(new FastTreeBinaryClassifier
            {
                NumLeaves           = 5,
                NumTrees            = 5,
                MinDocumentsInLeafs = 2
            });

            // Train the model based on the data set
            Console.WriteLine("Training Titanic.ML model...");
            var model = pipeline.Train<TitanicData, TitanicPrediction>();

            // Save the model to a file
            await model.WriteAsync(modelPath);

            // Use the model to make a prediction for a single passenger
            var passenger = new TitanicData
            {
                Pclass   = 3f,
                Name     = "Braund, Mr. Owen Harris",
                Sex      = "male",
                Age      = 31,
                SibSp    = 0,
                Parch    = 0,
                Ticket   = "335097",
                Fare     = "7.75",
                Cabin    = "",
                Embarked = "Q"
            };
            var prediction = model.Predict(passenger);

            Console.WriteLine($"Did this passenger survive? {(prediction.Survived ? "Yes" : "No")}");

            // Evaluate the model using the test data
            Console.WriteLine("Evaluating Titanic.ML model...");
            var testData  = new TextLoader(testDataPath).CreateFrom<TitanicData>(useHeader: true, separator: ',');
            var evaluator = new Microsoft.ML.Models.BinaryClassificationEvaluator();
            var metrics   = evaluator.Evaluate(model, testData);

            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.Auc:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
        }
Exemplo n.º 2
0
        // Builds an experiment graph (load -> one-hot encode -> concatenate -> SDCA train ->
        // score -> evaluate), runs it on a small sample file, and checks that the evaluator
        // emits exactly one metrics row whose "AUC" value equals 0.93 to two decimal places.
        public void TestSimpleTrainExperiment()
        {
            var inputFile = GetDataPath(@"adult.tiny.with-schema.txt");

            using (var env = new TlcEnvironment())
            {
                var graph = env.CreateExperiment();

                // Node 1: text loader for the input file.
                var loader = new ML.Data.TextLoader(inputFile);
                var loaded = graph.Add(loader);

                // Node 2: one-hot encode the "Categories" column.
                var oneHot = new ML.Transforms.CategoricalOneHotVectorizer();
                oneHot.Data = loaded.Data;
                oneHot.AddColumn("Categories");
                var encoded = graph.Add(oneHot);

                // Node 3: concatenate "Categories" and "NumericFeatures" into "Features".
                var concat = new ML.Transforms.ColumnConcatenator();
                concat.Data = encoded.OutputData;
                concat.AddColumn("Features", "Categories", "NumericFeatures");
                var featurized = graph.Add(concat);

                // Node 4: SDCA binary trainer with hinge loss, one thread, no shuffling.
                var hingeLoss = new HingeLossSDCAClassificationLossFunction()
                {
                    Margin = 1.1f
                };
                var trainer = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier
                {
                    TrainingData = featurized.OutputData,
                    LossFunction = hingeLoss,
                    NumThreads   = 1,
                    Shuffle      = false
                };
                var trained = graph.Add(trainer);

                // Node 5: score the same featurized data with the trained predictor.
                var scorer = new ML.Transforms.DatasetScorer
                {
                    Data           = featurized.OutputData,
                    PredictorModel = trained.PredictorModel
                };
                var scored = graph.Add(scorer);

                // Node 6: binary classification evaluator over the scored data.
                var evaluator = new ML.Models.BinaryClassificationEvaluator
                {
                    Data = scored.ScoredData
                };
                var evaluated = graph.Add(evaluator);

                // Compile the graph, bind the loader's input file, and execute.
                graph.Compile();
                graph.SetInput(loader.InputFile, new SimpleFileHandle(env, inputFile, false, false));
                graph.Run();
                var metrics = graph.GetOutput(evaluated.OverallMetrics);

                // The overall metrics must expose an "AUC" column.
                var metricsSchema = metrics.Schema;
                Assert.True(metricsSchema.TryGetColumnIndex("AUC", out int aucCol));

                // Activate only the AUC column on the cursor.
                using (var rows = metrics.GetRowCursor(index => index == aucCol))
                {
                    var readAuc = rows.GetGetter<double>(aucCol);

                    // First row: read the AUC and compare to 2 decimal places.
                    Assert.True(rows.MoveNext());
                    double auc = 0;
                    readAuc(ref auc);
                    Assert.Equal(0.93, auc, 2);

                    // There must be no second row.
                    Assert.False(rows.MoveNext());
                }
            }
        }