Beispiel #1
0
        /// <summary>
        /// Trains a binary sentiment classifier from the tab-separated data file and then
        /// repeatedly reads a statement from the console and prints the predicted sentiment.
        /// The loop ends when the console input stream is exhausted.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string           dataPath     = "wikipedia-detox-250-line-data.tsv";
            LearningPipeline pipeline     = new LearningPipeline();
            TextLoader       trainingData = new TextLoader(dataPath).CreateFrom <SentimentData>(separator: '\t');

            // Pipeline: load the data, featurize the text column, train a FastTree binary classifier.
            pipeline.Add(trainingData);
            pipeline.Add(new TextFeaturizer("Features", "SentimentText"));
            pipeline.Add(new FastTreeBinaryClassifier());
            PredictionModel <SentimentData, SentimentPrediction> model = pipeline.Train <SentimentData, SentimentPrediction>();

            Console.WriteLine(Environment.NewLine + Environment.NewLine);

            while (true)
            {
                Console.WriteLine("Please enter some statement:" + Environment.NewLine);

                // Positive: He is the best, and the article should say that.
                // Negative: Please refrain from adding nonsense to Wikipedia.

                var input = Console.ReadLine();

                // Console.ReadLine returns null once the input stream is closed (Ctrl+Z / Ctrl+D,
                // or redirected input that has run out). Without this check the loop would spin
                // forever, predicting on a null statement.
                if (input == null)
                {
                    break;
                }

                SentimentData data = new SentimentData
                {
                    SentimentText = input
                };

                SentimentPrediction prediction = model.Predict(data);
                string text = "Prediction: " + prediction.Sentiment + Environment.NewLine;
                Console.WriteLine(text);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a two-node experiment (text loader followed by a min-max normalizer on the
        /// "NumericFeatures" column), runs it, and checks the column count and column names
        /// of the resulting schema.
        /// </summary>
        public void TestSimpleExperiment()
        {
            var inputPath = GetDataPath(@"adult.tiny.with-schema.txt");

            using (var env = new TlcEnvironment())
            {
                var graph = env.CreateExperiment();

                // Node 1: load the text file.
                var loader = new ML.Data.TextLoader(inputPath);
                var loaded = graph.Add(loader);

                // Node 2: min-max normalization of the numeric feature column.
                var normalizer = new ML.Transforms.MinMaxNormalizer();
                normalizer.Data = loaded.Data;
                normalizer.AddColumn("NumericFeatures");
                var normalized = graph.Add(normalizer);

                graph.Compile();
                graph.SetInput(loader.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                graph.Run();

                var outputSchema = graph.GetOutput(normalized.OutputData).Schema;
                Assert.Equal(5, outputSchema.ColumnCount);

                // "NumericFeatures" is expected twice: once from the loader, once from the normalizer.
                string[] expectedNames = { "Label", "Workclass", "Categories", "NumericFeatures", "NumericFeatures" };
                int index = 0;
                while (index < outputSchema.ColumnCount)
                {
                    Assert.Equal(expectedNames[index], outputSchema.GetColumnName(index));
                    index++;
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Evaluates a trained wine model against the data at <c>TestDataPath</c> with a
        /// regression evaluator and writes the Rms, LossFn and RSquared metrics to the console.
        /// </summary>
        /// <param name="model">The trained model to evaluate.</param>
        static void Evaluate(PredictionModel<WineData, WinePrediction> model)
        {
            // The test file is comma-separated and has a header row; whitespace is kept as-is.
            var loader = new Microsoft.ML.Data.TextLoader(TestDataPath)
                .CreateFrom<WineData>(useHeader: true, separator: ',', trimWhitespace: false);

            var result = new Microsoft.ML.Models.RegressionEvaluator().Evaluate(model, loader);

            Console.WriteLine($"Rms={result.Rms}");
            Console.WriteLine($"LossFn={result.LossFn}");
            Console.WriteLine($"RSquared = {result.RSquared}");
        }
Beispiel #4
0
        /// <summary>
        /// Evaluates a trained model against the data at <c>testDataPath</c> with a regression
        /// evaluator and writes the Rms and RSquared metrics to the console.
        /// </summary>
        /// <param name="model">The trained model to evaluate.</param>
        private static void Evaluate(PredictionModel<DatosGenerales, Prediccion> model)
        {
            // The test file is comma-separated and has a header row.
            var loader = new Microsoft.ML.Data.TextLoader(testDataPath)
                .CreateFrom<DatosGenerales>(useHeader: true, separator: ',');

            var metrics = new Microsoft.ML.Models.RegressionEvaluator().Evaluate(model, loader);

            // Note: the first line is labelled "R Model Score" but reports the Rms metric.
            Console.WriteLine("R Model Score: " + metrics.Rms);
            Console.WriteLine("R Squared: " + metrics.RSquared);
        }
Beispiel #5
0
        /// <summary>
        /// Builds a full train/score/evaluate experiment: load, one-hot encode "Categories",
        /// concatenate into "Features", train an SDCA binary classifier, score the training
        /// data and evaluate it. Verifies that the overall metrics contain exactly one row and
        /// that its "AUC" value equals 0.93 at a precision of 2 decimal places.
        /// </summary>
        public void TestSimpleTrainExperiment()
        {
            var inputPath = GetDataPath(@"adult.tiny.with-schema.txt");

            using (var env = new TlcEnvironment())
            {
                var graph = env.CreateExperiment();

                // Load.
                var loader = new ML.Data.TextLoader(inputPath);
                var loaded = graph.Add(loader);

                // One-hot encode the categorical column.
                var oneHot = new ML.Transforms.CategoricalOneHotVectorizer();
                oneHot.Data = loaded.Data;
                oneHot.AddColumn("Categories");
                var encoded = graph.Add(oneHot);

                // Concatenate the encoded categories with the numeric features.
                var concat = new ML.Transforms.ColumnConcatenator();
                concat.Data = encoded.OutputData;
                concat.AddColumn("Features", "Categories", "NumericFeatures");
                var featurized = graph.Add(concat);

                // Train SDCA with hinge loss (margin 1.1), single-threaded and unshuffled.
                var trainer = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier();
                trainer.TrainingData = featurized.OutputData;
                trainer.LossFunction = new HingeLossSDCAClassificationLossFunction() { Margin = 1.1f };
                trainer.NumThreads   = 1;
                trainer.Shuffle      = false;
                var trained = graph.Add(trainer);

                // Score the featurized data with the trained predictor.
                var scorer = new ML.Transforms.DatasetScorer();
                scorer.Data           = featurized.OutputData;
                scorer.PredictorModel = trained.PredictorModel;
                var scored = graph.Add(scorer);

                // Evaluate the scored data.
                var evaluator = new ML.Models.BinaryClassificationEvaluator();
                evaluator.Data = scored.ScoredData;
                var evaluated = graph.Add(evaluator);

                graph.Compile();
                graph.SetInput(loader.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                graph.Run();
                var metrics = graph.GetOutput(evaluated.OverallMetrics);

                Assert.True(metrics.Schema.TryGetColumnIndex("AUC", out int aucIndex));

                using (var rows = metrics.GetRowCursor(col => col == aucIndex))
                {
                    var readAuc = rows.GetGetter<double>(aucIndex);

                    // Exactly one metrics row is expected.
                    Assert.True(rows.MoveNext());
                    double aucValue = 0;
                    readAuc(ref aucValue);
                    Assert.Equal(0.93, aucValue, 2);
                    Assert.False(rows.MoveNext());
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Runs the cross-validation macro over a sub-graph consisting of a no-op transform and
        /// an SDCA regressor, then verifies that the first overall-metrics output contains
        /// exactly one row whose "L1(avg)" value equals 3.32 at a precision of 1 decimal place.
        /// </summary>
        public void TestCrossValidationMacro()
        {
            var inputPath = GetDataPath(@"housing.txt");

            using (var env = new TlcEnvironment())
            {
                // Sub-graph handed to the cross-validator: no-op -> SDCA regressor -> combined model.
                var inner = env.CreateExperiment();

                var passThrough       = new ML.Transforms.NoOperation();
                var passThroughOutput = inner.Add(passThrough);

                var regressor = new ML.Trainers.StochasticDualCoordinateAscentRegressor();
                regressor.TrainingData = passThroughOutput.OutputData;
                regressor.NumThreads   = 1;
                var regressorOutput = inner.Add(regressor);

                var combiner = new ML.Transforms.ManyHeterogeneousModelCombiner();
                combiner.TransformModels = new ArrayVar<ITransformModel>(passThroughOutput.Model);
                combiner.PredictorModel  = regressorOutput.PredictorModel;
                var combinedModel = inner.Add(combiner);

                // Outer graph: loader -> cross-validator.
                var outer  = env.CreateExperiment();
                var loader = new ML.Data.TextLoader(inputPath);
                var loaded = outer.Add(loader);

                var validator = new ML.Models.CrossValidator();
                validator.Data           = loaded.Data;
                validator.Nodes          = inner;
                validator.Kind           = ML.Models.MacroUtilsTrainerKinds.SignatureRegressorTrainer;
                validator.TransformModel = null;
                // Connect the macro's input and output to the sub-graph's entry and exit variables.
                validator.Inputs.Data   = passThrough.Data;
                validator.Outputs.Model = combinedModel.PredictorModel;
                var validated = outer.Add(validator);

                outer.Compile();
                outer.SetInput(loader.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                outer.Run();
                var metrics = outer.GetOutput(validated.OverallMetrics[0]);

                Assert.True(metrics.Schema.TryGetColumnIndex("L1(avg)", out int l1Index));

                using (var rows = metrics.GetRowCursor(col => col == l1Index))
                {
                    var readMetric = rows.GetGetter<double>(l1Index);

                    // Exactly one metrics row is expected.
                    Assert.True(rows.MoveNext());
                    double l1 = 0;
                    readMetric(ref l1);
                    Assert.Equal(3.32, l1, 1);
                    Assert.False(rows.MoveNext());
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Runs the binary cross-validation macro over a sub-graph that one-hot encodes
        /// "Categories", concatenates the features and trains a logistic regression classifier.
        /// Verifies that the first overall-metrics output contains exactly one row whose "AUC"
        /// value equals 0.87 at a precision of 1 decimal place.
        /// </summary>
        public void TestCrossValidationBinaryMacro()
        {
            var inputPath = GetDataPath(@"adult.tiny.with-schema.txt");

            using (var env = new TlcEnvironment())
            {
                // Sub-graph handed to the cross-validator.
                var inner = env.CreateExperiment();

                var oneHot = new ML.Transforms.CategoricalOneHotVectorizer();
                oneHot.AddColumn("Categories");
                var encoded = inner.Add(oneHot);

                var concat = new ML.Transforms.ColumnConcatenator();
                concat.Data = encoded.OutputData;
                concat.AddColumn("Features", "Categories", "NumericFeatures");
                var featurized = inner.Add(concat);

                var classifier = new ML.Trainers.LogisticRegressionBinaryClassifier();
                classifier.TrainingData = featurized.OutputData;
                classifier.NumThreads   = 1;
                var trained = inner.Add(classifier);

                // Bundle both transform models with the predictor into a single model.
                var combiner = new ML.Transforms.ManyHeterogeneousModelCombiner();
                combiner.TransformModels = new ArrayVar<ITransformModel>(encoded.Model, featurized.Model);
                combiner.PredictorModel  = trained.PredictorModel;
                var combinedModel = inner.Add(combiner);

                // Outer graph: loader -> binary cross-validator.
                var outer = env.CreateExperiment();

                var loader = new ML.Data.TextLoader(inputPath);
                var loaded = outer.Add(loader);

                var validator = new ML.Models.BinaryCrossValidator();
                validator.Data  = loaded.Data;
                validator.Nodes = inner;
                // Connect the macro's input and output to the sub-graph's entry and exit variables.
                validator.Inputs.Data   = oneHot.Data;
                validator.Outputs.Model = combinedModel.PredictorModel;
                var validated = outer.Add(validator);

                outer.Compile();
                outer.SetInput(loader.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                outer.Run();
                var metrics = outer.GetOutput(validated.OverallMetrics[0]);

                Assert.True(metrics.Schema.TryGetColumnIndex("AUC", out int aucIndex));

                using (var rows = metrics.GetRowCursor(col => col == aucIndex))
                {
                    var readAuc = rows.GetGetter<double>(aucIndex);

                    // Exactly one metrics row is expected.
                    Assert.True(rows.MoveNext());
                    double aucValue = 0;
                    readAuc(ref aucValue);
                    Assert.Equal(0.87, aucValue, 1);
                    Assert.False(rows.MoveNext());
                }
            }
        }
        /// <summary>
        /// Runs the cross-validation macro with a stratification column ("Strat") over a
        /// sub-graph consisting of a no-op transform and an SDCA binary classifier. Verifies
        /// that the first overall-metrics output contains exactly one row whose "AUC" value
        /// equals 0.99 at a precision of 2 decimal places.
        /// </summary>
        public void TestCrossValidationMacroWithStratification()
        {
            var inputPath = GetDataPath(@"breast-cancer.txt");

            using (var env = new TlcEnvironment())
            {
                // Sub-graph handed to the cross-validator: no-op -> SDCA classifier -> combined model.
                var inner = env.CreateExperiment();

                var passThrough       = new ML.Transforms.NoOperation();
                var passThroughOutput = inner.Add(passThrough);

                var classifier = new ML.Trainers.StochasticDualCoordinateAscentBinaryClassifier();
                classifier.TrainingData = passThroughOutput.OutputData;
                classifier.NumThreads   = 1;
                var trained = inner.Add(classifier);

                var combiner = new ML.Transforms.ManyHeterogeneousModelCombiner();
                combiner.TransformModels = new ArrayVar<ITransformModel>(passThroughOutput.Model);
                combiner.PredictorModel  = trained.PredictorModel;
                var combinedModel = inner.Add(combiner);

                // Outer graph: loader with an explicit column layout -> cross-validator.
                var outer  = env.CreateExperiment();
                var loader = new ML.Data.TextLoader(inputPath);

                // Source column 0 -> Label, 1 -> Strat, range (2, 9) -> Features.
                var labelColumn = new ML.Data.TextLoaderColumn
                {
                    Name   = "Label",
                    Source = new[] { new ML.Data.TextLoaderRange(0) }
                };
                var stratColumn = new ML.Data.TextLoaderColumn
                {
                    Name   = "Strat",
                    Source = new[] { new ML.Data.TextLoaderRange(1) }
                };
                var featuresColumn = new ML.Data.TextLoaderColumn
                {
                    Name   = "Features",
                    Source = new[] { new ML.Data.TextLoaderRange(2, 9) }
                };
                loader.Arguments.Column = new ML.Data.TextLoaderColumn[] { labelColumn, stratColumn, featuresColumn };
                var loaded = outer.Add(loader);

                var validator = new ML.Models.CrossValidator();
                validator.Data                 = loaded.Data;
                validator.Nodes                = inner;
                validator.TransformModel       = null;
                validator.StratificationColumn = "Strat";
                // Connect the macro's input and output to the sub-graph's entry and exit variables.
                validator.Inputs.Data   = passThrough.Data;
                validator.Outputs.Model = combinedModel.PredictorModel;
                var validated = outer.Add(validator);

                outer.Compile();
                outer.SetInput(loader.InputFile, new SimpleFileHandle(env, inputPath, false, false));
                outer.Run();
                var metrics = outer.GetOutput(validated.OverallMetrics[0]);

                Assert.True(metrics.Schema.TryGetColumnIndex("AUC", out int aucIndex));

                using (var rows = metrics.GetRowCursor(col => col == aucIndex))
                {
                    var readAuc = rows.GetGetter<double>(aucIndex);

                    // Exactly one metrics row is expected.
                    Assert.True(rows.MoveNext());
                    double aucValue = 0;
                    readAuc(ref aucValue);
                    Assert.Equal(0.99, aucValue, 2);
                    Assert.False(rows.MoveNext());
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Loads every row of the file at <c>TestDataPath</c> into <c>WineData</c> objects,
        /// runs the model over all of them, and prints the id, actual quality and predicted
        /// quality of the last row.
        /// </summary>
        /// <param name="model">The trained model used to predict wine quality.</param>
        /// <remarks>
        /// The loaded data is read positionally: columns 0..12 are read as <c>float</c> and
        /// mapped to the <c>WineData</c> properties in the order used below.
        /// If the file yields no rows, a short notice is printed instead of a prediction.
        /// </remarks>
        static void Predict(PredictionModel<WineData, WinePrediction> model)
        {
            // Number of float columns read from each row (indices 0..12).
            const int columnCount = 13;

            using (var environment = new TlcEnvironment())
            {
                var textLoader = new Microsoft.ML.Data.TextLoader(TestDataPath).CreateFrom<WineData>(useHeader: true, separator: ',', trimWhitespace: false);
                var experiment = environment.CreateExperiment();

                // Direct cast instead of `as`: an unexpected step type fails here with a clear
                // InvalidCastException rather than a NullReferenceException at output.Data below.
                var output = (ILearningPipelineDataStep)textLoader.ApplyStep(null, experiment);

                experiment.Compile();
                textLoader.SetInput(environment, experiment);
                experiment.Run();
                var data      = experiment.GetOutput(output.Data);
                var wineDatas = new List<WineData>();

                using (var cursor = data.GetRowCursor(a => true))
                {
                    // One getter per column, created once and reused for every row.
                    var getters = new ValueGetter<float>[columnCount];
                    for (int column = 0; column < columnCount; column++)
                    {
                        getters[column] = cursor.GetGetter<float>(column);
                    }

                    while (cursor.MoveNext())
                    {
                        // Fresh zero-initialized buffer per row, as in the hand-unrolled version.
                        var values = new float[columnCount];
                        for (int column = 0; column < columnCount; column++)
                        {
                            getters[column](ref values[column]);
                        }

                        wineDatas.Add(new WineData
                        {
                            FixedAcidity       = values[0],
                            VolatileAcidity    = values[1],
                            CitricACID         = values[2],
                            ResidualSugar      = values[3],
                            Chlorides          = values[4],
                            FreeSulfurDioxide  = values[5],
                            TotalSulfurDioxide = values[6],
                            Density            = values[7],
                            PH                 = values[8],
                            Sulphates          = values[9],
                            Alcohol            = values[10],
                            Quality            = values[11],
                            Id                 = values[12],
                        });
                    }
                }

                // Enumerable.Last() throws on an empty sequence; report the situation instead.
                if (wineDatas.Count == 0)
                {
                    Console.WriteLine("No test rows were loaded; nothing to predict.");
                    Console.WriteLine();
                    return;
                }

                var predictions = model.Predict(wineDatas);

                // Zip is lazily evaluated: take the last pair exactly once so the prediction
                // sequence is not re-enumerated for every value that gets printed.
                var last = wineDatas.Zip(predictions, (wineData, prediction) => (wineData: wineData, prediction: prediction)).Last();

                Console.WriteLine($"Wine Id: {last.wineData.Id}, Quality: {last.wineData.Quality} | Prediction: {last.prediction.PredictionQuality}");
                Console.WriteLine();
            }
        }
Beispiel #10
0
        /// <summary>
        /// Loads every row of a comma-separated, header-less file into
        /// <c>PokerHandData</c> objects by running a text loader experiment and reading
        /// columns 0..10 of each row as <c>float</c>. <c>Init()</c> is called on each
        /// object before it is added to the result.
        /// </summary>
        /// <param name="path">Path of the data file to load.</param>
        /// <returns>The loaded rows, in cursor order.</returns>
        static IEnumerable<PokerHandData> LoadData(string path)
        {
            using (var environment = new TlcEnvironment())
            {
                var loadedHands = new List<PokerHandData>();
                var loader      = new Microsoft.ML.Data.TextLoader(path).CreateFrom<PokerHandData>(useHeader: false, separator: ',', trimWhitespace: false);
                var experiment  = environment.CreateExperiment();
                var dataStep    = loader.ApplyStep(null, experiment) as ILearningPipelineDataStep;

                experiment.Compile();
                loader.SetInput(environment, experiment);
                experiment.Run();

                var rows = experiment.GetOutput(dataStep.Data);

                using (var cursor = rows.GetRowCursor(column => true))
                {
                    // One getter per column (indices 0..10), created in ascending column order.
                    var readers = new ValueGetter<float>[11];
                    for (int column = 0; column < readers.Length; column++)
                    {
                        readers[column] = cursor.GetGetter<float>(column);
                    }

                    while (cursor.MoveNext())
                    {
                        // Zero-initialized buffer for this row, filled in ascending column order.
                        var fields = new float[readers.Length];
                        for (int column = 0; column < readers.Length; column++)
                        {
                            readers[column](ref fields[column]);
                        }

                        // Columns alternate S/C for the five cards; the last column is Power.
                        var hand = new PokerHandData();
                        hand.S1    = fields[0];
                        hand.C1    = fields[1];
                        hand.S2    = fields[2];
                        hand.C2    = fields[3];
                        hand.S3    = fields[4];
                        hand.C3    = fields[5];
                        hand.S4    = fields[6];
                        hand.C4    = fields[7];
                        hand.S5    = fields[8];
                        hand.C5    = fields[9];
                        hand.Power = fields[10];
                        hand.Init();

                        loadedHands.Add(hand);
                    }
                }

                return loadedHands;
            }
        }