// Trains a pipeline (text featurization followed by a linear classifier) on the
// sentiment training file, then predicts the first five rows of the test file
// one at a time and checks both the predicted label and the score.
public void New_SimpleTrainAndPredict()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
                var trainView = loader.Read(trainSource);

                // Build the pipeline: featurizer first, trainer appended to it.
                var featurizer = new TextTransform(env, "SentimentText", "Features");
                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var pipeline = featurizer.Append(new LinearClassificationTrainer(env, trainerArgs, "Features", "Label"));

                // Fit the whole chain on the training data.
                var model = pipeline.Fit(trainView);

                // Wrap the trained model for single-row prediction.
                var engine = model.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

                // Read the test file as typed rows; only the first five are used.
                var testSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename));
                var testRows = loader.Read(testSource).AsEnumerable<SentimentData>(env, false);

                foreach (var row in testRows.Take(5))
                {
                    var predicted = engine.Predict(row);

                    // The predicted label must equal the actual label...
                    Assert.Equal(row.Sentiment, predicted.Sentiment);

                    // ...and the score must be above 1 for positive rows, below -1 for negative rows.
                    Assert.True(row.Sentiment ? predicted.Score > 1 : predicted.Score < -1);
                }
            }
        }
        // Trains the sentiment pipeline, then runs a prediction for every test row
        // from Parallel.ForEach worker threads through one shared prediction engine.
        // The predictions themselves are not inspected.
        void New_MultithreadedPrediction()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                var reader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));

                // Pipeline.
                var pipeline = new TextTransform(env, "SentimentText", "Features")
                    .Append(new LinearClassificationTrainer(
                        env,
                        new LinearClassificationTrainer.Arguments { NumThreads = 1 },
                        "Features",
                        "Label"));

                // Train.
                var model = pipeline.Fit(data);

                // Create the prediction engine shared by all worker threads.
                var engine = model.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

                // Read the whole test set as typed rows.
                var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename)))
                    .AsEnumerable<SentimentData>(env, false);

                Parallel.ForEach(testData, input =>
                {
                    // The lock serializes calls to Predict on the shared engine.
                    // NOTE(review): presumably the engine is not safe for concurrent use - confirm.
                    lock (engine)
                    {
                        // Result deliberately dropped; it was previously stored in an unused local.
                        engine.Predict(input);
                    }
                });
            }
        }
        // Populates _sentimentExample with a fixed input sentence and _sentimentModel
        // with a prediction function trained on the wikipedia-detox 250-line data set.
        public void SetupSentimentPipeline()
        {
            _sentimentExample = new SentimentData() { SentimentText = "Not a big fan of this." };

            string trainingPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv");

            using (var env = new ConsoleEnvironment(seed: 1, conc: 1, verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
            {
                // Tab-separated file with a header row: column 0 is the label, column 1 the text.
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = "\t",
                    HasHeader = true,
                    Column = new[]
                    {
                        new TextLoader.Column("Label", DataKind.BL, 0),
                        new TextLoader.Column("SentimentText", DataKind.Text, 1)
                    }
                };
                var loader = new TextLoader(env, loaderArgs);

                IDataView trainView = loader.Read(trainingPath);

                // Featurize the text, then train a linear classifier on a single thread
                // with a convergence tolerance of 1e-2.
                var featurizer = new TextTransform(env, "SentimentText", "Features");
                var pipeline = featurizer.Append(
                    new LinearClassificationTrainer(
                        env,
                        "Features",
                        "Label",
                        advancedSettings: settings =>
                        {
                            settings.NumThreads = 1;
                            settings.ConvergenceTolerance = 1e-2f;
                        }));

                var trained = pipeline.Fit(trainView);

                _sentimentModel = trained.MakePredictionFunction<SentimentData, SentimentPrediction>(env);
            }
        }
        // Trains the sentiment pipeline, saves the model to a temporary file, loads it
        // back, and checks that the loaded model predicts the first five test rows
        // correctly with scores separated from zero.
        public void New_TrainSaveModelAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var reader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var data   = reader.Read(new MultiFileSource(dataPath));

                // Pipeline.
                var pipeline = new TextTransform(env, "SentimentText", "Features")
                    .Append(new LinearClassificationTrainer(
                        env,
                        new LinearClassificationTrainer.Arguments { NumThreads = 1 },
                        "Features",
                        "Label"));

                // Train.
                var model = pipeline.Fit(data);

                ITransformer loadedModel;
                using (var file = env.CreateTempFile())
                {
                    // Save model.
                    using (var writeStream = file.CreateWriteStream())
                    {
                        model.SaveTo(env, writeStream);
                    }

                    // Load model. The read stream is disposed before the temp file itself
                    // is disposed, so no open handle is left on the file.
                    using (var readStream = file.OpenReadStream())
                    {
                        loadedModel = TransformerChain.LoadFrom(env, readStream);
                    }
                }

                // Create prediction engine from the reloaded model and test predictions.
                var engine = loadedModel.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

                // Take a couple examples out of the test data and run predictions on top.
                // Reuses the test path resolved at the top instead of resolving it again.
                var testData = reader.Read(new MultiFileSource(testDataPath))
                    .AsEnumerable<SentimentData>(env, false);
                foreach (var input in testData.Take(5))
                {
                    var prediction = engine.Predict(input);
                    // Verify that predictions match and scores are separated from zero.
                    Assert.Equal(input.Sentiment, prediction.Sentiment);
                    Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
                }
            }
        }
        // Example #5
        // Trains the sentiment pipeline and then reads the feature weights out of the
        // model held by the last transformer in the trained chain.
        public void New_IntrospectiveTraining()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var trainView = loader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));

                // Featurizer followed by a single-threaded linear classifier.
                var featurizer = new TextTransform(env, "SentimentText", "Features");
                var trainer = new LinearClassificationTrainer(env, "Features", "Label", advancedSettings: settings => settings.NumThreads = 1);
                var pipeline = featurizer.Append(trainer);

                // Fit the chain.
                var trained = pipeline.Fit(trainView);

                // Pull the learned feature weights into a buffer.
                VBuffer<float> featureWeights = default;
                trained.LastTransformer.Model.GetFeatureWeights(ref featureWeights);
            }
        }
        // Example #6
        // Fits the text featurizer on the training file, applies it to both the
        // training and test files, and trains a linear classifier with the featurized
        // test file passed as the validation set.
        public void New_TrainWithValidationSet()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Loader and featurization step.
                var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var featurizer = new TextTransform(env, "SentimentText", "Features");

                // Fit the featurizer on the raw training data and transform that data.
                var rawTrain = loader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));
                var fittedFeaturizer = featurizer.Fit(rawTrain);
                var trainView = fittedFeaturizer.Transform(rawTrain);

                // Run the test file through the same fitted featurizer.
                var rawValid = loader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename)));
                var validView = fittedFeaturizer.Transform(rawValid);

                // Train with default trainer arguments, supplying the validation view.
                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments(), "Features", "Label");
                var model = trainer.Train(trainView, validView);
            }
        }
        // Example #7
        // Trains the sentiment pipeline on the training file and then reads the
        // feature weights out of the model held by the last transformer in the chain.
        public void New_IntrospectiveTraining()
        {
            // Only the training file is needed here; the previously declared test-path
            // local was never used and has been removed.
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var data = new TextLoader(env, MakeSentimentTextLoaderArgs())
                    .Read(new MultiFileSource(dataPath));

                var pipeline = new TextTransform(env, "SentimentText", "Features")
                    .Append(new LinearClassificationTrainer(
                        env,
                        new LinearClassificationTrainer.Arguments { NumThreads = 1 },
                        "Features",
                        "Label"));

                // Train.
                var model = pipeline.Fit(data);

                // Get feature weights.
                VBuffer<float> weights = default;
                model.LastTransformer.Model.GetFeatureWeights(ref weights);
            }
        }
        // Exercises TextTransform: runs TestEstimatorCore with a valid and an invalid
        // input, then writes four rows of the "Data" and "Data_TransformedText" columns
        // to Text/featurized.tsv and calls CheckEquality on that file.
        public void TextFeaturizerWorkout()
        {
            string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv");

            // Valid input: column 0 as bool label, column 1 as text.
            var validReader = TextLoader.CreateReader(
                Env,
                ctx => (label: ctx.LoadBool(0), text: ctx.LoadText(1)),
                hasHeader: true);
            var data = validReader.Read(new MultiFileSource(sentimentDataPath));

            // Invalid input: same file, but column 1 is loaded as float instead of text.
            var invalidReader = TextLoader.CreateReader(
                Env,
                ctx => (label: ctx.LoadBool(0), text: ctx.LoadFloat(1)),
                hasHeader: true);
            var invalidData = invalidReader.Read(new MultiFileSource(sentimentDataPath)).AsDynamic;

            //var feat = Estimator.MakeNew(data)
            //     .Append(row => row.text.FeaturizeText(advancedSettings: s => { s.OutputTokens = true; }));
            var feat = new TextTransform(Env, "text", "Data", advancedSettings: settings => { settings.OutputTokens = true; });

            TestEstimatorCore(feat, data.AsDynamic, invalidInput: invalidData);

            var outputPath = GetOutputPath("Text", "featurized.tsv");

            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(Env, saverArgs);

                // Fit and apply the featurizer, keep 4 rows, then keep only the two output columns.
                IDataView savedData = TakeFilter.Create(Env, feat.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
                savedData = new ChooseColumnsTransform(Env, savedData, "Data", "Data_TransformedText");

                using (var outputStream = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(ch, saver, savedData, outputStream, keepHidden: true);
                }
            }

            CheckEquality("Text", "featurized.tsv");
            Done();
        }
        // Console sample: trains a sentiment classifier on TrainDataPath, evaluates it
        // on TestDataPath, predicts one sample sentence, saves the model, and predicts
        // again through PredictWithModelLoadedFromFile.
        static void Main(string[] args)
        {
            // Step 1: create the ML.NET environment.
            using (var env = new LocalEnvironment())
            {
                // Step 2: describe the file schema (header row, label in column 0, text in column 1).
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = "tab",
                    HasHeader = true,
                    Column = new[]
                    {
                        new TextLoader.Column("Label", DataKind.Bool, 0),
                        new TextLoader.Column("Text", DataKind.Text, 1)
                    }
                };
                var loader = new TextLoader(env, loaderArgs);

                // Load the training data.
                IDataView trainView = loader.Read(new MultiFileSource(TrainDataPath));

                // Step 3: build the training pipeline as a chain of estimators.
                // The text column is converted to a numeric "Features" column, then a
                // linear classifier with default arguments is appended.
                var featurizer = new TextTransform(env, "Text", "Features");
                var pipeline = featurizer.Append(
                    new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments(), "Features", "Label"));
                //.Append(new LinearClassificationTrainer(env, "Features", "Label")); //(Simpler in ML.NET v0.7)

                // Step 4: train the model.
                Console.WriteLine("=============== Create and Train the Model ===============");

                var trainedModel = pipeline.Fit(trainView);

                Console.WriteLine("=============== End of training ===============");
                Console.WriteLine();

                // Step 5: evaluate the model on the test data and print the metrics.
                IDataView testView = loader.Read(new MultiFileSource(TestDataPath));

                Console.WriteLine("=============== Evaluating Model's accuracy with Test data===============");
                var scored = trainedModel.Transform(testView);

                var evaluationContext = new BinaryClassificationContext(env);
                var metrics = evaluationContext.Evaluate(scored, "Label");

                Console.WriteLine();
                Console.WriteLine("Model quality metrics evaluation");
                Console.WriteLine("------------------------------------------");
                Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
                Console.WriteLine($"Auc: {metrics.Auc:P2}");
                Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
                Console.WriteLine("=============== End of Model's evaluation ===============");
                Console.WriteLine();

                // Step 6: predict the sentiment of a single sample sentence.
                var predictor = trainedModel.MakePredictionFunction<SentimentIssue, SentimentPrediction>(env);

                SentimentIssue sample = new SentimentIssue { Text = "This is a very rude movie" };

                var outcome = predictor.Predict(sample);

                Console.WriteLine();
                Console.WriteLine("=============== Test of model with a sample ===============");

                Console.WriteLine($"Text: {sample.Text} | Prediction: {(Convert.ToBoolean(outcome.Prediction) ? "Toxic" : "Nice")} sentiment | Probability: {outcome.Probability} ");

                // Save the trained model to a file.
                SaveModelAsFile(env, trainedModel);

                // Predict the same sample again, this time with the model loaded from the file.
                PredictWithModelLoadedFromFile(sample);

                Console.WriteLine("=============== End of process, hit any key to finish ===============");
                Console.ReadKey();
            }
        }