/// <summary>
/// Fits a text-featurization + linear classification pipeline on the sentiment
/// training file, then predicts the first five test examples one at a time and
/// asserts on both the predicted label and the magnitude of the score.
/// </summary>
public void New_SimpleTrainAndPredict()
{
    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Load the training set.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
        var trainData = loader.Read(trainSource);

        // Assemble the estimator chain: "SentimentText" -> "Features" -> linear classifier on "Label".
        var featurizer = new TextTransform(env, "SentimentText", "Features");
        var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
        var estimatorChain = featurizer.Append(trainer);

        // Fit the chain on the training data.
        var fittedModel = estimatorChain.Fit(trainData);

        // Wrap the fitted model in a single-example prediction function.
        var predictor = fittedModel.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

        // Read the test set as strongly typed rows.
        var testSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename));
        var testExamples = loader.Read(testSource).AsEnumerable<SentimentData>(env, false);

        foreach (var example in testExamples.Take(5))
        {
            var predicted = predictor.Predict(example);

            // The predicted label must equal the label stored with the example.
            Assert.Equal(example.Sentiment, predicted.Sentiment);

            // The score must be beyond +1 for positive examples and beyond -1 for negative ones.
            bool confidentPositive = example.Sentiment && predicted.Score > 1;
            bool confidentNegative = !example.Sentiment && predicted.Score < -1;
            Assert.True(confidentPositive || confidentNegative);
        }
    }
}
/// <summary>
/// Fits the sentiment pipeline and then runs predictions over the whole test set
/// from a <c>Parallel.ForEach</c> loop. Every call to the prediction function is
/// made while holding a lock on it. The method makes no assertions on the results.
/// </summary>
void New_MultithreadedPrediction()
{
    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Load the training set.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
        var trainData = loader.Read(trainSource);

        // Assemble the estimator chain: "SentimentText" -> "Features" -> linear classifier on "Label".
        var featurizer = new TextTransform(env, "SentimentText", "Features");
        var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
        var estimatorChain = featurizer.Append(trainer);

        // Fit the chain on the training data.
        var fittedModel = estimatorChain.Fit(trainData);

        // Wrap the fitted model in a single-example prediction function.
        var predictor = fittedModel.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

        // Read the test set as strongly typed rows.
        var testSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename));
        var testExamples = loader.Read(testSource).AsEnumerable<SentimentData>(env, false);

        Parallel.ForEach(testExamples, example =>
        {
            // Calls are serialized on the predictor instance.
            // NOTE(review): presumably the prediction function is not safe for concurrent use - confirm.
            lock (predictor)
            {
                predictor.Predict(example);
            }
        });
    }
}
/// <summary>
/// Prepares the sentiment fixtures: stores a sample input in <c>_sentimentExample</c>,
/// fits a text-featurization + linear classification pipeline on the
/// "wikipedia-detox-250-line-data.tsv" file, and stores the resulting prediction
/// function in <c>_sentimentModel</c>.
/// </summary>
public void SetupSentimentPipeline()
{
    _sentimentExample = new SentimentData() { SentimentText = "Not a big fan of this." };

    string trainingFile = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv");

    using (var env = new ConsoleEnvironment(
        seed: 1,
        conc: 1,
        verbose: false,
        sensitivity: MessageSensitivity.None,
        outWriter: EmptyWriter.Instance))
    {
        // Column 0 is loaded as the boolean "Label", column 1 as "SentimentText"; the file has a header row.
        var loaderArgs = new TextLoader.Arguments()
        {
            Separator = "\t",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Label", DataKind.BL, 0),
                new TextLoader.Column("SentimentText", DataKind.Text, 1)
            }
        };
        var loader = new TextLoader(env, loaderArgs);
        IDataView trainingData = loader.Read(trainingFile);

        // Featurize the text, then train a linear classifier with one thread and a 1e-2 convergence tolerance.
        var featurizer = new TextTransform(env, "SentimentText", "Features");
        var trainer = new LinearClassificationTrainer(env, "Features", "Label",
            advancedSettings: settings =>
            {
                settings.NumThreads = 1;
                settings.ConvergenceTolerance = 1e-2f;
            });
        var estimatorChain = featurizer.Append(trainer);

        var fittedModel = estimatorChain.Fit(trainingData);

        // NOTE(review): the prediction function is created with `env`, which is disposed when this
        // using block ends - confirm the stored function remains usable afterwards.
        _sentimentModel = fittedModel.MakePredictionFunction<SentimentData, SentimentPrediction>(env);
    }
}
/// <summary>
/// Fits the sentiment pipeline, saves the fitted model to a temporary file, loads it
/// back, and verifies predictions from the reloaded model on the first five test examples.
/// </summary>
public void New_TrainSaveModelAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var reader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var data = reader.Read(new MultiFileSource(dataPath));

        // Pipeline.
        var pipeline = new TextTransform(env, "SentimentText", "Features")
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(data);

        ITransformer loadedModel;
        using (var file = env.CreateTempFile())
        {
            // Save model.
            using (var fs = file.CreateWriteStream())
            {
                model.SaveTo(env, fs);
            }

            // Load model. The read stream is disposed before the temp file itself is
            // disposed, so no handle to the file is left open.
            using (var fs = file.OpenReadStream())
            {
                loadedModel = TransformerChain.LoadFrom(env, fs);
            }
        }

        // Create prediction engine from the reloaded model and test predictions.
        var engine = loadedModel.MakePredictionFunction<SentimentData, SentimentPrediction>(env);

        // Take a couple examples out of the test data and run predictions on top.
        var testData = reader.Read(new MultiFileSource(testDataPath))
            .AsEnumerable<SentimentData>(env, false);

        foreach (var input in testData.Take(5))
        {
            var prediction = engine.Predict(input);

            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
        }
    }
}
/// <summary>
/// Fits the sentiment pipeline and reads the feature weights out of the model held
/// by the last transformer in the fitted chain. The method makes no assertions on the weights.
/// </summary>
public void New_IntrospectiveTraining()
{
    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Load the training set.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
        var trainData = loader.Read(trainSource);

        // Featurize "SentimentText" into "Features", then train a linear classifier with one thread.
        var featurizer = new TextTransform(env, "SentimentText", "Features");
        var trainer = new LinearClassificationTrainer(env, "Features", "Label",
            advancedSettings: settings => { settings.NumThreads = 1; });
        var estimatorChain = featurizer.Append(trainer);

        // Fit the chain on the training data.
        var fittedModel = estimatorChain.Fit(trainData);

        // Pull the feature weights from the trained predictor.
        VBuffer<float> featureWeights = default(VBuffer<float>);
        fittedModel.LastTransformer.Model.GetFeatureWeights(ref featureWeights);
    }
}
/// <summary>
/// Fits the text featurizer on the training file, applies it to both the training
/// and the test file, and trains a linear classifier on the featurized training data
/// with the featurized test data passed as the second argument to <c>Train</c>.
/// </summary>
public void New_TrainWithValidationSet()
{
    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Loader and featurization estimator.
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs());
        var featurizer = new TextTransform(env, "SentimentText", "Features");

        // Fit the featurizer on the raw training data.
        var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
        var rawTrain = loader.Read(trainSource);
        var fittedFeaturizer = featurizer.Fit(rawTrain);

        // Featurize both data sets with the same fitted transformer.
        var featurizedTrain = fittedFeaturizer.Transform(rawTrain);
        var validSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename));
        var rawValid = loader.Read(validSource);
        var featurizedValid = fittedFeaturizer.Transform(rawValid);

        // Train with default trainer arguments, supplying the validation set.
        var trainerArgs = new LinearClassificationTrainer.Arguments();
        var trainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
        var trainedModel = trainer.Train(featurizedTrain, featurizedValid);
    }
}
/// <summary>
/// Fits the sentiment pipeline on the training file and reads the feature weights out
/// of the model held by the last transformer in the fitted chain. The method makes no
/// assertions on the weights.
/// </summary>
public void New_IntrospectiveTraining()
{
    // Only the training file is needed here; the previously computed test path was never used.
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        var data = new TextLoader(env, MakeSentimentTextLoaderArgs())
            .Read(new MultiFileSource(dataPath));

        var pipeline = new TextTransform(env, "SentimentText", "Features")
            .Append(new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label"));

        // Train.
        var model = pipeline.Fit(data);

        // Get feature weights.
        VBuffer<float> weights = default;
        model.LastTransformer.Model.GetFeatureWeights(ref weights);
    }
}
/// <summary>
/// Exercises <c>TextTransform</c> with token output enabled: runs the shared estimator
/// checks against valid and invalid input, saves four featurized rows (columns "Data" and
/// "Data_TransformedText") to "featurized.tsv", and compares that file via <c>CheckEquality</c>.
/// </summary>
public void TextFeaturizerWorkout()
{
    string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv");

    // Valid input: column 1 is loaded as text.
    var validReader = TextLoader.CreateReader(Env,
        ctx => (label: ctx.LoadBool(0), text: ctx.LoadText(1)),
        hasHeader: true);
    var data = validReader.Read(new MultiFileSource(sentimentDataPath));

    // Invalid input: column 1 is loaded as float instead of text.
    var invalidReader = TextLoader.CreateReader(Env,
        ctx => (label: ctx.LoadBool(0), text: ctx.LoadFloat(1)),
        hasHeader: true);
    var invalidData = invalidReader.Read(new MultiFileSource(sentimentDataPath)).AsDynamic;

    // Dynamic-API estimator. The original kept a commented-out static-pipeline form:
    // Estimator.MakeNew(data).Append(row => row.text.FeaturizeText(advancedSettings: s => { s.OutputTokens = true; }))
    var feat = new TextTransform(Env, "text", "Data",
        advancedSettings: settings => { settings.OutputTokens = true; });

    TestEstimatorCore(feat, data.AsDynamic, invalidInput: invalidData);

    var outputPath = GetOutputPath("Text", "featurized.tsv");
    using (var channel = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });

        // Fit and apply the featurizer, keep four rows, then keep only the two columns of interest.
        var fitted = feat.Fit(data.AsDynamic);
        var featurized = fitted.Transform(data.AsDynamic);
        IDataView firstRows = TakeFilter.Create(Env, featurized, 4);
        IDataView selected = new ChooseColumnsTransform(Env, firstRows, "Data", "Data_TransformedText");

        using (var output = File.Create(outputPath))
        {
            DataSaverUtils.SaveDataView(channel, saver, selected, output, keepHidden: true);
        }
    }

    CheckEquality("Text", "featurized.tsv");
    Done();
}
/// <summary>
/// Sample entry point: trains a text classification model, prints evaluation metrics for
/// the test file, predicts one sample statement, saves the model, and predicts again via
/// <c>PredictWithModelLoadedFromFile</c>. Waits for a key press before returning.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // 1. Create the ML.NET environment.
    using (var env = new LocalEnvironment())
    {
        // 2. Describe the file schema: column 0 is the boolean "Label", column 1 is "Text".
        var loaderArgs = new TextLoader.Arguments()
        {
            Separator = "tab",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Label", DataKind.Bool, 0),
                new TextLoader.Column("Text", DataKind.Text, 1)
            }
        };
        var loader = new TextLoader(env, loaderArgs);

        // Load the training data.
        IDataView trainingDataView = loader.Read(new MultiFileSource(TrainDataPath));

        // 3. Build the estimator chain: featurize "Text" into "Features", then train a linear classifier.
        var featurizer = new TextTransform(env, "Text", "Features");
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments(), "Features", "Label");
        var estimatorChain = featurizer.Append(trainer);

        // 4. Train the model.
        Console.WriteLine("=============== Create and Train the Model ===============");
        var model = estimatorChain.Fit(trainingDataView);
        Console.WriteLine("=============== End of training ===============");
        Console.WriteLine();

        // 5. Evaluate the model on the test data and print quality metrics.
        IDataView testDataView = loader.Read(new MultiFileSource(TestDataPath));
        Console.WriteLine("=============== Evaluating Model's accuracy with Test data===============");
        var scoredTestData = model.Transform(testDataView);
        var evaluationContext = new BinaryClassificationContext(env);
        var metrics = evaluationContext.Evaluate(scoredTestData, "Label");

        Console.WriteLine();
        Console.WriteLine("Model quality metrics evaluation");
        Console.WriteLine("------------------------------------------");
        Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
        Console.WriteLine($"Auc: {metrics.Auc:P2}");
        Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
        Console.WriteLine("=============== End of Model's evaluation ===============");
        Console.WriteLine();

        // 6. Predict a single sample statement with the in-memory model.
        var predictionFunct = model.MakePredictionFunction<SentimentIssue, SentimentPrediction>(env);
        SentimentIssue sampleStatement = new SentimentIssue { Text = "This is a very rude movie" };
        var resultprediction = predictionFunct.Predict(sampleStatement);

        Console.WriteLine();
        Console.WriteLine("=============== Test of model with a sample ===============");
        Console.WriteLine($"Text: {sampleStatement.Text} | Prediction: {(Convert.ToBoolean(resultprediction.Prediction) ? "Toxic" : "Nice")} sentiment | Probability: {resultprediction.Probability} ");

        // Persist the trained model to a file.
        SaveModelAsFile(env, model);

        // Predict the same sample again, this time through the helper that loads the model from the file.
        PredictWithModelLoadedFromFile(sampleStatement);

        Console.WriteLine("=============== End of process, hit any key to finish ===============");
        Console.ReadKey();
    }
}