/// <summary>
/// Program entry point: trains a binary spam classifier on the dataset at
/// <c>dataPath</c>, prints cross-validation and hold-out metrics, and then
/// prints the predicted spam probability for a few sample messages.
/// </summary>
/// <param name="args">The command line parameters (not used).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();

    // Read the tab-separated dataset; the first row is treated as a header.
    var dataset = mlContext.Data.LoadFromTextFile<SpamInput>(
        dataPath,
        hasHeader: true,
        separatorChar: '\t');

    // Hold back 20% of the rows for the final evaluation.
    var split = mlContext.Data.TrainTestSplit(dataset, testFraction: 0.2);

    // Stage 1: turn the raw text label into a boolean (true only for "spam").
    var labelMapper = mlContext.Transforms.CustomMapping<FromLabel, ToLabel>(
        (source, target) =>
        {
            target.Label = source.RawLabel == "spam";
        },
        contractName: "MyLambda");

    // Stage 2: featurize the message text into the "Features" column.
    var textFeaturizer = mlContext.Transforms.Text.FeaturizeText(
        "Features",
        nameof(SpamInput.Message));

    // Stage 3: SDCA logistic regression learner.
    var learner = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression();

    var pipeline = labelMapper.Append(textFeaturizer).Append(learner);

    // 5-fold cross validation over the training partition.
    Console.WriteLine("Performing cross validation...");
    var foldResults = mlContext.BinaryClassification.CrossValidate(
        split.TrainSet,
        pipeline,
        numberOfFolds: 5);

    foreach (var fold in foldResults)
    {
        Console.WriteLine($" Fold: {fold.Fold}, AUC: {fold.Metrics.AreaUnderRocCurve}");
    }

    Console.WriteLine($" Average AUC: {foldResults.Average(fold => fold.Metrics.AreaUnderRocCurve)}");
    Console.WriteLine();

    // Fit the pipeline on the training partition.
    Console.WriteLine("Training the model...");
    var trainedModel = pipeline.Fit(split.TrainSet);

    // Score the held-out partition and compute the evaluation metrics.
    Console.WriteLine("Evaluating the model...");
    var scoredTestSet = trainedModel.Transform(split.TestSet);
    var evaluation = mlContext.BinaryClassification.Evaluate(
        scoredTestSet,
        "Label",
        "Score");

    Console.WriteLine($" Accuracy: {evaluation.Accuracy:P2}");
    Console.WriteLine($" Auc: {evaluation.AreaUnderRocCurve:P2}");
    Console.WriteLine($" Auprc: {evaluation.AreaUnderPrecisionRecallCurve:P2}");
    Console.WriteLine($" F1Score: {evaluation.F1Score:P2}");
    Console.WriteLine($" LogLoss: {evaluation.LogLoss:0.##}");
    Console.WriteLine($" LogLossReduction: {evaluation.LogLossReduction:0.##}");
    Console.WriteLine($" PositivePrecision: {evaluation.PositivePrecision:0.##}");
    Console.WriteLine($" PositiveRecall: {evaluation.PositiveRecall:0.##}");
    Console.WriteLine($" NegativePrecision: {evaluation.NegativePrecision:0.##}");
    Console.WriteLine($" NegativeRecall: {evaluation.NegativeRecall:0.##}");
    Console.WriteLine();

    // Single-row prediction engine built from the trained model.
    Console.WriteLine("Predicting spam probabilities for a sample messages...");
    var engine = mlContext.Model.CreatePredictionEngine<SpamInput, SpamPrediction>(trainedModel);

    var samples = new[]
    {
        new SpamInput { Message = "Hi, wanna grab lunch together today?" },
        new SpamInput { Message = "Win a Nokia, PSP, or €25 every week. Txt YEAHIWANNA now to join" },
        new SpamInput { Message = "Home in 30 mins. Need anything from store?" },
        new SpamInput { Message = "CONGRATS U WON LOTERY CLAIM UR 1 MILIONN DOLARS PRIZE" },
    };

    // Predict each sample and print its probability right away.
    foreach (var sample in samples)
    {
        var scored = engine.Predict(sample);
        Console.WriteLine($" [{scored.Probability:P2}] {sample.Message}");
    }
}
/// <summary>
/// Program entry point: loads the tab-separated dataset at <c>filePath</c>,
/// cross-validates and trains a binary spam classifier, prints its metrics on
/// a 20% hold-out partition, and prints spam probabilities for sample messages.
/// </summary>
/// <param name="args">The command line parameters (not used).</param>
static void Main(string[] args)
{
    var context = new MLContext();

    // Load the dataset; the file is read without a header row.
    var data = context.Data.LoadFromTextFile<SpamInput>(filePath, hasHeader: false, separatorChar: '\t');

    // Use 80% of the rows for training and 20% for testing.
    var trainTestPartition = context.Data.TrainTestSplit(data, testFraction: 0.2);

    // Step 1: map the raw text label to a boolean.
    // The label is a fixed machine token, so it is compared ordinally
    // (ignoring case) rather than with culture rules, and through the static
    // string.Equals so that a null RawLabel maps to false instead of throwing.
    var pipeline = context.Transforms.CustomMapping<FromLabel, ToLabel>(
            (inp, output) =>
            {
                output.Label = string.Equals(inp.RawLabel, "spam", StringComparison.OrdinalIgnoreCase);
            },
            contractName: "SpamClassification")
        // Step 2: featurize the message text into the "Features" column.
        .Append(context.Transforms.Text.FeaturizeText("Features", nameof(SpamInput.Message)))
        // Step 3: SDCA logistic regression learner.
        .Append(context.BinaryClassification.Trainers.SdcaLogisticRegression());

    // 5-fold cross validation over the training partition.
    Console.WriteLine("Performing cross validations...");
    var cvResults = context.BinaryClassification.CrossValidate(trainTestPartition.TrainSet, pipeline, numberOfFolds: 5);
    foreach (var result in cvResults)
    {
        Console.WriteLine($"Fold: {result.Fold}, AUC: {result.Metrics.AreaUnderRocCurve}");
    }

    Console.WriteLine($"Average AUC: {cvResults.Average(r => r.Metrics.AreaUnderRocCurve)}");
    Console.WriteLine();

    // Train on the training partition.
    Console.WriteLine("Training the model...");
    var model = pipeline.Fit(trainTestPartition.TrainSet);

    // Evaluate on the held-out test partition.
    Console.WriteLine("Evaluating the model...");
    var predictions = model.Transform(trainTestPartition.TestSet);
    var metrics = context.BinaryClassification.Evaluate(predictions, "Label", "Score");

    // Report the results.
    Console.WriteLine($" Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($" Auc: {metrics.AreaUnderRocCurve:P2}");
    Console.WriteLine($" Auprc: {metrics.AreaUnderPrecisionRecallCurve:P2}");
    Console.WriteLine($" F1Score: {metrics.F1Score:P2}");
    Console.WriteLine($" LogLoss: {metrics.LogLoss:0.##}");
    Console.WriteLine($" LogLossReduction: {metrics.LogLossReduction:0.##}");
    Console.WriteLine($" PositivePrecision: {metrics.PositivePrecision:0.##}");
    Console.WriteLine($" PositiveRecall: {metrics.PositiveRecall:0.##}");
    Console.WriteLine($" NegativePrecision: {metrics.NegativePrecision:0.##}");
    Console.WriteLine($" NegativeRecall: {metrics.NegativeRecall:0.##}");
    Console.WriteLine();

    // Set up a prediction engine for scoring individual messages.
    Console.WriteLine("Predicting spam probabilities for a sample messages...");
    var predictionEngine = context.Model.CreatePredictionEngine<SpamInput, SpamPrediction>(model);

    // Create sample messages.
    var messages = new SpamInput[]
    {
        new SpamInput() { Message = "Hi, wanna grab lunch together today?" },
        new SpamInput() { Message = "Win a Nokia, PSP, or €25 every week. Txt YEAHIWANNA now to join" },
        new SpamInput() { Message = "Home in 30 mins. Need anything from store?" },
        new SpamInput() { Message = "CONGRATS U WON LOTERY CLAIM UR 1 MILIONN DOLARS PRIZE" },
    };

    // Make the predictions; the query is deferred, so each message is scored
    // as the loop below reaches it.
    var myPredictions = from m in messages
                        select (Message: m.Message, Prediction: predictionEngine.Predict(m));

    // Show the results.
    foreach (var p in myPredictions)
    {
        Console.WriteLine($" [{p.Prediction.Probability:P2}] {p.Message}");
    }
}