public override void Run(object[] args)
{
    // Obtain labeled data from the binary SVM demo instance.
    BinarySvm svmDemo = BinarySvm.RunInstanceNull(args);
    var labeled = (LabeledDataset <string, SparseVector <double> >)svmDemo.Result["labeled_data"];

    // Convert the sparse-vector dataset into a binary-vector dataset.
    var binaryDs = (LabeledDataset <string, BinaryVector>)labeled.ConvertDataset(typeof(BinaryVector), false);

    // Run 10-fold stratified cross-validation using the convenience class.
    var cv = new CrossValidator <string, BinaryVector>
    {
        NumFolds = 10,       // default
        IsStratified = true, // default
        ExpName = "",        // default
        Dataset = binaryDs,
        OnAfterTrain = (sender, foldN, model, trainSet) =>
        {
            var nb = (NaiveBayesClassifier <string>)model;
            // do stuff after model is trained for a fold...
        },
        OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
        {
            Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}",
                le.Label, prediction.BestClassLabel, prediction.BestScore);
            return true;
        },
        OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
        {
            PerfMatrix <string> foldMatrix =
                sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
            Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
        }
    };
    cv.Models.Add(new NaiveBayesClassifier <string>());
    cv.Run();

    // Report aggregate performance over all folds.
    Output.WriteLine("Sum confusion matrix:");
    PerfMatrix <string> sumMatrix = cv.PerfData.GetSumPerfMatrix("", cv.GetModelName(0));
    Output.WriteLine(sumMatrix.ToString());
    Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());

    // Per-label average precision with standard deviation across folds.
    foreach (string label in cv.PerfData.GetLabels("", cv.GetModelName(0)))
    {
        double stdDev;
        Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
            cv.PerfData.GetAvg("", cv.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev),
            stdDev);
    }
}
/// <summary>
/// Trains a one-vs-rest model for <paramref name="label"/> against the two remaining labels.
/// Cross-validates first to collect best-score distributions of correct predictions and to
/// derive the model's weight (macro-F1), then trains the final model on the full dataset.
/// </summary>
private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset, SentimentLabel label, SentimentLabel otherLabel1, SentimentLabel otherLabel2)
{
    IModel <SentimentLabel, SparseVector <double> > model = CreateModel();

    // Relative frequencies of the two "other" labels among all non-target examples.
    // Hoisted: the original evaluated the same non-target count twice, enumerating
    // the dataset once per evaluation for an identical denominator.
    int nonTargetCount = dataset.Count(le => le.Label != label);
    var otherLabelWeight1 = (double)dataset.Count(le => le.Label == otherLabel1) / nonTargetCount;
    var otherLabelWeight2 = (double)dataset.Count(le => le.Label == otherLabel2) / nonTargetCount;

    // Collapse to a binary problem: every non-target example is relabeled as otherLabel1.
    dataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(
        dataset.Select(le => new LabeledExample <SentimentLabel, SparseVector <double> >(
            le.Label == label ? label : otherLabel1, le.Example)));

    var scores = new List <double>();       // best scores of correct target-label predictions
    var scoresOthers = new List <double>(); // best scores of correct non-target predictions

    var validation = new CrossValidator <SentimentLabel, SparseVector <double> >
    {
        NumFolds = NumTrainFolds,
        Dataset = dataset,
        OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
        {
            // Record scores only for correctly classified examples.
            if (le.Label == prediction.BestClassLabel)
            {
                if (prediction.BestClassLabel == label)
                {
                    scores.Add(prediction.BestScore);
                }
                else
                {
                    scoresOthers.Add(prediction.BestScore);
                }
            }
            return true;
        }
    };
    validation.Models.Add(model);
    validation.Run();

    // Train the final model on the complete (relabeled) dataset.
    model.Train(dataset);

    return new Model
    {
        InnerModel = model,
        // Macro-F1 summed over the cross-validation folds serves as the model weight.
        Weight = validation.PerfData.GetSumPerfMatrix(validation.ExpName, validation.GetModelName(model)).GetMacroF1(),
        Label = label,
        OtherLabel1 = otherLabel1,
        OtherLabelWeight1 = otherLabelWeight1,
        OtherLabel2 = otherLabel2,
        OtherLabelWeight2 = otherLabelWeight2,
        // Sorted ascending; presumably consumed later for score thresholding — TODO confirm.
        Scores = scores.OrderBy(s => s).ToArray(),
        ScoresOthers = scoresOthers.OrderBy(s => s).ToArray()
    };
}
/// <summary>
/// Trains a pairwise model distinguishing <paramref name="label1"/> from
/// <paramref name="label2"/>. Cross-validates to collect per-label best-score
/// distributions of correct predictions and a macro-F1 weight, then trains the
/// final model on the complete dataset.
/// </summary>
private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset, SentimentLabel label1, SentimentLabel label2)
{
    IModel <SentimentLabel, SparseVector <double> > innerModel = CreateModel();

    var label1Scores = new List <double>();
    var label2Scores = new List <double>();

    // Collect best scores of correctly classified examples, split by predicted label.
    var crossValidator = new CrossValidator <SentimentLabel, SparseVector <double> >
    {
        NumFolds = NumTrainFolds,
        Dataset = dataset,
        OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
        {
            bool isCorrect = le.Label == prediction.BestClassLabel;
            if (isCorrect && prediction.BestClassLabel == label1)
            {
                label1Scores.Add(prediction.BestScore);
            }
            else if (isCorrect && prediction.BestClassLabel == label2)
            {
                label2Scores.Add(prediction.BestScore);
            }
            return true;
        }
    };
    crossValidator.Models.Add(innerModel);
    crossValidator.Run();

    // Train the final model on the full dataset.
    innerModel.Train(dataset);

    return new Model
    {
        InnerModel = innerModel,
        Label1 = label1,
        Label2 = label2,
        Scores1 = label1Scores.OrderBy(s => s).ToArray(),
        Scores2 = label2Scores.OrderBy(s => s).ToArray(),
        Weight = crossValidator.PerfData.GetSumPerfMatrix(crossValidator.ExpName, crossValidator.GetModelName(innerModel)).GetMacroF1()
    };
}