// Example #1: cross-validation with the CrossValidator convenience class
        /// <summary>
        /// Demonstrates k-fold cross-validation with the <c>CrossValidator</c> convenience class:
        /// trains a Naive Bayes classifier on a binary-vector dataset, logs per-example predictions
        /// and per-fold accuracy, then prints the summed confusion matrix, average accuracy, and
        /// per-label precision with standard deviation.
        /// </summary>
        /// <param name="args">Arguments forwarded to <c>BinarySvm.RunInstanceNull</c> to obtain the labeled data.</param>
        public override void Run(object[] args)
        {
            // get labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var       labeledData    = (LabeledDataset <string, SparseVector <double> >)classifierInst.Result["labeled_data"];

            // convert dataset to binary vector
            var ds = (LabeledDataset <string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // cross validation ...with the convenience class
            var validation = new CrossValidator <string, BinaryVector>
            {
                NumFolds     = 10,   // default
                IsStratified = true, // default
                ExpName      = "",   // default

                Dataset      = ds,
                OnAfterTrain = (sender, foldN, model, trainSet) =>
                {
                    var m = (NaiveBayesClassifier <string>)model;
                    // do stuff after model is trained for a fold...
                },
                OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
                {
                    Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                    return(true); // true = continue with the remaining predictions
                },
                OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
                {
                    PerfMatrix <string> foldMatrix = sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
                    Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
                }
            };

            validation.Models.Add(new NaiveBayesClassifier <string>());
            validation.Run();

            Output.WriteLine("Sum confusion matrix:");
            // NOTE: use validation.ExpName (set above) rather than a hard-coded "" so the
            // summary stays consistent with the per-fold reporting if ExpName ever changes
            PerfMatrix <string> sumPerfMatrix = validation.PerfData.GetSumPerfMatrix(validation.ExpName, validation.GetModelName(0));

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            foreach (string label in validation.PerfData.GetLabels(validation.ExpName, validation.GetModelName(0)))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 validation.PerfData.GetAvg(validation.ExpName, validation.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
        /// <summary>
        /// Trains a one-vs-rest sentiment model: <paramref name="label"/> against the two other labels
        /// merged into <paramref name="otherLabel1"/>. Cross-validation is run first to collect the
        /// score distributions of correct predictions (used later for calibration) and to derive the
        /// model weight (macro F1); the final model is then trained on the full dataset.
        /// </summary>
        /// <param name="dataset">Labeled training examples.</param>
        /// <param name="label">The target label to separate from the rest.</param>
        /// <param name="otherLabel1">First non-target label; the merged "rest" class is reported under this label.</param>
        /// <param name="otherLabel2">Second non-target label.</param>
        /// <returns>A trained <c>Model</c> with calibration scores, per-label weights, and macro-F1 weight.</returns>
        private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset,
                                 SentimentLabel label, SentimentLabel otherLabel1, SentimentLabel otherLabel2)
        {
            IModel <SentimentLabel, SparseVector <double> > model = CreateModel();

            // hoist the invariant denominator: count of all non-target examples (computed once,
            // instead of enumerating the dataset twice for the same value)
            double othersCount = dataset.Count(le => le.Label != label);
            var otherLabelWeight1 = dataset.Count(le => le.Label == otherLabel1) / othersCount;
            var otherLabelWeight2 = dataset.Count(le => le.Label == otherLabel2) / othersCount;

            // relabel: collapse both non-target labels into otherLabel1 => binary problem
            dataset = new LabeledDataset <SentimentLabel, SparseVector <double> >(dataset.Select(le =>
                                                                                                 new LabeledExample <SentimentLabel, SparseVector <double> >(le.Label == label ? label : otherLabel1, le.Example)));

            var scores       = new List <double>();
            var scoresOthers = new List <double>();
            var validation   = new CrossValidator <SentimentLabel, SparseVector <double> >
            {
                NumFolds = NumTrainFolds,
                Dataset  = dataset,

                OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
                {
                    // collect best scores of correct predictions only, split by predicted class
                    if (le.Label == prediction.BestClassLabel)
                    {
                        if (prediction.BestClassLabel == label)
                        {
                            scores.Add(prediction.BestScore);
                        }
                        else
                        {
                            scoresOthers.Add(prediction.BestScore);
                        }
                    }
                    return(true);
                }
            };

            validation.Models.Add(model);
            validation.Run();

            // train the final model on the full (relabeled) dataset
            model.Train(dataset);

            return(new Model
            {
                InnerModel = model,
                Weight = validation.PerfData.GetSumPerfMatrix(validation.ExpName, validation.GetModelName(model)).GetMacroF1(),
                Label = label,
                OtherLabel1 = otherLabel1,
                OtherLabelWeight1 = otherLabelWeight1,
                OtherLabel2 = otherLabel2,
                OtherLabelWeight2 = otherLabelWeight2,
                // sorted ascending so downstream percentile/threshold lookups can binary-search
                Scores = scores.OrderBy(s => s).ToArray(),
                ScoresOthers = scoresOthers.OrderBy(s => s).ToArray()
            });
        }
        /// <summary>
        /// Trains a binary sentiment model for <paramref name="label1"/> vs. <paramref name="label2"/>.
        /// Cross-validation is run first to gather the score distributions of correct predictions per
        /// class and the macro-F1 model weight; the final model is then trained on the full dataset.
        /// </summary>
        /// <param name="dataset">Labeled training examples.</param>
        /// <param name="label1">First class label.</param>
        /// <param name="label2">Second class label.</param>
        /// <returns>A trained <c>Model</c> with sorted calibration scores and macro-F1 weight.</returns>
        private Model TrainModel(LabeledDataset <SentimentLabel, SparseVector <double> > dataset,
                                 SentimentLabel label1, SentimentLabel label2)
        {
            IModel <SentimentLabel, SparseVector <double> > model = CreateModel();
            var correctScores1 = new List <double>();
            var correctScores2 = new List <double>();

            var crossValidator = new CrossValidator <SentimentLabel, SparseVector <double> >
            {
                NumFolds = NumTrainFolds,
                Dataset  = dataset,

                OnAfterPrediction = (sender, foldN, m, ex, le, prediction) =>
                {
                    // keep only scores of correct predictions, bucketed by the predicted class
                    bool isCorrect = le.Label == prediction.BestClassLabel;
                    if (isCorrect && prediction.BestClassLabel == label1)
                    {
                        correctScores1.Add(prediction.BestScore);
                    }
                    else if (isCorrect && prediction.BestClassLabel == label2)
                    {
                        correctScores2.Add(prediction.BestScore);
                    }
                    return(true);
                }
            };

            crossValidator.Models.Add(model);
            crossValidator.Run();

            // final model is trained on the complete dataset
            model.Train(dataset);

            return(new Model
            {
                InnerModel = model,
                Label1 = label1,
                Label2 = label2,
                Scores1 = correctScores1.OrderBy(s => s).ToArray(),
                Scores2 = correctScores2.OrderBy(s => s).ToArray(),
                Weight = crossValidator.PerfData.GetSumPerfMatrix(crossValidator.ExpName, crossValidator.GetModelName(model)).GetMacroF1()
            });
        }