Example #1: 10-fold cross-validation with the CrossValidator convenience class
        public override void Run(object[] args)
        {
            // get labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var labeledData = (LabeledDataset<string, SparseVector<double>>)classifierInst.Result["labeled_data"];

            // convert dataset to binary vector
            var ds = (LabeledDataset<string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // cross-validation with the convenience class
            var validation = new CrossValidator<string, BinaryVector>
            {
                NumFolds     = 10,   // default
                IsStratified = true, // default
                ExpName      = "",   // default

                Dataset      = ds,
                OnAfterTrain = (sender, foldN, model, trainSet) =>
                {
                    var m = (NaiveBayesClassifier<string>)model;
                    // do stuff after model is trained for a fold...
                },
                OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
                {
                    Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                    return true;
                },
                OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
                {
                    PerfMatrix<string> foldMatrix = sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
                    Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
                }
            };

            validation.Models.Add(new NaiveBayesClassifier<string>());
            validation.Run();

            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix<string> sumPerfMatrix = validation.PerfData.GetSumPerfMatrix("", validation.GetModelName(0));

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            foreach (string label in validation.PerfData.GetLabels("", validation.GetModelName(0)))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 validation.PerfData.GetAvg("", validation.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
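Note that GetSumPerfMatrix pools the counts of all folds before computing accuracy, which is not quite the same as averaging the per-fold accuracies when folds differ in size. A minimal sketch of the latter, using only calls already shown in this example (it would sit at the end of Run, after validation.Run()):

            // average the per-fold accuracies by hand; fold indices are 1-based
            double accSum = 0;
            for (int foldN = 1; foldN <= validation.NumFolds; foldN++)
            {
                accSum += validation.PerfData.GetPerfMatrix("", validation.GetModelName(0), foldN).GetAccuracy();
            }
            Output.WriteLine("Mean per-fold accuracy: {0:0.00}", accSum / validation.NumFolds);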
Example #2: sweeping a bias term to calibrate a sentiment classifier
        private double? Calibrate(bool doPosPlane, LabeledDataset<SentimentLabel, SparseVector<double>> dataset)
        {
            BiasCalibration calibration = doPosPlane ? PosBiasCalibration : NegBiasCalibration;

            if (calibration == null)
            {
                return null;
            }

            Preconditions.CheckArgument(calibration.BiasStep > 0);
            Preconditions.CheckNotNull(calibration.OptimizationFunc);

            double maxScore = double.MinValue;
            double optimalBias = 0;
            var biasScorePairs = calibration.IsSaveBiasScorePairs ? new List<Tuple<double, double>>() : null;

            for (double bias = calibration.BiasLowerBound; bias <= calibration.BiasUpperBound; bias += calibration.BiasStep)
            {
                var matrix = new PerfMatrix<SentimentLabel>(null);
                foreach (LabeledExample<SentimentLabel, SparseVector<double>> le in dataset)
                {
                    Prediction <SentimentLabel> prediction = PredictInternal(le.Example, doPosPlane ? bias : 0, doPosPlane ? 0 : bias);
                    matrix.AddCount(le.Label, prediction.BestClassLabel);
                }
                double score = calibration.OptimizationFunc(matrix);
                if (score > maxScore)
                {
                    maxScore    = score;
                    optimalBias = bias;
                }
                if (biasScorePairs != null)
                {
                    biasScorePairs.Add(new Tuple<double, double>(bias, score));
                }
                Console.WriteLine("{0}\t{1:0.000}\t{2:0.000}", doPosPlane, bias, score);
            }
            if (biasScorePairs != null)
            {
                calibration.BiasScorePairs = biasScorePairs.ToArray();
            }

            return optimalBias;
        }
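The fields read by Calibrate suggest how a sweep might be configured. A minimal sketch, assuming BiasCalibration is a plain settings object exposing exactly the properties referenced above and that accuracy is the score to maximize (the assignment to PosBiasCalibration and the call site are hypothetical):

            // hypothetical setup; the property names are the ones Calibrate reads above
            PosBiasCalibration = new BiasCalibration
            {
                BiasLowerBound = -1,
                BiasUpperBound = 1,
                BiasStep = 0.05,
                IsSaveBiasScorePairs = true,
                OptimizationFunc = matrix => matrix.GetAccuracy() // score each bias by overall accuracy
            };
            double? optimalPosBias = Calibrate(true, dataset); // sweep the positive plane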
Example #3: multi-threaded cross-validation with TaskCrossValidator
        public override void Run(object[] args)
        {
            // get labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var labeledData = (LabeledDataset<string, SparseVector<double>>)classifierInst.Result["labeled_data"];

            // convert dataset to binary vector
            var ds = (LabeledDataset<string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // cross validation with task validator
            var validator = new TaskCrossValidator<string, BinaryVector>(new System.Func<IModel<string, BinaryVector>>[]
            {
                // model instances are constructed on the fly
                () => new NaiveBayesClassifier<string>()
            })
            {
                NumFolds     = 10,   // default
                IsStratified = true, // default
                ExpName      = "",   // default

                Dataset      = ds,
                OnAfterTrain = (sender, foldN, model, trainSet) =>
                {
                    var m = (NaiveBayesClassifier<string>)model;
                    // do stuff after model is trained for a fold...
                },
                OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
                {
                    lock (Output) Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                    return true;
                }
            };


            var cores = (int)(Math.Round(Environment.ProcessorCount * 0.9) - 1); // use roughly 90% of CPU cores, leaving one free

            Output.WriteLine("Multi-threaded using {0} cores\n", cores);
            Output.Flush();


            // using the .NET Task Parallel Library

            // model-level parallelization
            Parallel.ForEach(
                validator.GetFoldAndModelTasks(),
                new ParallelOptions { MaxDegreeOfParallelism = cores },
                foldTask => Parallel.ForEach(
                    foldTask(),
                    new ParallelOptions { MaxDegreeOfParallelism = cores },
                    modelTask => modelTask()));

            // fold-level parallelization (alternative)
            // Parallel.ForEach(validator.GetFoldTasks(), new ParallelOptions { MaxDegreeOfParallelism = cores }, t => t());



            // for a serious workload, consider SmartThreadPool instead
            // (requires a reference to https://www.nuget.org/packages/SmartThreadPool.dll/)

            var exceptions = new List<Exception>(); // filled by the SmartThreadPool variants below

            // model-level parallelization
            /*
            var threadPool = new SmartThreadPool { MaxThreads = cores };
            foreach (System.Func<Action[]> foldTask in validator.GetFoldAndModelTasks())
            {
                System.Func<Action[]> ft = foldTask;
                threadPool.QueueWorkItem(o =>
                {
                    foreach (Action modelTask in ft())
                    {
                        Action mt = modelTask;
                        threadPool.QueueWorkItem(p =>
                        {
                            mt();
                            return null;
                        }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
                    }
                    return null;
                }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
            }
            threadPool.WaitForIdle();
            threadPool.Shutdown();
            */

            // fold-level parallelization
            /*
            var threadPool = new SmartThreadPool { MaxThreads = cores };
            foreach (Action foldTask in validator.GetFoldTasks())
            {
                Action ft = foldTask;
                threadPool.QueueWorkItem(o =>
                {
                    ft();
                    return null;
                }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
            }
            threadPool.WaitForIdle();
            threadPool.Shutdown();
            */

            // rethrow the first error collected by the SmartThreadPool variants, if any
            foreach (Exception exception in exceptions)
            {
                throw new Exception("Error during validation", exception);
            }

            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix<string> sumPerfMatrix = validator.PerfData.GetSumPerfMatrix("", validator.GetModelName(0));

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            foreach (string label in validator.PerfData.GetLabels("", validator.GetModelName(0)))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 validator.PerfData.GetAvg("", validator.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
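The same fold-level fan-out can also be written with plain tasks instead of SmartThreadPool. A minimal sketch, assuming GetFoldTasks returns the Action sequence used in the commented block above; note that Task.Run offers no MaxDegreeOfParallelism cap and simply schedules on the default thread pool:

            // fold-level parallelization with Task.Run; failures surface as an AggregateException
            Task[] tasks = validator.GetFoldTasks().Select(t => Task.Run(t)).ToArray();
            Task.WaitAll(tasks); // throws if any fold task failed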
Example #4: manual stratified cross-validation of a binary SVM
        public override void Run(object[] args)
        {
            int foldCount = args.Any() ? (int)args[0] : 10;

            args = args.Skip(1).ToArray();

            // get classifier and labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var classifier = (SvmBinaryClassifier<string>)classifierInst.Result["classifier"];
            var labeledData = (LabeledDataset<string, SparseVector<double>>)classifierInst.Result["labeled_data"];

            bool stratified = true;

            // cross validation
            if (stratified)
            {
                labeledData.GroupLabels(true);
            }
            else
            {
                labeledData.Shuffle(new Random(1));
            }

            var perfData = new PerfData <string>();

            foreach (var g in labeledData.GroupBy(le => le.Label))
            {
                Output.WriteLine("total {0} {1}\t {2:0.00}", g.Key, g.Count(), (double)g.Count() / labeledData.Count);
            }

            Output.WriteLine("Performing {0}{1}-fold cross validation...", stratified ? "stratified " : "", foldCount);
            for (int i = 0; i < foldCount; i++)
            {
                int foldN = i + 1;
                LabeledDataset<string, SparseVector<double>> testSet;
                LabeledDataset<string, SparseVector<double>> trainSet;

                if (stratified)
                {
                    labeledData.SplitForStratifiedCrossValidation(foldCount, foldN, out trainSet, out testSet);
                }
                else
                {
                    labeledData.SplitForCrossValidation(foldCount, foldN, out trainSet, out testSet);
                }

                classifier.Train(trainSet);

                PerfMatrix<string> foldMatrix = perfData.GetPerfMatrix("tutorial", "binary svm", foldN);
                foreach (LabeledExample<string, SparseVector<double>> labeledExample in testSet)
                {
                    Prediction<string> prediction = classifier.Predict(labeledExample.Example);
                    foldMatrix.AddCount(labeledExample.Label, prediction.BestClassLabel);
                }
                Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
            }

            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix<string> sumPerfMatrix = perfData.GetSumPerfMatrix("tutorial", "binary svm");

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            Output.WriteLine();
            Output.WriteLine(sumPerfMatrix.ToString(new PerfMetric[] { }));
            Output.WriteLine(sumPerfMatrix.ToString(perfData.GetLabels("tutorial", "binary svm"), new OrdinalPerfMetric[] { }));
            Output.WriteLine(sumPerfMatrix.ToString(new ClassPerfMetric[] { }));

            foreach (string label in perfData.GetLabels("tutorial", "binary svm"))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 perfData.GetAvg("tutorial", "binary svm", ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
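If in doubt that stratification worked, the proportion printout from the start of Run can be repeated per fold. A minimal sketch that would sit inside the fold loop, right after the split; it reuses only calls already shown above:

                // sanity check: label proportions in this test fold should mirror the full set
                foreach (var g in testSet.GroupBy(le => le.Label))
                {
                    Output.WriteLine("fold {0}: {1} {2}\t {3:0.00}", foldN, g.Key, g.Count(), (double)g.Count() / testSet.Count);
                }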
Example #5: appending per-model performance rows to a tab-separated report
        public override void Make(Task task)
        {
            if (Writer != null)
            {
                var sb = new StringBuilder();
                if (Writer.BaseStream.Length == 0)
                {
                    // write header
                    sb.Append("Experiment").Append("\t");
                    sb.Append("Task").Append("\t");
                    sb.Append("Dataset").Append("\t");
                    sb.Append("Volume").Append("\t");
                    sb.Append("Filtered Volume").Append("\t");
                    sb.Append("Model").Append("\t");
                    sb.Append("From").Append("\t");
                    sb.Append("To").Append("\t");
                    sb.Append("Interval Days").Append("\t");
                    sb.Append("Performance Millis").Append("\t");
                    foreach (SentimentLabel label in OrderedLabels)
                    {
                        sb.Append(label).Append(" Actual").Append("\t");
                        sb.Append(label).Append(" Predicted").Append("\t");
                    }
                    foreach (PerfMetric metric in Metrics)
                    {
                        sb.Append(metric).Append("\t");
                    }
                    foreach (ClassPerfMetric metric in ClassMetrics)
                    {
                        foreach (SentimentLabel label in OrderedLabels)
                        {
                            sb.Append(metric).Append(" - ").Append(label).Append("\t");
                        }
                    }
                    foreach (OrdinalPerfMetric metric in OrdinalMetrics)
                    {
                        sb.Append(metric).Append("\t");
                    }

                    foreach (Tuple<string, Func<string>> field in mExtraFields)
                    {
                        sb.Append(field.Item1).Append("\t");
                    }
                    sb.AppendLine();
                    lock (Writer)
                    {
                        Writer.Write(sb);
                        Writer.Flush();
                    }
                    sb = new StringBuilder();
                }

                TaskContext ctx = task.Context;
                for (int i = 0; i < ctx.Models.Length; i++)
                {
                    PerfMatrix<SentimentLabel> perfMatrix = task.PerfData.GetPerfMatrix(task.ExperimentName, task.Context.GetModelName(i), 1);

                    sb.Append(task.ExperimentName ?? "").Append("\t");
                    sb.Append(task.Name ?? "").Append("\t");
                    sb.Append(ctx.DataSource.Name).Append("\t");
                    sb.Append(ctx.DataSource.DataSize).Append("\t");
                    sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetSumAll().ToString("d")).Append("\t");
                    sb.Append(ctx.GetModelName(i)).Append("\t");
                    sb.Append(ctx.DataSource.From == null ? "" : ctx.DataSource.From.Value.ToString("yyyy-MM-dd HH:mm:ss")).Append("\t");
                    sb.Append(ctx.DataSource.To == null ? "" : ctx.DataSource.To.Value.ToString("yyyy-MM-dd HH:mm:ss")).Append("\t");
                    sb.Append(ctx.DataSource.From == null || ctx.DataSource.To == null ? "0" :
                              (ctx.DataSource.To.Value - ctx.DataSource.From.Value).TotalDays.ToString("f1")).Append("\t");
                    sb.Append((task.PerformDuration ?? TimeSpan.Zero).TotalMilliseconds.ToString("f1")).Append("\t");


                    if (perfMatrix != null)
                    {
                        perfMatrix.AddLabels(OrderedLabels); // some ordered metrics require this
                    }

                    foreach (SentimentLabel label in OrderedLabels)
                    {
                        sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetActual(label).ToString("d")).Append("\t");
                        sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetPredicted(label).ToString("d")).Append("\t");
                    }
                    foreach (PerfMetric metric in Metrics)
                    {
                        sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric).ToString("n3")).Append("\t");
                    }
                    foreach (ClassPerfMetric metric in ClassMetrics)
                    {
                        foreach (SentimentLabel label in OrderedLabels)
                        {
                            sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric, label).ToString("n3")).Append("\t");
                        }
                    }
                    foreach (OrdinalPerfMetric metric in OrdinalMetrics)
                    {
                        sb.Append(perfMatrix == null ? "NaN" : perfMatrix.GetScore(metric, OrderedLabels).ToString("n3")).Append("\t");
                    }

                    foreach (Tuple<string, Func<string>> field in mExtraFields)
                    {
                        sb.Append(field.Item2()).Append("\t");
                    }
                    sb.AppendLine();
                }
                lock (Writer)
                {
                    Writer.Write(sb);
                    Writer.Flush();
                }
            }
        }
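The result is a tab-separated table with a single header row, so it can be consumed with nothing more than string splitting. A minimal sketch using System.IO and System.Linq, with a hypothetical file path:

        // hypothetical consumer of the report written by Make
        string[] rows = File.ReadAllLines("results.tsv"); // the path is an assumption
        string[] header = rows[0].Split('\t');
        foreach (string[] fields in rows.Skip(1).Select(r => r.Split('\t')))
        {
            Console.WriteLine("{0} = {1}", header[0], fields[0]); // e.g. the Experiment column
        }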