示例#1
0
        /// <summary>
        /// Trains a one-vs-one multi-class SVM: one binary classifier is trained for every
        /// unordered pair of classes found in <paramref name="observations"/>.
        /// </summary>
        /// <param name="observations">Labeled training examples; grouped by label into classes.</param>
        /// <param name="k">Kernel used by the underlying binary SVM trainer.</param>
        /// <param name="equalityComparer">Optional label comparer; defaults to
        /// <see cref="EqualityComparer{TLabel}.Default"/>.</param>
        /// <returns>A multi-class SVM wrapping the pairwise classifiers and the id-to-label map.</returns>
        public static MultiClassSvm <TValue, TLabel> Train <TValue, TLabel>(
            IEnumerable <Observation <TValue, TLabel> > observations,
            IKernel <TValue> k,
            IEqualityComparer <TLabel> equalityComparer = null)
        {
            // Resolve the comparer once so that grouping and dictionary lookups agree.
            IEqualityComparer <TLabel> comparer = equalityComparer ?? EqualityComparer <TLabel> .Default;

            var classToObservations = new Dictionary <TLabel, List <TValue> >(comparer);
            var idToLabelMap        = new Dictionary <int, TLabel>();
            var labelId             = 0;

            // BUG FIX: the comparer is now passed to GroupBy as well. Previously grouping used
            // the default comparer while the dictionary used the custom one, so a custom
            // comparer that merges labels the default comparer keeps distinct would throw on
            // Add (duplicate key) and could split one logical class across groups.
            foreach (var labelGroup in observations.GroupBy(t => t.Label, comparer))
            {
                idToLabelMap.Add(labelId, labelGroup.Key);
                classToObservations.Add(labelGroup.Key, labelGroup.Select(o => o.Value).ToList());
                labelId++;
            }

            int classCount  = classToObservations.Count;
            var classifiers = new BinarySvm <TValue> [classCount, classCount];

            // Train the upper triangle (i < j): one binary problem per class pair, with
            // class i examples labeled -1 and class j examples labeled +1.
            for (int i = 0; i < classCount; i++)
            {
                var group1 = classToObservations[idToLabelMap[i]]; // invariant over the inner loop

                for (int j = i + 1; j < classCount; j++)
                {
                    var group2 = classToObservations[idToLabelMap[j]];
                    var merged = group1.Select(entry => new BinaryObservation <TValue>(entry, -1))
                                 .Concat(group2.Select(entry => new BinaryObservation <TValue>(entry, 1)));
                    classifiers[i, j] = BinarySvmTrainer.Train(merged.ToArray(), k);
                }
            }

            return(new MultiClassSvm <TValue, TLabel>(classifiers, idToLabelMap));
        }
示例#2
0
        /// <summary>
        /// Runs every tutorial example in sequence, forwarding the same command-line
        /// arguments to each. Each RunInstanceNull call is presumably a create-and-run
        /// factory defined on the respective tutorial class -- TODO confirm against
        /// the tutorial base class.
        /// </summary>
        static void TestAll(string[] args)
        {
            // data
            DataStructures.RunInstanceNull(args);
            SparseVector.RunInstanceNull(args);
            SparseMatrix.RunInstanceNull(args);
            Stateful.RunInstanceNull(args);
            Cloning.RunInstanceNull(args);
            Serialization.RunInstanceNull(args);

            // model
            Bow.RunInstanceNull(args);
            BinarySvm.RunInstanceNull(args);

            // clustering
            KMeans.RunInstanceNull(args);

            // validation
            NFold.RunInstanceNull(args);
            NFoldClass.RunInstanceNull(args);

            // other
            Searching.RunInstanceNull(args);
            TextProcessing.RunInstanceNull(args);
        }
示例#3
0
        /// <summary>
        /// Demonstrates cross validation with the CrossValidator convenience class:
        /// a naive Bayes model is validated over the binary-vector form of the tutorial
        /// dataset, with per-fold and aggregate performance written to Output.
        /// </summary>
        public override void Run(object[] args)
        {
            // Reuse the binary SVM tutorial instance to obtain the labeled data.
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var labeledData = (LabeledDataset <string, SparseVector <double> >)classifierInst.Result["labeled_data"];

            // Re-encode the examples as binary vectors.
            var binaryDs = (LabeledDataset <string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // Configure the convenience validator one property at a time.
            var crossValidator = new CrossValidator <string, BinaryVector>();

            crossValidator.NumFolds     = 10;   // default
            crossValidator.IsStratified = true; // default
            crossValidator.ExpName      = "";   // default
            crossValidator.Dataset      = binaryDs;

            crossValidator.OnAfterTrain = (sender, foldN, model, trainSet) =>
            {
                var trained = (NaiveBayesClassifier <string>)model;
                // do stuff after model is trained for a fold...
            };

            crossValidator.OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
            {
                Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                return true;
            };

            crossValidator.OnAfterFold = (sender, foldN, trainSet, foldPredictions) =>
            {
                PerfMatrix <string> foldMatrix = sender.PerfData.GetPerfMatrix(sender.ExpName, sender.GetModelName(0), foldN);
                Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
            };

            crossValidator.Models.Add(new NaiveBayesClassifier <string>());
            crossValidator.Run();

            // Aggregate results over all folds.
            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix <string> sumMatrix = crossValidator.PerfData.GetSumPerfMatrix("", crossValidator.GetModelName(0));

            Output.WriteLine(sumMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());

            // Per-label precision with its standard deviation across folds.
            foreach (string label in crossValidator.PerfData.GetLabels("", crossValidator.GetModelName(0)))
            {
                double deviation;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 crossValidator.PerfData.GetAvg("", crossValidator.GetModelName(0), ClassPerfMetric.Precision, label, out deviation), deviation);
            }
        }
示例#4
0
 /// <summary>
 /// Creates a multi-class SVM from pre-trained pairwise (one-vs-one) binary classifiers.
 /// </summary>
 /// <param name="classifiers">Matrix of binary classifiers indexed by class-id pair.</param>
 /// <param name="idToLabelMap">Maps class indices of the matrix to their labels.</param>
 /// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
 internal MultiClassSvm(BinarySvm <TValue>[,] classifiers, Dictionary <int, TLabel> idToLabelMap)
 {
     // Fail fast on null so the error surfaces at construction rather than at prediction time.
     if (classifiers == null)
     {
         throw new ArgumentNullException(nameof(classifiers));
     }
     if (idToLabelMap == null)
     {
         throw new ArgumentNullException(nameof(idToLabelMap));
     }

     _classifiers  = classifiers;
     _idToLabelMap = idToLabelMap;
 }
示例#5
0
        /// <summary>
        /// Demonstrates multi-threaded cross validation using TaskCrossValidator.
        /// Fold- and model-level tasks are run in parallel via Parallel.ForEach; equivalent
        /// SmartThreadPool variants are kept below as commented-out alternatives.
        /// Per-fold and aggregate performance is written to Output.
        /// </summary>
        public override void Run(object[] args)
        {
            // get labeled data
            BinarySvm classifierInst = BinarySvm.RunInstanceNull(args);
            var       labeledData    = (LabeledDataset <string, SparseVector <double> >)classifierInst.Result["labeled_data"];

            // convert dataset to binary vector
            var ds = (LabeledDataset <string, BinaryVector>)labeledData.ConvertDataset(typeof(BinaryVector), false);

            // cross validation with task validator
            var validator = new TaskCrossValidator <string, BinaryVector>(new System.Func <IModel <string, BinaryVector> >[]
            {
                // model instances are constructed on the fly
                () => new NaiveBayesClassifier <string>()
            })
            {
                NumFolds     = 10,   // default
                IsStratified = true, // default
                ExpName      = "",   // default

                Dataset      = ds,
                OnAfterTrain = (sender, foldN, model, trainSet) =>
                {
                    var m = (NaiveBayesClassifier <string>)model;
                    // do stuff after model is trained for a fold...
                },
                OnAfterPrediction = (sender, foldN, model, ex, le, prediction) =>
                {
                    // lock: this callback runs concurrently across folds/models below
                    lock (Output) Output.WriteLine("actual: {0} \tpredicted: {1}\t score: {2:0.0000}", le.Label, prediction.BestClassLabel, prediction.BestScore);
                    return(true);
                }
            };


            var cores = (int)(Math.Round(Environment.ProcessorCount * 0.9) - 1); // use 90% of cpu cores

            Output.WriteLine("Multi-threaded using {0} cores\n", cores);
            Output.Flush();


            // using .net framework

            // model level parallelization: outer loop over folds, inner loop over the
            // model tasks produced for each fold
            Parallel.ForEach(
                validator.GetFoldAndModelTasks(),
                new ParallelOptions {
                MaxDegreeOfParallelism = cores
            },
                foldTask => Parallel.ForEach(
                    foldTask(),
                    new ParallelOptions {
                MaxDegreeOfParallelism = cores
            },
                    modelTask => modelTask()
                    )
                );

            // fold level

/*
 *          Parallel.ForEach(validator.GetFoldTasks(), new ParallelOptions { MaxDegreeOfParallelism = cores }, t => t());
 */



            // for some serious workload better use SmartThreadPool
            // requires reference to package https://www.nuget.org/packages/SmartThreadPool.dll/

            // NOTE(review): with the Parallel.ForEach path above this list is never
            // populated -- only the commented-out SmartThreadPool variants add to it.
            var exceptions = new List <Exception>();

            // model level parallelization

/*
 *          var threadPool = new SmartThreadPool { MaxThreads = cores };
 *          foreach (System.Func<Action[]> foldTask in validator.GetFoldAndModelTasks())
 *          {
 *              System.Func<Action[]> ft = foldTask;
 *              threadPool.QueueWorkItem(o =>
 *              {
 *                  foreach (Action modelTask in ft())
 *                  {
 *                      Action mt = modelTask;
 *                      threadPool.QueueWorkItem(p =>
 *                      {
 *                          mt();
 *                          return null;
 *                      }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
 *                  }
 *                  return null;
 *              }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
 *          }
 *          threadPool.WaitForIdle();
 *          threadPool.Shutdown();
 */

            // fold level

/*
 *          var threadPool = new SmartThreadPool { MaxThreads = cores };
 *          foreach (Action foldTask in validator.GetFoldTasks())
 *          {
 *              Action ft = foldTask;
 *              threadPool.QueueWorkItem(o =>
 *              {
 *                  ft();
 *                  return null;
 *              }, null, wi => { if (wi.Exception != null) { exceptions.Add((Exception)wi.Exception); } });
 *          }
 *          threadPool.WaitForIdle();
 *          threadPool.Shutdown();
 */

            // surface the first captured validation error, if any
            foreach (Exception exception in exceptions)
            {
                throw new Exception("Error during validation", exception);
            }



            // aggregate results over all folds
            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix <string> sumPerfMatrix = validator.PerfData.GetSumPerfMatrix("", validator.GetModelName(0));

            Output.WriteLine(sumPerfMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumPerfMatrix.GetAccuracy());
            // per-label precision with its standard deviation across folds
            foreach (string label in validator.PerfData.GetLabels("", validator.GetModelName(0)))
            {
                double stdDev;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 validator.PerfData.GetAvg("", validator.GetModelName(0), ClassPerfMetric.Precision, label, out stdDev), stdDev);
            }
        }
示例#6
0
        /// <summary>
        /// Demonstrates manual n-fold cross validation of a binary SVM: the labeled data is
        /// split per fold, the classifier is retrained, the held-out set is scored, and the
        /// results are accumulated in a PerfData instance and written to Output.
        /// </summary>
        public override void Run(object[] args)
        {
            // The first optional argument is the fold count (defaults to 10); the remaining
            // arguments are forwarded to the binary SVM tutorial.
            int numFolds = 10;
            if (args.Any())
            {
                numFolds = (int)args[0];
            }
            args = args.Skip(1).ToArray();

            // Get the classifier and the labeled data from the SVM tutorial instance.
            BinarySvm tutorial = BinarySvm.RunInstanceNull(args);
            var model   = (SvmBinaryClassifier <string>)tutorial.Result["classifier"];
            var dataset = (LabeledDataset <string, SparseVector <double> >)tutorial.Result["labeled_data"];

            bool useStratified = true;

            // Prepare the data for splitting: grouped by label for stratified splits,
            // otherwise shuffled with a fixed seed for reproducibility.
            if (useStratified)
            {
                dataset.GroupLabels(true);
            }
            else
            {
                dataset.Shuffle(new Random(1));
            }

            var perfData = new PerfData <string>();

            // Report the label distribution of the full dataset.
            foreach (var labelGroup in dataset.GroupBy(le => le.Label))
            {
                Output.WriteLine("total {0} {1}\t {2:0.00}", labelGroup.Key, labelGroup.Count(), (double)labelGroup.Count() / dataset.Count);
            }

            Output.WriteLine("Performing {0}{1}-fold cross validation...", useStratified ? "stratified " : "", numFolds);
            for (int foldN = 1; foldN <= numFolds; foldN++)
            {
                LabeledDataset <string, SparseVector <double> > testSet;
                LabeledDataset <string, SparseVector <double> > trainSet;

                if (useStratified)
                {
                    dataset.SplitForStratifiedCrossValidation(numFolds, foldN, out trainSet, out testSet);
                }
                else
                {
                    dataset.SplitForCrossValidation(numFolds, foldN, out trainSet, out testSet);
                }

                model.Train(trainSet);

                // Score the held-out fold into its own confusion matrix.
                PerfMatrix <string> foldMatrix = perfData.GetPerfMatrix("tutorial", "binary svm", foldN);
                foreach (LabeledExample <string, SparseVector <double> > testExample in testSet)
                {
                    Prediction <string> prediction = model.Predict(testExample.Example);
                    foldMatrix.AddCount(testExample.Label, prediction.BestClassLabel);
                }
                Output.WriteLine("Accuracy for {0}-fold: {1:0.00}", foldN, foldMatrix.GetAccuracy());
            }

            // Aggregate metrics over all folds.
            Output.WriteLine("Sum confusion matrix:");
            PerfMatrix <string> sumMatrix = perfData.GetSumPerfMatrix("tutorial", "binary svm");

            Output.WriteLine(sumMatrix.ToString());
            Output.WriteLine("Average accuracy: {0:0.00}", sumMatrix.GetAccuracy());
            Output.WriteLine();
            Output.WriteLine(sumMatrix.ToString(new PerfMetric[] { }));
            Output.WriteLine(sumMatrix.ToString(perfData.GetLabels("tutorial", "binary svm"), new OrdinalPerfMetric[] { }));
            Output.WriteLine(sumMatrix.ToString(new ClassPerfMetric[] { }));

            // Per-label precision with its standard deviation across folds.
            foreach (string label in perfData.GetLabels("tutorial", "binary svm"))
            {
                double deviation;
                Output.WriteLine("Precision for '{0}': {1:0.00} std. dev: {2:0.00}", label,
                                 perfData.GetAvg("tutorial", "binary svm", ClassPerfMetric.Precision, label, out deviation), deviation);
            }
        }