Contains scoring statistics for a given model.
Example #1
        /// <summary>
        /// Combines and averages metrics across all the given scores.
        /// </summary>
        /// <param name="scores">Scores.</param>
        /// <returns>The combined <see cref="Score"/>.</returns>
        public static Score CombineScores(params Score[] scores)
        {
            if (scores == null) return null;

            Score result = new Score();

            result.Accuracy = scores.Average(s => s.Accuracy);
            result.CoefRMSE = scores.Average(s => s.CoefRMSE);
            result.Examples = scores.Sum(s => s.Examples);

            result.MSE = scores.Sum(s => s.MSE);

            result.MeanAbsError = scores.Average(s => s.MeanAbsError);
            result.NormRMSE = scores.Average(s => s.NormRMSE);
            result.RMSE = scores.Average(s => s.RMSE);

            result.TotalNegatives = scores.Sum(s => s.TotalNegatives);
            result.TotalPositives = scores.Sum(s => s.TotalPositives);

            result.TrueNegatives = scores.Sum(s => s.TrueNegatives);
            result.TruePositives = scores.Sum(s => s.TruePositives);
            result.FalseNegatives = scores.Sum(s => s.FalseNegatives);
            result.FalsePositives = scores.Sum(s => s.FalsePositives);

            return result;
        }
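
For reference, a minimal call-site sketch. The fold vectors are made-up values, and the Vector(double[]) constructor and the Score.ScorePredictions helper (shown in the later examples) are assumed to be available:

// Sketch only: score two hypothetical validation folds, then merge the results.
Vector fold1Pred   = new Vector(new[] { 1d, 0d, 1d, 1d });
Vector fold1Actual = new Vector(new[] { 1d, 0d, 0d, 1d });
Vector fold2Pred   = new Vector(new[] { 0d, 0d, 1d });
Vector fold2Actual = new Vector(new[] { 0d, 1d, 1d });

Score fold1 = Score.ScorePredictions(fold1Pred, fold1Actual);
Score fold2 = Score.ScorePredictions(fold2Pred, fold2Actual);

// Aggregates the metrics across both folds as implemented above.
Score combined = Score.CombineScores(fold1, fold2);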
Example #2
 /// <summary>
 /// Computes the Root Mean Squared Error for the given inputs.
 /// </summary>
 /// <param name="y1">Predicted values.</param>
 /// <param name="y2">Actual values.</param>
 /// <returns>The root mean squared error.</returns>
 public static double ComputeRMSE(Vector y1, Vector y2)
 {
     return System.Math.Sqrt(Score.ComputeMSE(y1, y2));
 }
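
A small direct-call sketch, again assuming the Vector(double[]) constructor; the values are made up:

// Hypothetical predicted/actual values.
Vector predicted = new Vector(new[] { 2.5, 0.0, 2.0, 8.0 });
Vector actual    = new Vector(new[] { 3.0, -0.5, 2.0, 7.0 });

// Square root of the mean squared error between the two vectors.
double rmse = Score.ComputeRMSE(predicted, actual);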
Example #3
        /// <summary>
        /// Generate a multi-class classification model using a specialist classifier for each class label.
        /// </summary>
        /// <param name="generator">The generator to use for each individual classifier.</param>
        /// <param name="examples">Training examples of any number of classes</param>
        /// <param name="trainingPercentage">Percentage of training examples to use, i.e. 70% = 0.7</param>
        /// <param name="mixingPercentage">Percentage to mix positive and negative exmaples, i.e. 50% will add an additional 50% of 
        ///   <paramref name="trainingPercentage"/> of negative examples into each classifier when training.</param>
        /// <param name="isMultiClass">Determines whether each class is mutually inclusive. 
        ///   <para>For example: If True, each class takes on a number of classes and does not necessarily belong to one specific class.</para>
        ///   <para>The ouput would then be a number of predicted classes for a single prediction.  E.g. A song would be True as it may belong to classes: vocals, rock as well as bass.</para>
        /// </param>
        /// <returns></returns>
        public static ClassificationModel Learn(IGenerator generator, IEnumerable<object> examples, double trainingPercentage, double mixingPercentage = 0.5, bool isMultiClass = true)
        {
            Descriptor descriptor = generator.Descriptor;

            trainingPercentage = (trainingPercentage > 1.0 ? trainingPercentage / 100 : trainingPercentage);
            mixingPercentage = (mixingPercentage > 1.0 ? mixingPercentage / 100 : mixingPercentage);

            var classGroups = examples.Select(s => new
                                                {
                                                    Label = generator.Descriptor.GetValue(s, descriptor.Label),
                                                    Item = s
                                                })
                                       .GroupBy(g => g.Label)
                                       .ToDictionary(k => k.Key, v => v.Select(s => s.Item).ToArray());

            int classes = classGroups.Count();

            Dictionary<object, IClassifier> models = null;

            Score finalScore = new Score();

            if (classes > 2)
            {
                models = new Dictionary<object, IClassifier>(classes);

                Task<Tuple<IClassifier, Score, object>>[] learningTasks = new Task<Tuple<IClassifier, Score, object>>[classes];

                for (int y = 0; y < classes; y++)
                {
                    models.Add(classGroups.ElementAt(y).Key, null);

                    int mix = (int)System.Math.Ceiling(((classGroups.ElementAt(y).Value.Count() * trainingPercentage) * mixingPercentage) / classes);
                    object label = classGroups.ElementAt(y).Key;
                    object[] truthExamples = classGroups.ElementAt(y).Value;
                    object[] falseExamples = classGroups.Where(w => w.Key != classGroups.Keys.ElementAt(y))
                                                        .SelectMany(s => s.Value.Take(mix).ToArray())
                                                        .ToArray();

                    learningTasks[y] = Task.Factory.StartNew(
                            () => MultiClassLearner.GenerateModel(generator, truthExamples, falseExamples, label, trainingPercentage, label)
                        );
                }

                Task.WaitAll(learningTasks);

                Score[] scores = new Score[learningTasks.Count()];

                for (int c = 0; c < learningTasks.Count(); c++)
                {
                    models[learningTasks[c].Result.Item3] = learningTasks[c].Result.Item1;
                    scores[c] = learningTasks[c].Result.Item2;
                }

                finalScore = Score.CombineScores(scores);
            }
            else
            {
                // fallback to a single classifier for two-class classification

                var dataset = descriptor.Convert(examples, true).ToExamples();
                var positives = examples.Slice(dataset.Item2.Indices(f => f == 1d)).ToArray();
                var negatives = examples.Slice(dataset.Item2.Indices(w => w != 1d)).ToArray();

                var label = generator.Descriptor.GetValue(positives.First(), descriptor.Label);

                var model = MultiClassLearner.GenerateModel(generator, positives, negatives, label, trainingPercentage, label);
                finalScore = model.Item2;

                models = new Dictionary<object, IClassifier>() { { label, model.Item1 } };
            }

            ClassificationModel classificationModel = new ClassificationModel()
            {
                Generator = generator,
                Classifiers = models,
                IsMultiClass = isMultiClass,
                Score = finalScore
            };

            return classificationModel;
        }
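
A hypothetical call site. The Song class, the songs collection and NaiveBayesGenerator are illustrative assumptions; any generator with a bound Descriptor should work the same way:

// Hypothetical usage: Song is a user-defined class whose label property is picked up by the descriptor.
IGenerator generator = new NaiveBayesGenerator(2) { Descriptor = Descriptor.Create<Song>() };

// Trains one specialist classifier per class label found in the examples.
ClassificationModel model = MultiClassLearner.Learn(generator, songs, trainingPercentage: 0.7);
Console.WriteLine(model.Score.Accuracy);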
Example #4
        /// <summary>
        /// Generates and returns a new Tuple of objects: IClassifier, Score and object state
        /// </summary>
        /// <param name="generator">Generator to use for the model.</param>
        /// <param name="truthExamples">True examples.</param>
        /// <param name="falseExamples">False examples.</param>
        /// <param name="truthLabel">Truth label object.</param>
        /// <param name="trainingPct">Training percentage.</param>
        /// <param name="state">Object state</param>
        /// <returns></returns>
        private static Tuple<IClassifier, Score, object> GenerateModel(IGenerator generator, object[] truthExamples, object[] falseExamples, 
                                                                                object truthLabel, double trainingPct, object state = null)
        {
            Descriptor descriptor = generator.Descriptor;

            object[] examples = truthExamples.Union(falseExamples).Shuffle().ToArray();

            int total = examples.Count();

            int trainingCount = (int)System.Math.Floor((double)total * trainingPct);

            // 100 - trainingPercentage for testing
            int[] testingSlice = Learner.GetTestPoints(total - trainingCount, total).ToArray();
            int[] trainingSlice = Learner.GetTrainingPoints(testingSlice, total).ToArray();

            var training = generator.Descriptor.Convert(examples.Slice(trainingSlice).ToArray(), true).ToExamples();

            // convert label to 1's and 0's
            Vector y = MultiClassLearner.ChangeClassLabels(examples.ToArray(), descriptor, truthLabel);

            IModel model = generator.Generate(training.Item1, y.Slice(trainingSlice));

            Score score = new Score();

            if (testingSlice.Count() > 0)
            {
                object[] testExamples = examples.Slice(testingSlice).ToArray();
                var testing = generator.Descriptor.Convert(testExamples, true).ToExamples();

                Vector y_pred = new Vector(testExamples.Length);

                // make sure labels are 1 / 0 based
                Vector y_test = MultiClassLearner.ChangeClassLabels(testExamples.ToArray(), descriptor, truthLabel);

                for (int i = 0; i < testExamples.Length; i++)
                {
                    double result = model.Predict(testing.Item1[i, VectorType.Row]);

                    y_pred[i] = result;
                }

                score = Score.ScorePredictions(y_pred, y_test);
            }
            return new Tuple<IClassifier, Score, object>((IClassifier)model, score, state);
        }
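
The 1/0 relabelling performed by ChangeClassLabels can be pictured with the following hypothetical helper (illustration only, not the library's implementation):

// Illustration: a specialist classifier sees its truth label as 1 and every other label as 0.
static Vector ToBinaryLabels(object[] examples, Descriptor d, object truthLabel)
{
    var y = new Vector(examples.Length);
    for (int i = 0; i < examples.Length; i++)
        y[i] = d.GetValue(examples[i], d.Label).Equals(truthLabel) ? 1d : 0d;
    return y;
}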
Example #5
        /// <summary>
        /// Trains a model from the given generator a predefined number of times with the provided
        /// examples and data split, and selects the best (lowest-RMSE) model.
        /// </summary>
        /// <param name="examples">Source data.</param>
        /// <param name="trainingPercentage">Data split percentage.</param>
        /// <param name="repeat">Number of repetitions per generator.</param>
        /// <param name="generator">Model generator used.</param>
        /// <returns>Best model for provided generator.</returns>
        public static LearningModel Learn(IEnumerable<object> examples, double trainingPercentage, int repeat, IGenerator generator)
        {
            // count only once
            var total = examples.Count();
            var descriptor = generator.Descriptor;
            var data = descriptor.Convert(examples).ToExamples();

            Matrix x = data.Item1;
            Vector y = data.Item2;

            var models = new IModel[repeat];
            //var accuracy = Vector.Zeros(repeat);
            var scores = new Score[repeat];

            if (trainingPercentage > 1.0) trainingPercentage /= 100.0;

            // safe for parallelisation
            // read-only references to the data model
            // and update indices independently
            for (int i = 0; i < models.Length; i++)
            {
                var t = GenerateModel(generator, x, y, examples, trainingPercentage, total);
                models[i] = t.Model;
                scores[i] = t.Score;
            }

            int idx = scores.Select(s => s.RMSE).MinIndex();

            // sanity check, for convergence failures
            if (idx < 0 && trainingPercentage < 1d) throw new Exception("All models failed to initialize properly");
            else if (idx < 0) idx = 0;

            return new LearningModel { Generator = generator, Model = models[idx], Score = scores[idx] };
        }
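
A call-site sketch; DecisionTreeGenerator, Song and the songs collection are illustrative assumptions:

// Hypothetical usage: train 10 candidate models on an 80/20 split and keep the lowest-RMSE one.
IGenerator generator = new DecisionTreeGenerator { Descriptor = Descriptor.Create<Song>() };
LearningModel best = Learner.Learn(songs, 0.8, 10, generator);
Console.WriteLine(best.Score.RMSE);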
Example #6
        /// <summary>Generates a model.</summary>
        /// <param name="generator">Model generator used.</param>
        /// <param name="x">The Matrix to process.</param>
        /// <param name="y">The Vector to process.</param>
        /// <param name="examples">Source data.</param>
        /// <param name="trainingPct">The training pct.</param>
        /// <param name="total">Number of Examples</param>
        /// <returns>The model.</returns>
        private static LearningModel GenerateModel(IGenerator generator, Matrix x, Vector y, IEnumerable<object> examples, double trainingPct, int total)
        {
            var descriptor = generator.Descriptor;
            //var total = examples.Count();
            var trainingCount = (int)System.Math.Floor(total * trainingPct);

            // 100 - trainingPercentage for testing
            var testingSlice = GetTestPoints(total - trainingCount, total).ToArray();

            // trainingPercentage for training
            var trainingSlice = GetTrainingPoints(testingSlice, total).ToArray();

            // training
            var x_t = x.Slice(trainingSlice);
            var y_t = y.Slice(trainingSlice);

            // generate model
            var model = generator.Generate(x_t, y_t);
            model.Descriptor = descriptor;

            Score score = new Score();

            if (testingSlice.Count() > 0)
            {
                // testing
                object[] test = GetTestExamples(testingSlice, examples);
                Vector y_pred = new Vector(test.Length);
                Vector y_test = descriptor.ToExamples(test).Item2;

                bool isBinary = y_test.IsBinary();
                if (isBinary)
                    y_test = y_test.ToBinary(f => f == 1d, 1.0, 0.0);

                for (int j = 0; j < test.Length; j++)
                {
                    // items under test
                    object o = test[j];

                    // make prediction
                    var features = descriptor.Convert(o, false).ToVector();
                    double val = model.Predict(features);
                    var pred = descriptor.Label.Convert(val);

                    var truth = Ject.Get(o, descriptor.Label.Name);

                    if (truth.Equals(pred))
                        y_pred[j] = y_test[j];
                    else
                        y_pred[j] = (isBinary ? (y_test[j] >= 1d ? 0d : 1d) : val);
                }

                // score predictions
                score = Score.ScorePredictions(y_pred, y_test);
            }

            return new LearningModel { Generator = generator, Model = model, Score = score };
        }