/// <summary>
/// Combines the given scores into a single aggregate score: accuracy and error
/// metrics are averaged, while example and confusion-matrix counts are summed.
/// </summary>
/// <param name="scores">Scores to combine.</param>
/// <returns>
/// <c>null</c> when <paramref name="scores"/> is <c>null</c>, a default <see cref="Score"/>
/// when it is empty, otherwise the combined score.
/// </returns>
public static Score CombineScores(params Score[] scores)
{
    if (scores == null)
    {
        return null;
    }

    Score result = new Score();

    // Enumerable.Average throws InvalidOperationException on an empty sequence;
    // with nothing to combine, hand back the default score instead.
    if (scores.Length == 0)
    {
        return result;
    }

    // averaged metrics
    result.Accuracy = scores.Average(s => s.Accuracy);
    result.CoefRMSE = scores.Average(s => s.CoefRMSE);

    // summed counts
    result.Examples = scores.Sum(s => s.Examples);

    // MSE is an error metric like RMSE and MeanAbsError, so it is averaged
    // (it was previously summed, which grew with the number of scores).
    result.MSE = scores.Average(s => s.MSE);
    result.MeanAbsError = scores.Average(s => s.MeanAbsError);
    result.NormRMSE = scores.Average(s => s.NormRMSE);
    result.RMSE = scores.Average(s => s.RMSE);

    // confusion-matrix counts are totals across all scores
    result.TotalNegatives = scores.Sum(s => s.TotalNegatives);
    result.TotalPositives = scores.Sum(s => s.TotalPositives);
    result.TrueNegatives = scores.Sum(s => s.TrueNegatives);
    result.TruePositives = scores.Sum(s => s.TruePositives);
    result.FalseNegatives = scores.Sum(s => s.FalseNegatives);
    result.FalsePositives = scores.Sum(s => s.FalsePositives);

    return result;
}
/// <summary>
/// Calculates the Root Mean Squared Error for two vectors, i.e. the square
/// root of the value returned by <c>Score.ComputeMSE</c> for the same inputs.
/// </summary>
/// <param name="y1">Predicted values.</param>
/// <param name="y2">Actual values.</param>
/// <returns>The root mean squared error as a double.</returns>
public static double ComputeRMSE(Vector y1, Vector y2)
{
    double meanSquaredError = Score.ComputeMSE(y1, y2);
    return System.Math.Sqrt(meanSquaredError);
}
/// <summary>
/// Generates a multi-class classification model using a specialist classifier for each class label.
/// <para>When the examples contain more than two distinct labels, one classifier is trained per label
/// (in parallel) and the individual scores are combined. Otherwise a single classifier is trained,
/// using the examples whose converted label equals 1 as positives and all others as negatives.</para>
/// </summary>
/// <param name="generator">The generator to use for each individual classifier.</param>
/// <param name="examples">Training examples of any number of classes.</param>
/// <param name="trainingPercentage">Percentage of training examples to use, i.e. 70% = 0.7.
/// Values greater than 1 are treated as whole percentages and divided by 100.</param>
/// <param name="mixingPercentage">Percentage to mix positive and negative examples, i.e. 50% will add an additional 50% of
/// <paramref name="trainingPercentage"/> of negative examples into each classifier when training.
/// Values greater than 1 are treated as whole percentages and divided by 100.</param>
/// <param name="isMultiClass">Determines whether each class is mutually inclusive; the value is stored on the returned model.
/// <para>For example: If True, each class takes on a number of classes and does not necessarily belong to one specific class.</para>
/// <para>The output would then be a number of predicted classes for a single prediction. E.g. A song would be True as it may belong to classes: vocals, rock as well as bass.</para>
/// </param>
/// <returns>A classification model holding one classifier per label and the resulting score.</returns>
/// <exception cref="ArgumentNullException"><paramref name="generator"/> or <paramref name="examples"/> is null.</exception>
/// <exception cref="InvalidOperationException">The single-classifier fallback found no positive examples.</exception>
public static ClassificationModel Learn(IGenerator generator, IEnumerable<object> examples, double trainingPercentage, double mixingPercentage = 0.5, bool isMultiClass = true)
{
    if (generator == null)
    {
        throw new ArgumentNullException("generator");
    }

    if (examples == null)
    {
        throw new ArgumentNullException("examples");
    }

    Descriptor descriptor = generator.Descriptor;

    // accept percentages either as fractions (0.7) or as whole numbers (70)
    trainingPercentage = (trainingPercentage > 1.0 ? trainingPercentage / 100 : trainingPercentage);
    mixingPercentage = (mixingPercentage > 1.0 ? mixingPercentage / 100 : mixingPercentage);

    // the sequence is walked several times below, so materialise it once
    // (kept typed as IEnumerable<object> so the same extension overloads are used)
    ICollection<object> materialized = examples as ICollection<object> ?? examples.ToList();
    IEnumerable<object> source = materialized;

    // bucket the examples by their label value
    var classGroups = source.Select(s => new { Label = descriptor.GetValue(s, descriptor.Label), Item = s })
                            .GroupBy(g => g.Label)
                            .ToDictionary(k => k.Key, v => v.Select(s => s.Item).ToArray());

    int classes = classGroups.Count;

    Dictionary<object, IClassifier> models;
    Score finalScore;

    if (classes > 2)
    {
        models = new Dictionary<object, IClassifier>(classes);

        Task<Tuple<IClassifier, Score, object>>[] learningTasks = new Task<Tuple<IClassifier, Score, object>>[classes];

        int taskIndex = 0;
        foreach (var classGroup in classGroups)
        {
            // copy into per-iteration locals so the task closure never sees a shared loop variable
            object label = classGroup.Key;
            object[] truthExamples = classGroup.Value;

            // number of negatives borrowed from each of the other classes
            int mix = (int)System.Math.Ceiling(((truthExamples.Length * trainingPercentage) * mixingPercentage) / classes);

            object[] falseExamples = classGroups.Where(w => !object.Equals(w.Key, label))
                                                .SelectMany(s => s.Value.Take(mix))
                                                .ToArray();

            models.Add(label, null);

            learningTasks[taskIndex++] = Task.Factory.StartNew(
                    () => MultiClassLearner.GenerateModel(generator, truthExamples, falseExamples, label, trainingPercentage, label)
                );
        }

        Task.WaitAll(learningTasks);

        Score[] scores = new Score[learningTasks.Length];

        for (int c = 0; c < learningTasks.Length; c++)
        {
            // Item3 carries the label passed in as state, which keys the classifier
            Tuple<IClassifier, Score, object> outcome = learningTasks[c].Result;
            models[outcome.Item3] = outcome.Item1;
            scores[c] = outcome.Item2;
        }

        finalScore = Score.CombineScores(scores);
    }
    else
    {
        // fallback to single classifier for two class classification
        var dataset = descriptor.Convert(source, true).ToExamples();
        var positives = source.Slice(dataset.Item2.Indices(f => f == 1d)).ToArray();
        var negatives = source.Slice(dataset.Item2.Indices(w => w != 1d)).ToArray();

        if (positives.Length == 0)
        {
            throw new InvalidOperationException("Cannot train a classifier: no examples with a positive label were found.");
        }

        var label = descriptor.GetValue(positives.First(), descriptor.Label);

        var model = MultiClassLearner.GenerateModel(generator, positives, negatives, label, trainingPercentage, label);
        finalScore = model.Item2;

        models = new Dictionary<object, IClassifier>() { { label, model.Item1 } };
    }

    ClassificationModel classificationModel = new ClassificationModel()
    {
        Generator = generator,
        Classifiers = models,
        IsMultiClass = isMultiClass,
        Score = finalScore
    };

    return classificationModel;
}
/// <summary>
/// Trains one classifier that separates the truth examples from the false examples and
/// returns it together with its test score and the caller supplied state object.
/// </summary>
/// <param name="generator">Generator to use for the model.</param>
/// <param name="truthExamples">True examples.</param>
/// <param name="falseExamples">False examples.</param>
/// <param name="truthLabel">Truth label object.</param>
/// <param name="trainingPct">Training percentage.</param>
/// <param name="state">Object state, returned untouched as the third tuple item.</param>
/// <returns>A tuple of the trained classifier, its score and <paramref name="state"/>.</returns>
private static Tuple<IClassifier, Score, object> GenerateModel(IGenerator generator, object[] truthExamples, object[] falseExamples, object truthLabel, double trainingPct, object state = null)
{
    Descriptor descriptor = generator.Descriptor;

    // NOTE(review): Union drops duplicate examples (default equality) - confirm Concat was not intended
    object[] pool = truthExamples.Union(falseExamples).Shuffle().ToArray();

    int poolSize = pool.Length;
    int trainCount = (int)System.Math.Floor((double)poolSize * trainingPct);

    // whatever is not used for training is held out for testing
    int[] testIndices = Learner.GetTestPoints(poolSize - trainCount, poolSize).ToArray();
    int[] trainIndices = Learner.GetTrainingPoints(testIndices, poolSize).ToArray();

    var trainingSet = generator.Descriptor.Convert(pool.Slice(trainIndices).ToArray(), true).ToExamples();

    // relabel every example against the truth label, then train on the training rows only
    Vector allLabels = MultiClassLearner.ChangeClassLabels(pool.ToArray(), descriptor, truthLabel);
    IModel model = generator.Generate(trainingSet.Item1, allLabels.Slice(trainIndices));

    // nothing held out: return an empty score
    if (testIndices.Length == 0)
    {
        return new Tuple<IClassifier, Score, object>((IClassifier)model, new Score(), state);
    }

    object[] heldOut = pool.Slice(testIndices).ToArray();
    var testSet = generator.Descriptor.Convert(heldOut, true).ToExamples();

    Vector predicted = new Vector(heldOut.Length);

    // expected values use the same relabelling as the training labels
    Vector expected = MultiClassLearner.ChangeClassLabels(heldOut.ToArray(), descriptor, truthLabel);

    for (int row = 0; row < heldOut.Length; row++)
    {
        predicted[row] = model.Predict(testSet.Item1[row, VectorType.Row]);
    }

    Score score = Score.ScorePredictions(predicted, expected);

    return new Tuple<IClassifier, Score, object>((IClassifier)model, score, state);
}
/// <summary>
/// Trains a single model based on a generator a predefined number of times with the provided
/// examples and data split, and selects the model whose score has the lowest RMSE.
/// </summary>
/// <param name="examples">Source data.</param>
/// <param name="trainingPercentage">Data split percentage. Values greater than 1 are divided by 100.</param>
/// <param name="repeat">Number of repetitions per generator; must be at least 1.</param>
/// <param name="generator">Model generator used.</param>
/// <returns>Best model for provided generator.</returns>
/// <exception cref="ArgumentNullException"><paramref name="examples"/> or <paramref name="generator"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="repeat"/> is less than 1.</exception>
/// <exception cref="InvalidOperationException">No usable model was found although test data was held out.</exception>
public static LearningModel Learn(IEnumerable<object> examples, double trainingPercentage, int repeat, IGenerator generator)
{
    if (examples == null)
    {
        throw new ArgumentNullException("examples");
    }

    if (generator == null)
    {
        throw new ArgumentNullException("generator");
    }

    // with no repetitions there is no model to pick from
    if (repeat < 1)
    {
        throw new ArgumentOutOfRangeException("repeat", repeat, "At least one repetition is required.");
    }

    // materialise once: the sequence is counted, converted and re-read on every repetition
    ICollection<object> materialized = examples as ICollection<object> ?? examples.ToList();
    IEnumerable<object> source = materialized;
    int total = materialized.Count;

    var descriptor = generator.Descriptor;
    var data = descriptor.Convert(source).ToExamples();

    Matrix x = data.Item1;
    Vector y = data.Item2;

    var models = new IModel[repeat];
    var scores = new Score[repeat];

    if (trainingPercentage > 1.0)
    {
        trainingPercentage /= 100.0;
    }

    // every repetition shares the converted data and computes its own train/test split
    for (int i = 0; i < models.Length; i++)
    {
        var t = GenerateModel(generator, x, y, source, trainingPercentage, total);
        models[i] = t.Model;
        scores[i] = t.Score;
    }

    int idx = scores.Select(s => s.RMSE).MinIndex();

    // sanity check, for convergence failures
    if (idx < 0 && trainingPercentage < 1d)
    {
        throw new InvalidOperationException("All models failed to initialize properly");
    }
    else if (idx < 0)
    {
        // nothing was held out for testing, so fall back to the first model
        idx = 0;
    }

    return new LearningModel { Generator = generator, Model = models[idx], Score = scores[idx] };
}
/// <summary>
/// Generates a model from a training slice of the data and scores it against the remaining test slice.
/// </summary>
/// <param name="generator">Model generator used.</param>
/// <param name="x">Feature matrix for all examples.</param>
/// <param name="y">Label vector for all examples.</param>
/// <param name="examples">Source data.</param>
/// <param name="trainingPct">The training pct.</param>
/// <param name="total">Number of Examples</param>
/// <returns>The generated model with its score; the score is a default <see cref="Score"/> when no test points exist.</returns>
private static LearningModel GenerateModel(IGenerator generator, Matrix x, Vector y, IEnumerable<object> examples, double trainingPct, int total)
{
    var descriptor = generator.Descriptor;
    var trainingCount = (int)System.Math.Floor(total * trainingPct);

    // 100 - trainingPercentage for testing
    var testingSlice = GetTestPoints(total - trainingCount, total).ToArray();

    // trainingPercentage for training
    var trainingSlice = GetTrainingPoints(testingSlice, total).ToArray();

    // training
    var x_t = x.Slice(trainingSlice);
    var y_t = y.Slice(trainingSlice);

    // generate model
    var model = generator.Generate(x_t, y_t);
    model.Descriptor = descriptor;

    Score score = new Score();

    if (testingSlice.Length > 0)
    {
        // testing
        object[] test = GetTestExamples(testingSlice, examples);
        Vector y_pred = new Vector(test.Length);
        Vector y_test = descriptor.ToExamples(test).Item2;

        bool isBinary = y_test.IsBinary();
        if (isBinary)
        {
            y_test = y_test.ToBinary(f => f == 1d, 1.0, 0.0);
        }

        for (int j = 0; j < test.Length; j++)
        {
            // items under test
            object o = test[j];

            // make prediction
            var features = descriptor.Convert(o, false).ToVector();
            double val = model.Predict(features);

            // compare in label space: convert the raw prediction back to a label value
            var pred = descriptor.Label.Convert(val);
            var truth = Ject.Get(o, descriptor.Label.Name);

            // static Equals is null-safe, so an example with a null label cannot throw here
            if (object.Equals(truth, pred))
            {
                // correct prediction: record the expected value
                y_pred[j] = y_test[j];
            }
            else
            {
                // wrong prediction: the opposite class for binary labels, otherwise the raw value
                y_pred[j] = (isBinary ? (y_test[j] >= 1d ? 0d : 1d) : val);
            }
        }

        // score predictions
        score = Score.ScorePredictions(y_pred, y_test);
    }

    return new LearningModel { Generator = generator, Model = model, Score = score };
}