/// <summary>
/// Trains a single model based on a generator a predefined number of times with the provided
/// examples and data split, and selects the best model: the one whose score has the
/// lowest RMSE.
/// </summary>
/// <param name="examples">Source data. Materialized once if it is not already a collection,
/// so lazy sequences are only enumerated a single time.</param>
/// <param name="trainingPercentage">Data split percentage. Values greater than 1.0 are
/// treated as whole percentages and divided by 100 (e.g. 80 becomes 0.8).</param>
/// <param name="repeat">Number of repetitions per generator; must be at least 1.</param>
/// <param name="generator">Model generator used.</param>
/// <returns>Best model for provided generator.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="examples"/> or <paramref name="generator"/> is null.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="repeat"/> is less than 1.
/// </exception>
/// <exception cref="InvalidOperationException">
/// No best model could be selected and the training percentage is below 1.
/// </exception>
public static LearningModel Learn(IEnumerable<object> examples, double trainingPercentage, int repeat, IGenerator generator)
{
    if (examples == null)
    {
        throw new ArgumentNullException("examples");
    }

    if (generator == null)
    {
        throw new ArgumentNullException("generator");
    }

    if (repeat < 1)
    {
        // Zero repetitions would leave nothing to select from; a negative count
        // would fail later with an unrelated array-allocation error.
        throw new ArgumentOutOfRangeException("repeat", repeat, "At least one repetition is required.");
    }

    // The examples are counted, converted and handed to every training run.
    // Snapshot lazy sequences so they are enumerated exactly once and every
    // run sees the same data; existing collections are used as-is.
    IEnumerable<object> source = examples as ICollection<object> ?? examples.ToList();

    // count only once
    var total = source.Count();

    var descriptor = generator.Descriptor;
    var data = descriptor.Convert(source).ToExamples();

    Matrix x = data.Item1;
    Vector y = data.Item2;

    var models = new IModel[repeat];
    var scores = new Score[repeat];

    // Accept both fractions (0.8) and whole percentages (80).
    if (trainingPercentage > 1.0)
    {
        trainingPercentage /= 100.0;
    }

    // safe for parallelisation:
    // read-only references to the data model
    // and update indices independently
    for (int i = 0; i < models.Length; i++)
    {
        var t = GenerateModel(generator, x, y, source, trainingPercentage, total);
        models[i] = t.Model;
        scores[i] = t.Score;
    }

    int idx = scores.Select(s => s.RMSE).MinIndex();

    // sanity check, for convergence failures: a negative index means no
    // minimum RMSE could be determined among the trained models.
    if (idx < 0)
    {
        if (trainingPercentage < 1d)
        {
            throw new InvalidOperationException("All models failed to initialize properly");
        }

        // With the training percentage at 1 (or above) fall back to the first model.
        idx = 0;
    }

    return new LearningModel
    {
        Generator = generator,
        Model = models[idx],
        Score = scores[idx]
    };
}