Beispiel #1
0
        /// <summary>
        /// Trains a model from a single generator a predefined number of times with the provided
        /// examples and data split, and returns the run whose score has the lowest RMSE.
        /// </summary>
        /// <param name="examples">
        /// Source data. Enumerated at most once by this method; a sequence that is not already
        /// a collection is buffered into a list before use.
        /// </param>
        /// <param name="trainingPercentage">
        /// Data split percentage. Values greater than 1.0 are divided by 100
        /// (for example 80 is treated as 0.8).
        /// </param>
        /// <param name="repeat">Number of repetitions per generator; must be at least 1.</param>
        /// <param name="generator">Model generator used.</param>
        /// <returns>
        /// A <c>LearningModel</c> holding the generator together with the selected model and its score.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="examples"/> or <paramref name="generator"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="repeat"/> is less than 1.</exception>
        /// <exception cref="InvalidOperationException">
        /// No usable model could be selected while the normalised training percentage is below 1.
        /// </exception>
        public static LearningModel Learn(IEnumerable<object> examples, double trainingPercentage, int repeat, IGenerator generator)
        {
            if (examples == null)
            {
                throw new ArgumentNullException("examples");
            }

            if (generator == null)
            {
                throw new ArgumentNullException("generator");
            }

            if (repeat < 1)
            {
                throw new ArgumentOutOfRangeException("repeat", repeat, "At least one repetition is required.");
            }

            // Buffer lazy sequences so that the count, the conversion and every training run
            // all observe the same items, and the caller's source is enumerated only once.
            IEnumerable<object> source = examples as ICollection<object>;
            if (source == null)
            {
                source = examples.ToList();
            }

            // Count() is a constant-time lookup here because source is always a collection.
            var total = source.Count();
            var descriptor = generator.Descriptor;
            var data = descriptor.Convert(source).ToExamples();

            Matrix x = data.Item1;
            Vector y = data.Item2;

            var models = new IModel[repeat];
            var scores = new Score[repeat];

            // Accept both fractions (0.8) and whole percentages (80).
            if (trainingPercentage > 1.0)
            {
                trainingPercentage /= 100.0;
            }

            // Each iteration writes only to its own slot of models/scores.
            // NOTE(review): whether iterations could run in parallel depends on GenerateModel
            // not mutating the shared x/y/source data - confirm before parallelising.
            for (int i = 0; i < models.Length; i++)
            {
                var t = GenerateModel(generator, x, y, source, trainingPercentage, total);
                models[i] = t.Model;
                scores[i] = t.Score;
            }

            int idx = scores.Select(s => s.RMSE).MinIndex();

            // Sanity check for convergence failures: a negative index means MinIndex
            // could not pick any score.
            if (idx < 0)
            {
                if (trainingPercentage < 1d)
                {
                    throw new InvalidOperationException("All models failed to initialize properly");
                }

                // With the whole data set used for training, fall back to the first model.
                idx = 0;
            }

            return new LearningModel { Generator = generator, Model = models[idx], Score = scores[idx] };
        }