Example #1
        /// <summary>
        /// Cross-validated predictions.
        /// Only cross-validates within the provided indices.
        /// The predictions are returned in the crossValidatedPredictions array.
        /// </summary>
        /// <param name="learner">Learner to cross-validate.</param>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="crossValidationIndices">Indices to cross-validate within.</param>
        /// <param name="crossValidatedPredictions">Array that receives the cross-validated predictions, one per entry in crossValidationIndices.</param>
        public void CrossValidate(IIndexedLearner<TPrediction> learner,
                                  F64Matrix observations,
                                  double[] targets,
                                  int[] crossValidationIndices,
                                  TPrediction[] crossValidatedPredictions)
        {
            var rows = crossValidatedPredictions.Length;

            if (m_crossValidationFolds > rows)
            {
                throw new ArgumentException("Too few observations: " + rows +
                                            " for number of cross validation folds: " + m_crossValidationFolds);
            }

            var holdOutSamples = new int[m_crossValidationFolds][];
            var samplesPrFold  = rows / m_crossValidationFolds;

            var indices = crossValidationIndices.ToArray();

            // Map the provided crossValidationIndices to positions in crossValidatedPredictions.
            // Values in crossValidationIndices can exceed the crossValidatedPredictions length,
            // since crossValidatedPredictions might cover only a subset of the provided observations and targets.
            var cvPredictionIndiceMap = Enumerable.Range(0, crossValidatedPredictions.Length)
                                        .ToDictionary(i => indices[i], i => i);

            for (int i = 0; i < m_crossValidationFolds; i++)
            {
                if (i == m_crossValidationFolds - 1)
                {
                    // Last fold: add the remaining indices.
                    holdOutSamples[i] = indices.ToArray();
                }
                else
                {
                    var holdoutSample = m_indexedSampler.Sample(targets, samplesPrFold, indices);
                    holdOutSamples[i] = holdoutSample;
                    indices           = indices.Except(holdoutSample).ToArray();
                }
            }

            var observation = new double[observations.ColumnCount];

            for (int i = 0; i < m_crossValidationFolds; i++)
            {
                var holdoutIndices  = holdOutSamples[i];
                var trainingIndices = crossValidationIndices.Except(holdoutIndices).ToArray();
                var model           = learner.Learn(observations, targets, trainingIndices);
                var predictions     = new TPrediction[holdoutIndices.Length];

                for (int l = 0; l < predictions.Length; l++)
                {
                    observations.Row(holdoutIndices[l], observation);
                    predictions[l] = model.Predict(observation);
                }

                for (int j = 0; j < holdoutIndices.Length; j++)
                {
                    crossValidatedPredictions[cvPredictionIndiceMap[holdoutIndices[j]]] = predictions[j];
                }

                ModelDisposer.DisposeIfDisposable(model);
            }
        }
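
A minimal usage sketch for the overload above. Assumptions not shown in the snippet: the method sits on SharpLearning's RandomCrossValidation<double>, a RegressionDecisionTreeLearner serves as the IIndexedLearner<double>, the data is synthetic, and the SharpLearning.Containers, SharpLearning.CrossValidation, SharpLearning.DecisionTrees and SharpLearning.Metrics packages are referenced.

using System;
using System.Linq;
using SharpLearning.Containers.Matrices;
using SharpLearning.CrossValidation.CrossValidators;
using SharpLearning.DecisionTrees.Learners;
using SharpLearning.Metrics.Regression;

// Synthetic regression data: 200 rows, 2 features (row-major), target = sum of the features.
var random = new Random(42);
var rows = 200;
var features = Enumerable.Range(0, rows * 2).Select(_ => random.NextDouble()).ToArray();
var observations = new F64Matrix(features, rows, 2);
var targets = Enumerable.Range(0, rows)
    .Select(r => features[r * 2] + features[r * 2 + 1]).ToArray();

// Cross-validate only the first 150 rows; predictions line up with these indices.
var cvIndices = Enumerable.Range(0, 150).ToArray();
var cvPredictions = new double[cvIndices.Length];

var crossValidator = new RandomCrossValidation<double>(5);
crossValidator.CrossValidate(new RegressionDecisionTreeLearner(),
    observations, targets, cvIndices, cvPredictions);

var error = new MeanSquaredErrorRegressionMetric()
    .Error(cvIndices.Select(i => targets[i]).ToArray(), cvPredictions);
Console.WriteLine($"Out-of-fold MSE on the selected rows: {error:F4}");

Because the method only cross-validates within the provided indices, the last 50 rows in this sketch never contribute to training or prediction.
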
        /// <summary>
        /// Returns a list of LearningCurvePoints for constructing learning curves.
        /// The points contain sample size, training score and validation score.
        /// </summary>
        /// <param name="learner">Learner to calculate learning curves for.</param>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="trainingIndices">Indices that should be used for training</param>
        /// <param name="validationIndices">Indices that should be used for validation</param>
        /// <returns>List of learning curve points.</returns>
        public List<LearningCurvePoint> Calculate(IIndexedLearner<TPrediction> learner,
                                                  F64Matrix observations, double[] targets, int[] trainingIndices, int[] validationIndices)
        {
            var learningCurves = new List<LearningCurvePoint>();

            var validationTargets     = targets.GetIndices(validationIndices);
            var validationPredictions = new TPrediction[validationTargets.Length];

            foreach (var samplePercentage in m_samplePercentages)
            {
                if (samplePercentage <= 0.0 || samplePercentage > 1.0)
                {
                    throw new ArgumentException("Sample percentage must be larger than 0.0 and smaller than or equal to 1.0");
                }

                var sampleSize = (int)Math.Round(samplePercentage * (double)trainingIndices.Length);
                if (sampleSize <= 0)
                {
                    throw new ArgumentException("Sample percentage " + samplePercentage +
                                                " too small for training set size " + trainingIndices.Length);
                }

                var trainError      = 0.0;
                var validationError = 0.0;

                var trainingPredictions = new TPrediction[sampleSize];

                for (int j = 0; j < m_numberOfShufflesPrSample; j++)
                {
                    var sampleIndices = m_indexedSampler.Sample(targets, sampleSize, trainingIndices);
                    var model         = learner.Learn(observations, targets, sampleIndices);

                    for (int i = 0; i < trainingPredictions.Length; i++)
                    {
                        trainingPredictions[i] = model.Predict(observations.Row(sampleIndices[i]));
                    }

                    for (int i = 0; i < validationIndices.Length; i++)
                    {
                        validationPredictions[i] = model.Predict(observations.Row(validationIndices[i]));
                    }

                    var sampleTargets = targets.GetIndices(sampleIndices);
                    trainError      += m_metric.Error(sampleTargets, trainingPredictions);
                    validationError += m_metric.Error(validationTargets, validationPredictions);

                    ModelDisposer.DisposeIfDisposable(model);
                }

                trainError      = trainError / m_numberOfShufflesPrSample;
                validationError = validationError / m_numberOfShufflesPrSample;

                learningCurves.Add(new LearningCurvePoint(sampleSize,
                                                          trainError, validationError));
            }

            return learningCurves;
        }
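
A sketch of how the Calculate overload above might be driven, assuming the surrounding class is SharpLearning's LearningCurvesCalculator<double>. The 80/20 index split, the RegressionDecisionTreeLearner, and the LearningCurvePoint property names (SampleSize, TrainingScore, ValidationScore) are assumptions taken from the doc comment rather than from this snippet.

using System;
using System.Linq;
using SharpLearning.Containers.Matrices;
using SharpLearning.CrossValidation.LearningCurves;
using SharpLearning.DecisionTrees.Learners;

public static class LearningCurveExample
{
    public static void Print(LearningCurvesCalculator<double> calculator,
        F64Matrix observations, double[] targets)
    {
        // Plain 80/20 split on row order; any two disjoint index sets will do.
        var rows = targets.Length;
        var trainingCount = (int)(rows * 0.8);
        var trainingIndices = Enumerable.Range(0, trainingCount).ToArray();
        var validationIndices = Enumerable.Range(trainingCount, rows - trainingCount).ToArray();

        var points = calculator.Calculate(new RegressionDecisionTreeLearner(),
            observations, targets, trainingIndices, validationIndices);

        foreach (var point in points)
        {
            // Property names assumed from the doc comment: sample size, training score, validation score.
            Console.WriteLine($"n = {point.SampleSize}: " +
                $"train = {point.TrainingScore:F4}, validation = {point.ValidationScore:F4}");
        }
    }
}
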
Example #3
        /// <summary>
        /// Time series cross-validation, based on rolling validation using the original order of the data.
        /// A model is trained using the specified initial size of the training set
        /// and predicts the first observation following the training data.
        /// This data point is then included in the training set and a new model is trained,
        /// which predicts the next observation. This continues until all observations
        /// following the initial training size have been validated.
        /// </summary>
        /// <param name="learner"></param>
        /// <param name="observations"></param>
        /// <param name="targets"></param>
        /// <returns>The validated predictions, following the initial training size</returns>
        public TPrediction[] Validate(IIndexedLearner<TPrediction> learner, F64Matrix observations, double[] targets)
        {
            if (observations.RowCount != targets.Length)
            {
                throw new ArgumentException($"observation row count {observations.RowCount} " +
                                            $"must match target length {targets.Length}");
            }

            if (m_initialTrainingSize >= observations.RowCount)
            {
                throw new ArgumentException($"observation row count {observations.RowCount} " +
                                            $"must be larger than the initial training size {m_initialTrainingSize}");
            }

            var trainingIndices  = Enumerable.Range(0, m_initialTrainingSize).ToArray();
            var predictionLength = targets.Length - trainingIndices.Length;
            var predictions      = new TPrediction[predictionLength];

            var observation       = new double[observations.ColumnCount];
            var lastTrainingIndex = trainingIndices.Last();

            var model = learner.Learn(observations, targets, trainingIndices);

            for (int i = 0; i < predictions.Length; i++)
            {
                // Only train a new model at each retrain interval.
                if (((m_retrainInterval == 1) || ((i % m_retrainInterval) == 0)) && (i != 0))
                {
                    // Dispose the previous model before replacing it.
                    ModelDisposer.DisposeIfDisposable(model);
                    model = learner.Learn(observations, targets, trainingIndices);
                }

                var predictionIndex = lastTrainingIndex + 1;
                observations.Row(predictionIndex, observation);
                predictions[i] = model.Predict(observation);

                lastTrainingIndex++;

                // determine start index and length of the training period, if maxTrainingSetSize is specified.
                var startIndex = m_maxTrainingSetSize != 0 ?
                                 Math.Max(0, (lastTrainingIndex + 1) - m_maxTrainingSetSize) : 0;

                var length = m_maxTrainingSetSize != 0 ?
                             Math.Min(m_maxTrainingSetSize, lastTrainingIndex) : lastTrainingIndex;

                trainingIndices = Enumerable.Range(startIndex, length).ToArray();

            }

            // Dispose the last model once all predictions have been made.
            // Disposing inside the loop would invalidate a model that is reused
            // across iterations when m_retrainInterval is larger than one.
            ModelDisposer.DisposeIfDisposable(model);

            return predictions;
        }
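
A usage sketch for the rolling validation above, assuming the surrounding class is SharpLearning's TimeSeriesCrossValidation<double> and that its constructor takes the initial training size as its first argument (remaining parameters such as the maximum training window and the retrain interval are left at their defaults here). The data and the one-step-ahead target construction are illustrative only.

using System;
using System.Linq;
using SharpLearning.Containers.Matrices;
using SharpLearning.CrossValidation.TimeSeries;
using SharpLearning.DecisionTrees.Learners;
using SharpLearning.Metrics.Regression;

// A noisy sine wave; the single feature is the current value, the target is the next value.
var rows = 150;
var random = new Random(42);
var series = Enumerable.Range(0, rows)
    .Select(i => Math.Sin(i * 0.1) + random.NextDouble() * 0.1).ToArray();
var observations = new F64Matrix(series, rows, 1);
var targets = Enumerable.Range(0, rows)
    .Select(i => i + 1 < rows ? series[i + 1] : series[i]).ToArray();

// Initial training size of 100; the remaining 50 observations are validated one step at a time.
var initialTrainingSize = 100;
var timeSeriesCv = new TimeSeriesCrossValidation<double>(initialTrainingSize);
var predictions = timeSeriesCv.Validate(new RegressionDecisionTreeLearner(), observations, targets);

// The returned array lines up with the targets after the initial training window, in time order.
var validationTargets = targets.Skip(initialTrainingSize).ToArray();
var error = new MeanSquaredErrorRegressionMetric().Error(validationTargets, predictions);
Console.WriteLine($"Rolling one-step-ahead MSE over {predictions.Length} points: {error:F4}");
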
        /// <summary>
        /// Cross-validated predictions.
        /// Only cross-validates within the provided indices.
        /// The predictions are returned in the crossValidatedPredictions array.
        /// </summary>
        /// <param name="learner">Learner to cross-validate.</param>
        /// <param name="observations">Observation matrix.</param>
        /// <param name="targets">Target values.</param>
        /// <param name="crossValidationIndices">Indices to cross-validate within.</param>
        /// <param name="crossValidatedPredictions">Array that receives the cross-validated predictions, one per entry in crossValidationIndices.</param>
        public void CrossValidate(IIndexedLearner<TPrediction> learner,
                                  F64Matrix observations,
                                  double[] targets,
                                  int[] crossValidationIndices,
                                  TPrediction[] crossValidatedPredictions)
        {
            var rows = crossValidatedPredictions.Length;

            if (m_crossValidationFolds > rows)
            {
                throw new ArgumentException("Too few observations: " + rows +
                                            " for number of cross validation folds: " + m_crossValidationFolds);
            }

            var indices = crossValidationIndices.ToArray();

            // Map the provided crossValidationIndices to positions in crossValidatedPredictions.
            // Values in crossValidationIndices can exceed the crossValidatedPredictions length,
            // since crossValidatedPredictions might cover only a subset of the provided observations and targets.
            var cvPredictionIndiceMap = Enumerable.Range(0, crossValidatedPredictions.Length)
                                        .ToDictionary(i => indices[i], i => i);

            var crossValidationIndexSets = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                m_indexedSampler, m_crossValidationFolds, targets, indices);

            var observation = new double[observations.ColumnCount];

            foreach (var (trainingIndices, validationIndices) in crossValidationIndexSets)
            {
                var model       = learner.Learn(observations, targets, trainingIndices);
                var predictions = new TPrediction[validationIndices.Length];

                for (int l = 0; l < predictions.Length; l++)
                {
                    observations.Row(validationIndices[l], observation);
                    predictions[l] = model.Predict(observation);
                }

                for (int j = 0; j < validationIndices.Length; j++)
                {
                    crossValidatedPredictions[cvPredictionIndiceMap[validationIndices[j]]] = predictions[j];
                }

                ModelDisposer.DisposeIfDisposable(model);
            }
        }
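
As the index map above shows, predictions keep the order of the provided indices: crossValidatedPredictions[i] is the out-of-fold prediction for row crossValidationIndices[i]. A short sketch of pairing predictions back to their rows, reusing the cvIndices, cvPredictions and targets variables from the first usage sketch:

// Pair each out-of-fold prediction with the row and target it belongs to.
for (int i = 0; i < cvPredictions.Length; i++)
{
    var row = cvIndices[i];
    Console.WriteLine($"row {row}: target = {targets[row]}, out-of-fold prediction = {cvPredictions[i]:F4}");
}
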