/// <summary>
/// Cross validated predictions.
/// Only crossValidates within the provided indices.
/// The predictions are returned in the predictions array.
/// </summary>
/// <param name="learner">Learner used to train one model per fold.</param>
/// <param name="observations">Feature matrix; rows are observations.</param>
/// <param name="targets">Target values corresponding to the observation rows.</param>
/// <param name="crossValidationIndices">Row indices to cross validate within.</param>
/// <param name="crossValidatedPredictions">Output array; slot i receives the
/// prediction for the row crossValidationIndices[i].</param>
public void CrossValidate(IIndexedLearner<TPrediction> learner,
    F64Matrix observations, double[] targets,
    int[] crossValidationIndices, TPrediction[] crossValidatedPredictions)
{
    var rows = crossValidatedPredictions.Length;
    if (m_crossValidationFolds > rows)
    {
        throw new ArgumentException("Too few observations: " + rows +
            " for number of cross validation folds: " + m_crossValidationFolds);
    }

    var holdOutSamples = new int[m_crossValidationFolds][];
    // Integer division: the last fold absorbs any remainder (see loop below).
    var samplesPrFold = rows / m_crossValidationFolds;
    // Work on a copy so the caller's index array is not consumed by the
    // Except(...) pool-shrinking below.
    var indices = crossValidationIndices.ToArray();

    // Map the provided crossValidationIndices to crossValidatedPredictions
    // Indices from crossValidationIndices can be larger than crossValidatedPredictions length
    // since crossValidatedPredictions might be a subset of the provided observations and targets.
    // NOTE(review): this reads indices[i] for i < crossValidatedPredictions.Length and
    // ToDictionary throws on duplicate keys — so crossValidationIndices must contain at
    // least crossValidatedPredictions.Length distinct entries; confirm with callers.
    var cvPredictionIndiceMap = Enumerable.Range(0, crossValidatedPredictions.Length)
        .ToDictionary(i => indices[i], i => i);

    // Partition the indices into hold-out folds.
    for (int i = 0; i < m_crossValidationFolds; i++)
    {
        if (i == m_crossValidationFolds - 1)
        {
            // last fold. Add remaining indices.
            holdOutSamples[i] = indices.ToArray();
        }
        else
        {
            // Sample one fold and remove it from the remaining pool.
            var holdoutSample = m_indexedSampler.Sample(targets, samplesPrFold, indices);
            holdOutSamples[i] = holdoutSample;
            indices = indices.Except(holdoutSample).ToArray();
        }
    }

    // Reusable row buffer: avoids allocating a new array per predicted row.
    var observation = new double[observations.ColumnCount];

    for (int i = 0; i < m_crossValidationFolds; i++)
    {
        var holdoutIndices = holdOutSamples[i];
        // Train on all provided indices except the current hold-out fold.
        var trainingIndices = crossValidationIndices.Except(holdoutIndices).ToArray();
        var model = learner.Learn(observations, targets, trainingIndices);
        var predictions = new TPrediction[holdoutIndices.Length];

        for (int l = 0; l < predictions.Length; l++)
        {
            observations.Row(holdoutIndices[l], observation);
            predictions[l] = model.Predict(observation);
        }

        // Write each fold prediction back to the caller-visible slot for its index.
        for (int j = 0; j < holdoutIndices.Length; j++)
        {
            crossValidatedPredictions[cvPredictionIndiceMap[holdoutIndices[j]]] = predictions[j];
        }

        ModelDisposer.DisposeIfDisposable(model);
    }
}
/// <summary>
/// Returns a list of LearningCurvePoints for constructing learning curves.
/// The points contain sample size, training score and validation score.
/// </summary>
/// <param name="learner">Learner used to train a model per sample/shuffle.</param>
/// <param name="observations">Feature matrix; rows are observations.</param>
/// <param name="targets">Target values corresponding to the observation rows.</param>
/// <param name="trainingIndices">Indices that should be used for training</param>
/// <param name="validationIndices">Indices that should be used for validation</param>
/// <returns>One LearningCurvePoint per configured sample percentage.</returns>
public List<LearningCurvePoint> Calculate(IIndexedLearner<TPrediction> learner,
    F64Matrix observations, double[] targets,
    int[] trainingIndices, int[] validationIndices)
{
    var learningCurves = new List<LearningCurvePoint>();

    var validationTargets = targets.GetIndices(validationIndices);
    var validationPredictions = new TPrediction[validationTargets.Length];

    // Reusable row buffer: the Row(index, buffer) overload avoids allocating a
    // fresh double[] for every predicted row, consistent with the other
    // validation methods in this file.
    var observation = new double[observations.ColumnCount];

    foreach (var samplePercentage in m_samplePercentages)
    {
        if (samplePercentage <= 0.0 || samplePercentage > 1.0)
        {
            throw new ArgumentException("Sample percentage must be larger than 0.0 and smaller than or equal to 1.0");
        }

        var sampleSize = (int)Math.Round(samplePercentage * trainingIndices.Length);
        if (sampleSize <= 0)
        {
            throw new ArgumentException("Sample percentage " + samplePercentage +
                " too small for training set size " + trainingIndices.Length);
        }

        var trainError = 0.0;
        var validationError = 0.0;
        var trainingPredictions = new TPrediction[sampleSize];

        // Average errors over several shuffled subsamples of this size.
        for (int j = 0; j < m_numberOfShufflesPrSample; j++)
        {
            var sampleIndices = m_indexedSampler.Sample(targets, sampleSize, trainingIndices);
            var model = learner.Learn(observations, targets, sampleIndices);

            for (int i = 0; i < trainingPredictions.Length; i++)
            {
                observations.Row(sampleIndices[i], observation);
                trainingPredictions[i] = model.Predict(observation);
            }

            for (int i = 0; i < validationIndices.Length; i++)
            {
                observations.Row(validationIndices[i], observation);
                validationPredictions[i] = model.Predict(observation);
            }

            var sampleTargets = targets.GetIndices(sampleIndices);
            trainError += m_metric.Error(sampleTargets, trainingPredictions);
            validationError += m_metric.Error(validationTargets, validationPredictions);

            ModelDisposer.DisposeIfDisposable(model);
        }

        trainError = trainError / m_numberOfShufflesPrSample;
        validationError = validationError / m_numberOfShufflesPrSample;

        learningCurves.Add(new LearningCurvePoint(sampleSize, trainError, validationError));
    }

    return learningCurves;
}
/// <summary>
/// Time series cross-validation. Based on rolling validation using the original order of the data.
/// Using the specified initial size of the training set, a model is trained.
/// The model predicts the first observation following the training data.
/// Following, this data point is included in the training and a new model is trained,
/// which predict the next observation. This continuous until all observations following the initial training size,
/// has been validated.
/// </summary>
/// <param name="learner">Learner used to train the rolling models.</param>
/// <param name="observations">Feature matrix; rows are observations in time order.</param>
/// <param name="targets">Target values corresponding to the observation rows.</param>
/// <returns>The validated predictions, following the initial training size</returns>
public TPrediction[] Validate(IIndexedLearner<TPrediction> learner,
    F64Matrix observations, double[] targets)
{
    if (observations.RowCount != targets.Length)
    {
        throw new ArgumentException($"observation row count {observations.RowCount} " +
            $"must match target length {targets.Length}");
    }

    if (m_initialTrainingSize >= observations.RowCount)
    {
        throw new ArgumentException($"observation row count {observations.RowCount} " +
            $"is smaller than initial training size {m_initialTrainingSize}");
    }

    var trainingIndices = Enumerable.Range(0, m_initialTrainingSize).ToArray();
    var predictionLength = targets.Length - trainingIndices.Length;
    var predictions = new TPrediction[predictionLength];

    // Reusable row buffer: avoids allocating a new array per predicted row.
    var observation = new double[observations.ColumnCount];
    var lastTrainingIndex = trainingIndices.Last();

    var model = learner.Learn(observations, targets, trainingIndices);

    for (int i = 0; i < predictions.Length; i++)
    {
        // Only train a new model at each retrain interval. The model trained
        // before the loop covers i == 0, and i % 1 == 0 handles interval 1.
        if ((i != 0) && ((i % m_retrainInterval) == 0))
        {
            // BUGFIX: dispose the model being replaced here, instead of
            // disposing at the end of every iteration — the old code disposed
            // a model that was still used for predictions in the following
            // iterations whenever m_retrainInterval > 1.
            ModelDisposer.DisposeIfDisposable(model);
            model = learner.Learn(observations, targets, trainingIndices);
        }

        var predictionIndex = lastTrainingIndex + 1;
        observations.Row(predictionIndex, observation);
        predictions[i] = model.Predict(observation);

        // The predicted point becomes part of the (rolling) training window.
        lastTrainingIndex++;

        // determine start index and length of the training period, if maxTrainingSetSize is specified.
        // NOTE(review): in the unbounded branch the window is Range(0, lastTrainingIndex),
        // which excludes row lastTrainingIndex itself, while the bounded branch ends at
        // lastTrainingIndex inclusive — looks like an off-by-one; confirm intended window.
        var startIndex = m_maxTrainingSetSize != 0 ?
            Math.Max(0, (lastTrainingIndex + 1) - m_maxTrainingSetSize) : 0;
        var length = m_maxTrainingSetSize != 0 ?
            Math.Min(m_maxTrainingSetSize, lastTrainingIndex) : lastTrainingIndex;

        trainingIndices = Enumerable.Range(startIndex, length).ToArray();
    }

    // Dispose the final model now that all predictions are made.
    ModelDisposer.DisposeIfDisposable(model);

    return predictions;
}
/// <summary>
/// Cross validated predictions.
/// Only crossValidates within the provided indices.
/// The predictions are returned in the predictions array.
/// </summary>
/// <param name="learner">Learner used to train one model per fold.</param>
/// <param name="observations">Feature matrix; rows are observations.</param>
/// <param name="targets">Target values corresponding to the observation rows.</param>
/// <param name="crossValidationIndices">Row indices to cross validate within.</param>
/// <param name="crossValidatedPredictions">Output array; slot i receives the
/// prediction for the row crossValidationIndices[i].</param>
public void CrossValidate(IIndexedLearner<TPrediction> learner,
    F64Matrix observations, double[] targets,
    int[] crossValidationIndices, TPrediction[] crossValidatedPredictions)
{
    var predictionCount = crossValidatedPredictions.Length;
    if (m_crossValidationFolds > predictionCount)
    {
        throw new ArgumentException("Too few observations: " + predictionCount +
            " for number of cross validation folds: " + m_crossValidationFolds);
    }

    // Copy so the caller's index array is never handed to the fold splitter directly.
    var workingIndices = crossValidationIndices.ToArray();

    // Map the provided crossValidationIndices to crossValidatedPredictions
    // Indices from crossValidationIndices can be larger than crossValidatedPredictions length
    // since crossValidatedPredictions might be a subset of the provided observations and targets.
    var outputSlotByRowIndex = Enumerable.Range(0, predictionCount)
        .ToDictionary(slot => workingIndices[slot], slot => slot);

    var foldSets = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        m_indexedSampler, m_crossValidationFolds, targets, workingIndices);

    // Reusable row buffer: avoids allocating a new array per predicted row.
    var rowBuffer = new double[observations.ColumnCount];

    foreach (var (foldTrainingIndices, foldValidationIndices) in foldSets)
    {
        var model = learner.Learn(observations, targets, foldTrainingIndices);

        // Predict each hold-out row and write the result straight into the
        // caller-visible slot for that row index.
        foreach (var rowIndex in foldValidationIndices)
        {
            observations.Row(rowIndex, rowBuffer);
            crossValidatedPredictions[outputSlotByRowIndex[rowIndex]] = model.Predict(rowBuffer);
        }

        ModelDisposer.DisposeIfDisposable(model);
    }
}