/// <summary>
/// Grid search with crossvalidation: every combination of the supplied parameter values is
/// crossvalidated and the combination with the highest test accuracy is returned.
/// </summary>
/// <param name="problemData">The classification problem data</param>
/// <param name="numberOfFolds">The number of folds used when generating the crossvalidation partitions</param>
/// <param name="shuffleFolds">Specifies whether the folds should be shuffled (forwarded to the partition generation)</param>
/// <param name="parameterRanges">Maps each parameter name to the values that should be tried for it</param>
/// <param name="seed">The random seed handed to each crossvalidation run</param>
/// <param name="maxDegreeOfParallelism">Assigned to <c>ParallelOptions.MaxDegreeOfParallelism</c> to limit how many combinations are evaluated concurrently</param>
/// <returns>
/// A clone of the parameter set with the highest test accuracy. If no combination reaches a test
/// accuracy above 0, a default-constructed <c>RFParameter</c> is returned.
/// </returns>
public static RFParameter GridSearch(IClassificationProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    // Setter i is applied to value i of every combination. This relies on Keys and Values
    // enumerating in matching order (and, presumably, on CartesianProduct keeping the order of
    // its input sequences - confirm against its implementation).
    var parameterSetters = parameterRanges.Keys.Select(GenerateSetter).ToList();
    var combinations = parameterRanges.Values.CartesianProduct();
    var folds = GenerateRandomForestPartitions(problemData, numberOfFolds, shuffleFolds);

    var bestAccuracy = new DoubleValue(0);
    RFParameter winner = new RFParameter();
    var syncRoot = new object();
    var options = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };

    Parallel.ForEach(combinations, options, combination => {
        var values = combination.ToList();

        // Build a fresh parameter set for this combination.
        var candidate = new RFParameter();
        int position = 0;
        foreach (var applyValue in parameterSetters) {
            applyValue(candidate, values[position]);
            position++;
        }

        double candidateAccuracy;
        CrossValidate(problemData, folds, candidate.N, candidate.R, candidate.M, seed, out candidateAccuracy);

        // The best-so-far state is shared between all workers, so it is only touched under the lock.
        // The comparison is strict: a later candidate with an equal accuracy does not replace the current best.
        lock (syncRoot) {
            if (bestAccuracy.Value < candidateAccuracy) {
                bestAccuracy.Value = candidateAccuracy;
                winner = (RFParameter)candidate.Clone();
            }
        }
    });

    return winner;
}
/// <summary>
/// Cloning constructor: forwards <paramref name="original"/> and <paramref name="cloner"/> to the
/// base constructor and then copies the N, R and M values from <paramref name="original"/>.
/// </summary>
/// <param name="original">The instance whose N, R and M values are copied</param>
/// <param name="cloner">The cloner passed on to the base constructor</param>
protected RFParameter(RFParameter original, Cloner cloner)
    : base(original, cloner) {
    N = original.N;
    R = original.R;
    M = original.M;
}
/// <summary>
/// Grid search without crossvalidation (since for random forests the out-of-bag estimate is unbiased):
/// a model is created for every combination of the supplied parameter values and the combination
/// with the lowest out-of-bag RMS error is returned.
/// </summary>
/// <param name="problemData">The classification problem data; its <c>TrainingIndices</c> are passed to the model creation</param>
/// <param name="parameterRanges">Maps each parameter name to the values that should be tried for it</param>
/// <param name="seed">The random seed (required by the random forest model)</param>
/// <param name="maxDegreeOfParallelism">Assigned to <c>ParallelOptions.MaxDegreeOfParallelism</c> to limit how many combinations are evaluated concurrently</param>
/// <returns>
/// A clone of the parameter set with the lowest out-of-bag RMS error. If no combination yields an
/// error below <c>double.MaxValue</c>, a default-constructed <c>RFParameter</c> is returned.
/// </returns>
public static RFParameter GridSearch(IClassificationProblemData problemData, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    // Setter i is applied to value i of every combination. This relies on Keys and Values
    // enumerating in matching order (and, presumably, on CartesianProduct keeping the order of
    // its input sequences - confirm against its implementation).
    var parameterSetters = parameterRanges.Keys.Select(GenerateSetter).ToList();
    var combinations = parameterRanges.Values.CartesianProduct();

    double lowestOutOfBagRmsError = double.MaxValue;
    RFParameter winner = new RFParameter();
    var syncRoot = new object();
    var options = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };

    Parallel.ForEach(combinations, options, combination => {
        var values = combination.ToList();

        // Build a fresh parameter set for this combination.
        var candidate = new RFParameter();
        int position = 0;
        foreach (var applyValue in parameterSetters) {
            applyValue(candidate, values[position]);
            position++;
        }

        // Only the out-of-bag RMS error is used below; the returned model and the other
        // error measures are discarded.
        double rmsError;
        double outOfBagRmsError;
        double avgRelError;
        double outOfBagAvgRelError;
        RandomForestModel.CreateClassificationModel(problemData, problemData.TrainingIndices, candidate.N, candidate.R, candidate.M, seed,
            out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);

        // The best-so-far state is shared between all workers, so it is only touched under the lock.
        // The comparison is strict: a later candidate with an equal error does not replace the current best.
        lock (syncRoot) {
            if (outOfBagRmsError < lowestOutOfBagRmsError) {
                lowestOutOfBagRmsError = outOfBagRmsError;
                winner = (RFParameter)candidate.Clone();
            }
        }
    });

    return winner;
}