public void Hyper_Parameter_Tuning()
{
    #region Read data
    // Use StreamReader(filepath) when running from the filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read the feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read the regression targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // metric to minimize
    var metric = new MeanSquaredErrorRegressionMetric();

    // parameter ranges for the optimizer
    var parameters = new ParameterBounds[]
    {
        new ParameterBounds(min: 1, max: 100, transform: Transform.Linear), // maximumTreeDepth
        new ParameterBounds(min: 1, max: 16, transform: Transform.Linear),  // minimumSplitSize
    };

    // create random search optimizer
    var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);

    // other available optimizers:
    // GridSearchOptimizer
    // GlobalizedBoundedNelderMeadOptimizer
    // ParticleSwarmOptimizer
    // BayesianOptimizer

    // function to minimize
    Func<double[], OptimizerResult> minimize = p =>
    {
        var cv = new RandomCrossValidation<double>(crossValidationFolds: 5, seed: 42);
        var candidateLearner = new RegressionDecisionTreeLearner(
            maximumTreeDepth: (int)p[0],
            minimumSplitSize: (int)p[1]);

        var predictions = cv.CrossValidate(candidateLearner, observations, targets);
        var error = metric.Error(targets, predictions);
        Trace.WriteLine("Error: " + error);

        return new OptimizerResult(p, error);
    };

    // run optimizer
    var result = optimizer.OptimizeBest(minimize);
    var bestParameters = result.ParameterSet;
    Trace.WriteLine("Result: " + result.Error);

    // create a learner with the found parameters
    var learner = new RegressionDecisionTreeLearner(
        maximumTreeDepth: (int)bestParameters[0],
        minimumSplitSize: (int)bestParameters[1]);

    // learn the final model with the found parameters
    var model = learner.Learn(observations, targets);
}
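The example above stops after learning the final model. As a minimal follow-up sketch, assuming model, observations, targets, and metric are still in scope, the tuned tree can be scored like this (a held-out test set, as in the gradient boost example further down, gives a more honest estimate than in-sample error):

// score the tuned tree; note this is in-sample error, since the model
// was trained on the same observations
var inSamplePredictions = model.Predict(observations);
var inSampleError = metric.Error(targets, inSamplePredictions);
Trace.WriteLine("In-sample error: " + inSampleError);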
public void RandomSearchOptimizer_OptimizeBest()
{
    var parameters = new double[][]
    {
        new double[] { 0.0, 100.0 }
    };

    var sut = new RandomSearchOptimizer(parameters, 100);
    var actual = sut.OptimizeBest(Minimize);

    Assert.AreEqual(110.67173923600831, actual.Error, 0.00001);
    Assert.AreEqual(37.533294194160632, actual.ParameterSet.Single(), 0.00001);
}
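The tests here and below call a Minimize helper that is not shown in this listing. A minimal sketch of such an objective, assuming a one-parameter least-squares fit (the actual helper in the test suite may use different data):

// Hypothetical stand-in for the Minimize helper referenced by the tests:
// fits weight = parameters[0] * height by summed squared error.
static OptimizerResult Minimize(double[] parameters)
{
    var heights = new double[] { 1.47, 1.50, 1.52, 1.55, 1.57 };
    var weights = new double[] { 52.21, 53.12, 54.48, 55.84, 57.20 };

    var cost = 0.0;
    for (var i = 0; i < heights.Length; i++)
    {
        var residual = parameters[0] * heights[i] - weights[i];
        cost += residual * residual;
    }

    return new OptimizerResult(parameters, cost);
}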
public override double Evaluate(IChromosome chromosome)
{
    try
    {
        var parameters = Config.Genes.Select(s =>
            new MinMaxParameterSpec(
                min: (double)(s.MinDecimal ?? s.MinInt.Value),
                max: (double)(s.MaxDecimal ?? s.MaxInt.Value),
                transform: Transform.Linear,
                parameterType: s.Precision > 0 ? ParameterType.Continuous : ParameterType.Discrete)
            ).ToArray();

        IOptimizer optimizer = null;
        if (Config.Fitness != null)
        {
            if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.RandomSearch.ToString())
            {
                optimizer = new RandomSearchOptimizer(parameters, iterations: Config.Generations,
                    seed: 42, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.ParticleSwarm.ToString())
            {
                optimizer = new ParticleSwarmOptimizer(parameters, maxIterations: Config.Generations,
                    numberOfParticles: Config.PopulationSize, seed: 42, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Bayesian.ToString())
            {
                optimizer = new BayesianOptimizer(parameters, maxIterations: Config.Generations,
                    numberOfStartingPoints: Config.PopulationSize, seed: 42);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.GlobalizedBoundedNelderMead.ToString())
            {
                optimizer = new GlobalizedBoundedNelderMeadOptimizer(parameters, maxRestarts: Config.Generations,
                    maxIterationsPrRestart: Config.PopulationSize, seed: 42, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Genetic.ToString())
            {
                throw new Exception("Genetic optimizer cannot be used with Sharpe Maximizer");
            }
        }

        // todo: GridSearchOptimizer?

        Func<double[], OptimizerResult> minimize = p => Minimize(p, (Chromosome)chromosome);

        // run optimizer
        var result = optimizer.OptimizeBest(minimize);

        Best = ToChromosome(result, chromosome);
        return result.Error;
    }
    catch (Exception ex)
    {
        Program.Logger.Error(ex);
        return ErrorFitness;
    }
}
public async Task<IterationResult> Start(IOptimizerConfiguration config, CancellationToken cancellationToken)
{
    CancellationToken = cancellationToken;

    var parameters = config.Genes.Select(s =>
        new MinMaxParameterSpec(
            min: s.Min ?? s.Actual.Value,
            max: s.Max ?? s.Actual.Value,
            transform: Transform.Linear,
            parameterType: s.Precision > 0 ? ParameterType.Continuous : ParameterType.Discrete)
        ).ToArray();

    Keys = config.Genes.Where(g => g.Key != "id").Select(s => s.Key);

    IOptimizer optimizerMethod = null;
    if (config.Fitness != null)
    {
        if (config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.RandomSearch.ToString())
        {
            optimizerMethod = new RandomSearchOptimizer(parameters, iterations: config.Generations,
                seed: 42, runParallel: false);
        }
        else if (config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.ParticleSwarm.ToString())
        {
            optimizerMethod = new ParticleSwarmOptimizer(parameters, maxIterations: config.Generations,
                numberOfParticles: config.PopulationSize, seed: 42, maxDegreeOfParallelism: 1);
        }
        else if (config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Bayesian.ToString())
        {
            optimizerMethod = new BayesianOptimizer(parameters: parameters, iterations: config.Generations,
                randomStartingPointCount: config.PopulationSize,
                functionEvaluationsPerIterationCount: config.PopulationSize,
                seed: 42, runParallel: false);
        }
        else if (config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.GlobalizedBoundedNelderMead.ToString())
        {
            optimizerMethod = new GlobalizedBoundedNelderMeadOptimizer(parameters, maxRestarts: config.Generations,
                maxIterationsPrRestart: config.PopulationSize, seed: 42, maxDegreeOfParallelism: 1);
        }
        else if (config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.GridSearch.ToString())
        {
            optimizerMethod = new GridSearchOptimizer(config.Genes.Select(s =>
                new GridParameterSpec(RangeWithPrecision.Range(s.Min.Value, s.Max.Value, s.Precision.Value).ToArray())
                ).ToArray(), runParallel: false);
        }
    }
    else
    {
        throw new ArgumentException("No optimizer was configured.");
    }

    var result = await optimizerMethod.OptimizeBest(Minimize);

    return new IterationResult
    {
        ParameterSet = result.ParameterSet,
        Cost = IsMaximizing ? result.Error * -1 : result.Error
    };
}
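Note the sign flip on Cost: the optimizers always minimize, so a maximizing objective is negated inside Minimize and flipped back when reporting. A hedged usage sketch for the method above, assuming runner is an instance of the containing class and config is a populated IOptimizerConfiguration:

// hypothetical call site; cancels the search after 30 minutes
using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(30));
var iteration = await runner.Start(config, cts.Token);
Console.WriteLine($"Cost: {iteration.Cost}, parameters: {string.Join(", ", iteration.ParameterSet)}");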
public void RandomSearchOptimizer_Optimize()
{
    var parameters = new double[][]
    {
        new double[] { 10.0, 37.5 }
    };

    var sut = new RandomSearchOptimizer(parameters, 2);
    var actual = sut.Optimize(Minimize);

    var expected = new OptimizerResult[]
    {
        new OptimizerResult(new double[] { 28.372927812567415 }, 3690.8111981874217),
        new OptimizerResult(new double[] { 13.874950705270725 }, 23438.215764163542)
    };

    Assert.AreEqual(expected.First().Error, actual.First().Error, 0.0001);
    Assert.AreEqual(expected.First().ParameterSet.First(), actual.First().ParameterSet.First(), 0.0001);
    Assert.AreEqual(expected.Last().Error, actual.Last().Error, 0.0001);
    Assert.AreEqual(expected.Last().ParameterSet.First(), actual.Last().ParameterSet.First(), 0.0001);
}
public void GradientBoost_Optimize_Hyperparameters()
{
    #region read and split data
    // Use StreamReader(filepath) when running from the filesystem.
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read the feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read the regression targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // Create the training/test splitter.
    // Since this is a regression problem, we use the random training/test set splitter;
    // 30 % of the data is used for the test set.
    var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

    var trainingTestSplit = splitter.SplitSet(observations, targets);
    var trainSet = trainingTestSplit.TrainingSet;
    var testSet = trainingTestSplit.TestSet;
    #endregion

    // Since this is a regression problem, we use mean squared error as the metric
    // for evaluating how well the model performs.
    var metric = new MeanSquaredErrorRegressionMetric();

    // Usually better results can be achieved by tuning a gradient boost learner.
    var numberOfFeatures = trainSet.Observations.ColumnCount;

    // Parameter specs for the optimizer.
    var parameters = new IParameterSpec[]
    {
        new MinMaxParameterSpec(min: 80, max: 300,
            transform: Transform.Linear, parameterType: ParameterType.Discrete), // iterations

        new MinMaxParameterSpec(min: 0.02, max: 0.2,
            transform: Transform.Logarithmic, parameterType: ParameterType.Continuous), // learning rate

        new MinMaxParameterSpec(min: 8, max: 15,
            transform: Transform.Linear, parameterType: ParameterType.Discrete), // maximumTreeDepth

        new MinMaxParameterSpec(min: 0.5, max: 0.9,
            transform: Transform.Linear, parameterType: ParameterType.Continuous), // subSampleRatio

        new MinMaxParameterSpec(min: 1, max: numberOfFeatures,
            transform: Transform.Linear, parameterType: ParameterType.Discrete), // featuresPrSplit
    };

    // Further split the training data to have a validation set to measure
    // how well the model generalizes to unseen data during the optimization.
    var validationSplit = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
        .SplitSet(trainSet.Observations, trainSet.Targets);

    // Define the optimizer objective (function to minimize).
    Func<double[], OptimizerResult> minimize = p =>
    {
        // create the candidate learner using the current optimization parameters
        var candidateLearner = new RegressionSquareLossGradientBoostLearner(
            iterations: (int)p[0],
            learningRate: p[1],
            maximumTreeDepth: (int)p[2],
            subSampleRatio: p[3],
            featuresPrSplit: (int)p[4],
            runParallel: false);

        var candidateModel = candidateLearner.Learn(validationSplit.TrainingSet.Observations,
            validationSplit.TrainingSet.Targets);

        var validationPredictions = candidateModel.Predict(validationSplit.TestSet.Observations);
        var candidateError = metric.Error(validationSplit.TestSet.Targets, validationPredictions);

        // trace the current error
        Trace.WriteLine(string.Format("Candidate Error: {0:0.0000}, Candidate Parameters: {1}",
            candidateError, string.Join(", ", p)));

        return new OptimizerResult(p, candidateError);
    };

    // create random search optimizer
    var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);

    // find the best hyperparameters
    var result = optimizer.OptimizeBest(minimize);
    var best = result.ParameterSet;

    // create the final learner using the best hyperparameters
    var learner = new RegressionSquareLossGradientBoostLearner(
        iterations: (int)best[0],
        learningRate: best[1],
        maximumTreeDepth: (int)best[2],
        subSampleRatio: best[3],
        featuresPrSplit: (int)best[4],
        runParallel: false);

    // learn the model with the found parameters
    var model = learner.Learn(trainSet.Observations, trainSet.Targets);

    // predict the training and test sets
    var trainPredictions = model.Predict(trainSet.Observations);
    var testPredictions = model.Predict(testSet.Observations);

    // measure the error on the training and test sets
    var trainError = metric.Error(trainSet.Targets, trainPredictions);
    var testError = metric.Error(testSet.Targets, testPredictions);

    // trace the hyperparameters found by the optimizer
    Trace.WriteLine(string.Format("Found parameters, iterations: {0}, learning rate: {1:0.000}, maximumTreeDepth: {2}, subSampleRatio: {3:0.000}, featuresPrSplit: {4}",
        (int)best[0], best[1], (int)best[2], best[3], (int)best[4]));

    TraceTrainingAndTestError(trainError, testError);
}
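TraceTrainingAndTestError is a helper not shown in this listing; a minimal sketch of what it presumably does (the original may format the output differently):

// Hypothetical helper that logs both errors for comparison.
static void TraceTrainingAndTestError(double trainError, double testError)
{
    Trace.WriteLine(string.Format("Train error: {0:0.0000} - Test error: {1:0.0000}",
        trainError, testError));
}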
public static double FitGBT(double[] pred_Features)
{
    var parser = new CsvParser(() => new StreamReader("dataset.csv"), separator: ',');
    var targetName = "Y";

    // read the feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read the regression targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    var metric = new MeanSquaredErrorRegressionMetric();

    var parameters = new double[][]
    {
        new double[] { 80, 300 },                     // iterations (min: 80, max: 300)
        new double[] { 0.02, 0.2 },                   // learning rate (min: 0.02, max: 0.2)
        new double[] { 8, 15 },                       // maximumTreeDepth (min: 8, max: 15)
        new double[] { 0.5, 0.9 },                    // subSampleRatio (min: 0.5, max: 0.9)
        new double[] { 1, observations.ColumnCount }, // featuresPrSplit (min: 1, max: numberOfFeatures)
    };

    var validationSplit = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24)
        .SplitSet(observations, targets);

    Func<double[], OptimizerResult> minimize = p =>
    {
        // create the candidate learner using the current optimization parameters
        var candidateLearner = new RegressionSquareLossGradientBoostLearner(
            iterations: (int)p[0],
            learningRate: p[1],
            maximumTreeDepth: (int)p[2],
            subSampleRatio: p[3],
            featuresPrSplit: (int)p[4],
            runParallel: false);

        var candidateModel = candidateLearner.Learn(validationSplit.TrainingSet.Observations,
            validationSplit.TrainingSet.Targets);

        var validationPredictions = candidateModel.Predict(validationSplit.TestSet.Observations);
        var candidateError = metric.Error(validationSplit.TestSet.Targets, validationPredictions);

        return new OptimizerResult(p, candidateError);
    };

    // hyperparameter tuning
    var optimizer = new RandomSearchOptimizer(parameters, iterations: 30, runParallel: true);
    var result = optimizer.OptimizeBest(minimize);
    var best = result.ParameterSet;

    var learner = new RegressionSquareLossGradientBoostLearner(
        iterations: (int)best[0],
        learningRate: best[1],
        maximumTreeDepth: (int)best[2],
        subSampleRatio: best[3],
        featuresPrSplit: (int)best[4],
        runParallel: false);

    var model = learner.Learn(observations, targets);
    var prediction = model.Predict(pred_Features);

    return prediction;
}
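Note that FitGBT re-reads dataset.csv and re-runs the full 30-iteration search on every call; callers predicting many points would tune once and reuse the model. A hypothetical call site, assuming the feature values match the non-target column order of dataset.csv:

// hypothetical input; the number and order of values must match
// the non-target columns of dataset.csv
var features = new double[] { 0.5, 1.2, 3.4 };
var predictedY = FitGBT(features);
Console.WriteLine("Predicted Y: " + predictedY);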
public override double Evaluate(IChromosome chromosome)
{
    try
    {
        var parameters = Config.Genes.Select(s =>
            new MinMaxParameterSpec(
                min: s.Min ?? s.Actual.Value,
                max: s.Max ?? s.Actual.Value,
                transform: Transform.Linear,
                parameterType: s.Precision > 0 ? ParameterType.Continuous : ParameterType.Discrete)
            ).ToArray();

        IOptimizer optimizer = null;
        if (Config.Fitness != null)
        {
            if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.RandomSearch.ToString())
            {
                optimizer = new RandomSearchOptimizer(parameters, iterations: Config.Generations,
                    seed: Seed, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.ParticleSwarm.ToString())
            {
                optimizer = new ParticleSwarmOptimizer(parameters, maxIterations: Config.Generations,
                    numberOfParticles: Config.PopulationSize, seed: Seed, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Bayesian.ToString())
            {
                optimizer = new BayesianOptimizer(parameters: parameters, iterations: Config.Generations,
                    randomStartingPointCount: Config.PopulationSize,
                    functionEvaluationsPerIterationCount: Config.PopulationSize, seed: Seed);
                //optimizer = new BayesianOptimizer(parameters, iterations: Config.Generations,
                //    randomStartingPointCount: Config.PopulationSize, functionEvaluationsPerIteration: Config.MaxThreads,
                //    seed: 42, maxDegreeOfParallelism: Config.MaxThreads, allowMultipleEvaluations: true);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.GlobalizedBoundedNelderMead.ToString())
            {
                optimizer = new GlobalizedBoundedNelderMeadOptimizer(parameters, maxRestarts: Config.Generations,
                    maxIterationsPrRestart: Config.PopulationSize, seed: Seed, maxDegreeOfParallelism: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Smac.ToString())
            {
                optimizer = new SmacOptimizer(parameters, iterations: Config.Generations,
                    randomStartingPointCount: Config.PopulationSize, seed: 42,
                    functionEvaluationsPerIterationCount: Config.MaxThreads);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.GridSearch.ToString())
            {
                optimizer = new GridSearchOptimizer(parameters);
            }
            else if (Config.Fitness.OptimizerTypeName == Enums.OptimizerTypeOptions.Genetic.ToString())
            {
                throw new Exception("Genetic optimizer cannot be used with Sharpe Maximizer");
            }
        }
        else
        {
            throw new ArgumentException("No fitness section was configured.");
        }

        Func<double[], OptimizerResult> minimize = p => Minimize(p, (Chromosome)chromosome);

        var result = optimizer.OptimizeBest(minimize);

        Best = MergeFromResult(result, chromosome);
        return result.Error;
    }
    catch (Exception ex)
    {
        LogProvider.ErrorLogger.Error(ex);
        return ErrorFitness;
    }
}
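The branches above compare against an Enums.OptimizerTypeOptions enum that is not shown. Its members can be inferred from the values used across these snippets; a sketch (the member order is an assumption):

// Inferred from usage in the snippets above; ordering is assumed.
public enum OptimizerTypeOptions
{
    Genetic,
    RandomSearch,
    ParticleSwarm,
    Bayesian,
    GlobalizedBoundedNelderMead,
    Smac,
    GridSearch
}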
public void RandomSearchOptimizer_ArgumentCheck_ParameterRanges()
{
    var sut = new RandomSearchOptimizer(null, 10);
}
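This argument-check test presumably expects the constructor to throw on null parameter ranges; the original likely carries an expected-exception attribute that this listing strips. A hypothetical explicit-assert variant in MSTest v2 style, assuming the constructor throws ArgumentNullException:

// hypothetical explicit-assert variant of the test above
public void RandomSearchOptimizer_ArgumentCheck_ParameterRanges_Throws()
{
    Assert.ThrowsException<ArgumentNullException>(
        () => new RandomSearchOptimizer(null, 10));
}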