/// <summary>
/// Bayesian optimization (BO) for global black box optimization problems. BO learns a model based on the initial parameter sets and scores.
/// This model is used to sample new promising parameter candidates, which are evaluated and added to the existing parameter sets.
/// This process iterates several times. The method is computationally expensive, so it is most relevant for expensive problems
/// where each evaluation of the function to minimize takes a long time, like hyperparameter tuning of a machine learning method.
/// In that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
/// Implementation loosely based on:
/// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
/// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
/// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
/// </summary>
/// <param name="parameters">A list of parameter bounds for each optimization parameter</param>
/// <param name="maxIterations">Maximum number of iterations. maxIterations * numberOfCandidatesEvaluatedPrIteration = totalFunctionEvaluations</param>
/// <param name="numberOfStartingPoints">Number of randomly created starting points to use for the initial model in the first iteration (default is 5)</param>
/// <param name="numberOfCandidatesEvaluatedPrIteration">How many candidate parameter sets should be sampled from the model in each iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="seed">Seed for the random initialization</param>
public BayesianOptimizer(ParameterBounds[] parameters,
    int maxIterations,
    int numberOfStartingPoints = 5,
    int numberOfCandidatesEvaluatedPrIteration = 1,
    int seed = 42)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterations <= 0) { throw new ArgumentException("maxIterations must be at least 1"); }
    if (numberOfStartingPoints < 1) { throw new ArgumentException("numberOfStartingPoints must be at least 1"); }

    m_parameters = parameters;
    m_maxIterations = maxIterations;
    m_numberOfStartingPoints = numberOfStartingPoints;
    m_numberOfCandidatesEvaluatedPrIteration = numberOfCandidatesEvaluatedPrIteration;

    m_sampler = new RandomUniform(seed);

    // Hyperparameters for the regression extra trees learner. These are based on the values
    // suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter, the hyperparameters for the forest model
    // should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(30, 10, 2000, parameters.Length, 1e-6, 1.0, 42, false);

    // Optimizer for finding the maximum expectation (most promising hyperparameters) from the extra trees model.
    m_maximizer = new RandomSearchOptimizer(m_parameters, 1000, 42, false);

    // Acquisition function to maximize.
    m_acquisitionFunc = AcquisitionFunctions.ExpectedImprovement;
}
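// Minimal usage sketch for the constructor above. The ParameterBounds(min, max)
// constructor and the OptimizeBest(Func<double[], OptimizerResult>) method used here
// are assumptions (not shown in this section), so treat the exact API as illustrative.
public static void BayesianOptimizerUsageSketch()
{
    var parameters = new ParameterBounds[]
    {
        new ParameterBounds(min: 0.0, max: 1.0), // hypothetical single parameter
    };

    var optimizer = new BayesianOptimizer(parameters, maxIterations: 30);

    // The function to minimize maps a parameter set to a score (lower is better).
    Func<double[], OptimizerResult> minimize = p =>
        new OptimizerResult(p, Math.Pow(p[0] - 0.1, 2));

    var best = optimizer.OptimizeBest(minimize);
}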
/// <summary>
/// Bayesian optimization (BO) for global black box optimization problems. BO learns a model based on the initial parameter sets and scores.
/// This model is used to sample new promising parameter candidates, which are evaluated and added to the existing parameter sets.
/// This process iterates several times. The method is computationally expensive, so it is most relevant for expensive problems
/// where each evaluation of the function to minimize takes a long time, like hyperparameter tuning of a machine learning method.
/// In that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
/// Implementation loosely based on:
/// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
/// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
/// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
/// </summary>
/// <param name="parameters">A list of parameter bounds for each optimization parameter</param>
/// <param name="maxIterations">Maximum number of iterations. maxIterations * numberOfCandidatesEvaluatedPrIteration = totalFunctionEvaluations</param>
/// <param name="previousParameterSets">Parameter sets from a previous run</param>
/// <param name="previousParameterSetScores">Scores from a previous run, corresponding to each parameter set</param>
/// <param name="numberOfCandidatesEvaluatedPrIteration">How many candidate parameter sets should be sampled from the model in each iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="seed">Seed for the random initialization</param>
public BayesianOptimizer(ParameterBounds[] parameters,
    int maxIterations,
    List<double[]> previousParameterSets,
    List<double> previousParameterSetScores,
    int numberOfCandidatesEvaluatedPrIteration = 1,
    int seed = 42)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterations <= 0) { throw new ArgumentException("maxIterations must be at least 1"); }
    if (previousParameterSets == null) { throw new ArgumentNullException("previousParameterSets"); }
    if (previousParameterSetScores == null) { throw new ArgumentNullException("previousParameterSetScores"); }

    if (previousParameterSets.Count != previousParameterSetScores.Count)
    {
        throw new ArgumentException("previousParameterSets length: " + previousParameterSets.Count +
            " does not correspond with previousParameterSetScores length: " + previousParameterSetScores.Count);
    }

    if (previousParameterSets.Count < 2)
    {
        throw new ArgumentException("previousParameterSets and previousParameterSetScores must contain at least 2 elements, but contained: " + previousParameterSets.Count);
    }

    m_parameters = parameters;
    m_maxIterations = maxIterations;
    m_numberOfCandidatesEvaluatedPrIteration = numberOfCandidatesEvaluatedPrIteration;

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());

    // Hyperparameters for the regression extra trees learner. These are based on the values
    // suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter, the hyperparameters for the forest model
    // should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(trees: 30,
        minimumSplitSize: 10,
        maximumTreeDepth: 2000,
        featuresPrSplit: parameters.Length,
        minimumInformationGain: 1e-6,
        subSampleRatio: 1.0,
        seed: m_random.Next(), // Use member random to seed the learner.
        runParallel: false);

    // Optimizer for finding the maximum expectation (most promising hyperparameters) from the extra trees model.
    m_maximizer = new RandomSearchOptimizer(m_parameters, iterations: 1000,
        seed: m_random.Next(), // Use member random to seed the maximizer.
        runParallel: false);

    // Acquisition function to maximize.
    m_acquisitionFunc = AcquisitionFunctions.ExpectedImprovement;

    m_previousParameterSets = previousParameterSets;
    m_previousParameterSetScores = previousParameterSetScores;
}
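// Warm-start sketch: results from an earlier run can be fed back in through this
// overload. As above, OptimizeBest and OptimizerResult are assumed APIs.
public static void WarmStartedBayesianOptimizerSketch(ParameterBounds[] parameters)
{
    var previousParameterSets = new List<double[]> { new[] { 0.2 }, new[] { 0.8 } };
    var previousScores = new List<double> { 0.01, 0.49 }; // one score per parameter set

    var optimizer = new BayesianOptimizer(parameters, maxIterations: 10,
        previousParameterSets, previousScores);

    var best = optimizer.OptimizeBest(p => new OptimizerResult(p, Math.Pow(p[0] - 0.1, 2)));
}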
/// <summary>
/// Globalized bounded Nelder-Mead method. This version of Nelder-Mead optimization
/// avoids some of the shortcomings of the standard implementation.
/// Specifically, it is better suited for multimodal optimization problems through its restart property.
/// It also respects the bounds given by the provided parameter space.
/// Roughly based on:
/// http://home.ku.edu.tr/~daksen/2004-Nelder-Mead-Method-Wolff.pdf
/// and
/// http://www.emse.fr/~leriche/GBNM_SMO_1026_final.pdf
/// </summary>
/// <param name="parameters">Each row is a series of values for a specific parameter</param>
/// <param name="maxRestarts">Maximum number of restarts (default is 8)</param>
/// <param name="noImprovementThreshold">Minimum value of improvement before the improvement is accepted as an actual improvement (default is 0.001)</param>
/// <param name="maxIterationsWithoutImprovement">Maximum number of iterations without an improvement (default is 5)</param>
/// <param name="maxIterationsPrRestart">Maximum iterations pr. restart. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="maxFunctionEvaluations">Maximum function evaluations. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="alpha">Coefficient for the reflection part of the algorithm (default is 1)</param>
/// <param name="gamma">Coefficient for the expansion part of the algorithm (default is 2)</param>
/// <param name="rho">Coefficient for the contraction part of the algorithm (default is -0.5)</param>
/// <param name="sigma">Coefficient for the shrink part of the algorithm (default is 0.5)</param>
public GlobalizedBoundedNelderMeadOptimizer(ParameterBounds[] parameters,
    int maxRestarts = 8,
    double noImprovementThreshold = 0.001,
    int maxIterationsWithoutImprovement = 5,
    int maxIterationsPrRestart = 0,
    int maxFunctionEvaluations = 0,
    double alpha = 1,
    double gamma = 2,
    double rho = -0.5,
    double sigma = 0.5)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterationsWithoutImprovement <= 0) { throw new ArgumentException("maxIterationsWithoutImprovement must be at least 1"); }
    if (maxFunctionEvaluations < 0) { throw new ArgumentException("maxFunctionEvaluations must be at least 0"); }

    m_maxRestarts = maxRestarts;
    m_maxIterationsPrRestart = maxIterationsPrRestart;
    m_alpha = alpha;
    m_gamma = gamma;
    m_rho = rho;
    m_sigma = sigma;
    m_parameters = parameters;
    m_noImprovementThreshold = noImprovementThreshold;
    m_maxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
    m_maxFunctionEvaluations = maxFunctionEvaluations;

    m_random = new Random(324); // Fixed seed for the random restarts.
    m_sampler = new RandomUniform();
}
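// To make the role of alpha (and, by the same pattern, gamma, rho, and sigma) concrete,
// this is the standard textbook Nelder-Mead reflection rule; an illustration, not this
// class's internal code. centroid is the mean of all simplex points except the worst.
public static double[] Reflect(double[] centroid, double[] worst, double alpha)
{
    var result = new double[centroid.Length];
    for (int i = 0; i < result.Length; i++)
    {
        // Reflection: x_r = centroid + alpha * (centroid - worst). With the default
        // alpha = 1, the worst point is mirrored through the centroid. Expansion,
        // contraction, and shrink apply gamma, rho, and sigma in the same manner.
        result[i] = centroid[i] + alpha * (centroid[i] - worst[i]);
    }
    return result;
}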
/// <summary>
/// Globalized bounded Nelder-Mead method. This version of Nelder-Mead optimization
/// avoids some of the shortcomings of the standard implementation.
/// Specifically, it is better suited for multimodal optimization problems through its restart property.
/// It also respects the bounds given by the provided parameter space.
/// Roughly based on:
/// http://home.ku.edu.tr/~daksen/2004-Nelder-Mead-Method-Wolff.pdf
/// and
/// http://www.emse.fr/~leriche/GBNM_SMO_1026_final.pdf
/// </summary>
/// <param name="parameters">Each row is a series of values for a specific parameter</param>
/// <param name="startingValue">Starting parameter set for the optimization</param>
/// <param name="maxRestarts">Maximum number of restarts (default is 8)</param>
/// <param name="noImprovementThreshold">Minimum value of improvement before the improvement is accepted as an actual improvement (default is 0.001)</param>
/// <param name="maxIterationsWithoutImprovement">Maximum number of iterations without an improvement (default is 5)</param>
/// <param name="maxIterationsPrRestart">Maximum iterations pr. restart. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="maxFunctionEvaluations">Maximum function evaluations. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="alpha">Coefficient for the reflection part of the algorithm (default is 1)</param>
/// <param name="gamma">Coefficient for the expansion part of the algorithm (default is 2)</param>
/// <param name="rho">Coefficient for the contraction part of the algorithm (default is -0.5)</param>
/// <param name="sigma">Coefficient for the shrink part of the algorithm (default is 0.5)</param>
public NelderMeadWithStartPoints(ParameterBounds[] parameters,
    double[] startingValue,
    int maxRestarts = 8,
    double noImprovementThreshold = 0.001,
    int maxIterationsWithoutImprovement = 5,
    int maxIterationsPrRestart = 0,
    int maxFunctionEvaluations = 0,
    double alpha = 1,
    double gamma = 2,
    double rho = -0.5,
    double sigma = 0.5)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterationsWithoutImprovement <= 0) { throw new ArgumentException("maxIterationsWithoutImprovement must be at least 1"); }
    if (maxFunctionEvaluations < 0) { throw new ArgumentException("maxFunctionEvaluations must be at least 0"); }

    m_maxRestarts = maxRestarts;
    m_maxIterationsPrRestart = maxIterationsPrRestart;
    m_alpha = alpha;
    m_gamma = gamma;
    m_rho = rho;
    m_sigma = sigma;
    m_parameters = parameters;
    m_noImprovementThreshold = noImprovementThreshold;
    m_maxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
    m_maxFunctionEvaluations = maxFunctionEvaluations;
    this.startingValue = startingValue;

    // Seeds derived from the first starting value, so runs with the same start point are deterministic.
    m_random = new Random(startingValue[0].GetHashCode());
    m_sampler = new RandomUniform(startingValue[0].GetHashCode());
}
/// <summary>
/// Particle Swarm Optimizer (PSO). PSO is initialized with a group of random particles
/// and then searches for optima by updating generations. In every iteration, each particle is updated by following two "best" values.
/// The first one is the best solution found by the specific particle so far.
/// The other "best" value is the global best value obtained by any particle in the population so far.
/// </summary>
/// <param name="parameters">A list of parameter bounds for each optimization parameter</param>
/// <param name="maxIterations">Maximum number of iterations. maxIterations * numberOfParticles = totalFunctionEvaluations</param>
/// <param name="numberOfParticles">The number of particles to use (default is 10).
/// maxIterations * numberOfParticles = totalFunctionEvaluations</param>
/// <param name="c1">Learning factor weighting the local particle best solution (default is 2)</param>
/// <param name="c2">Learning factor weighting the global best solution (default is 2)</param>
/// <param name="seed">Seed for the random initialization and velocity corrections</param>
public ParticleSwarmOptimizer(ParameterBounds[] parameters,
    int maxIterations,
    int numberOfParticles = 10,
    double c1 = 2,
    double c2 = 2,
    int seed = 42)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterations <= 0) { throw new ArgumentException("maxIterations must be at least 1"); }
    if (numberOfParticles < 1) { throw new ArgumentException("numberOfParticles must be at least 1"); }

    m_parameters = parameters;
    m_maxIterations = maxIterations;
    m_numberOfParticles = numberOfParticles;
    m_c1 = c1;
    m_c2 = c2;

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());
}
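// For reference, the canonical PSO velocity update that c1 and c2 weight. This is the
// textbook form; this implementation's exact variant may differ. r1 and r2 are fresh
// uniform [0, 1] draws per dimension.
public static void UpdateVelocity(double[] velocity, double[] position,
    double[] particleBest, double[] globalBest,
    double c1, double c2, Random random)
{
    for (int i = 0; i < velocity.Length; i++)
    {
        var r1 = random.NextDouble();
        var r2 = random.NextDouble();
        velocity[i] = velocity[i]
            + c1 * r1 * (particleBest[i] - position[i]) // pull toward the particle's own best
            + c2 * r2 * (globalBest[i] - position[i]);  // pull toward the global best
    }
}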
/// <summary>
/// Implementation of the SMAC algorithm for hyperparameter optimization.
/// Based on: Sequential Model-Based Optimization for General Algorithm Configuration:
/// https://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf
/// Uses Bayesian optimization in tandem with a greedy local search on the top performing solutions.
/// And the ML.NET implementation:
/// https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.Sweeper/Algorithms/SmacSweeper.cs
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="iterations">The number of iterations to perform.
/// iterations * functionEvaluationsPerIterationCount = totalFunctionEvaluations</param>
/// <param name="randomStartingPointCount">Number of random parameter sets used
/// for initialization (default is 20)</param>
/// <param name="functionEvaluationsPerIterationCount">The number of function evaluations per iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="localSearchPointCount">The number of top contenders
/// to use in the greedy local search (default is 10)</param>
/// <param name="randomSearchPointCount">The number of random parameter sets
/// used when maximizing the expected improvement acquisition function (default is 1000)</param>
/// <param name="epsilon">Threshold for ending local search (default is 0.00001)</param>
/// <param name="seed"></param>
public SmacOptimizer(IParameterSpec[] parameters,
    int iterations,
    int randomStartingPointCount = 20,
    int functionEvaluationsPerIterationCount = 1,
    int localSearchPointCount = 10,
    int randomSearchPointCount = 1000,
    double epsilon = 0.00001,
    int seed = 42)
{
    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));

    if (iterations < 1)
    { throw new ArgumentException(nameof(iterations) + " must be at least 1. Was: " + iterations); }

    if (randomStartingPointCount < 1)
    { throw new ArgumentException(nameof(randomStartingPointCount) + " must be at least 1. Was: " + randomStartingPointCount); }

    if (functionEvaluationsPerIterationCount < 1)
    { throw new ArgumentException(nameof(functionEvaluationsPerIterationCount) + " must be at least 1. Was: " + functionEvaluationsPerIterationCount); }

    if (localSearchPointCount < 1)
    { throw new ArgumentException(nameof(localSearchPointCount) + " must be at least 1. Was: " + localSearchPointCount); }

    if (randomSearchPointCount < 1)
    { throw new ArgumentException(nameof(randomSearchPointCount) + " must be at least 1. Was: " + randomSearchPointCount); }

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());

    m_iterations = iterations;
    m_randomStartingPointsCount = randomStartingPointCount;
    m_functionEvaluationsPerIterationCount = functionEvaluationsPerIterationCount;
    m_localSearchPointCount = localSearchPointCount;
    m_randomSearchPointCount = randomSearchPointCount;
    m_epsilon = epsilon;

    // Hyperparameters for the regression extra trees learner.
    // These are based on the values suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter,
    // the hyperparameters for the forest model should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(trees: 10,
        minimumSplitSize: 2,
        maximumTreeDepth: 2000,
        featuresPrSplit: parameters.Length,
        minimumInformationGain: 1e-6,
        subSampleRatio: 1.0,
        seed: m_random.Next(), // Use member random to seed the learner.
        runParallel: false);
}
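// Usage sketch for SmacOptimizer. The MinMaxParameterSpec constructor and the
// OptimizeBest(Func<double[], OptimizerResult>) method are assumed APIs, not shown
// in this section.
public static void SmacOptimizerUsageSketch()
{
    var parameters = new IParameterSpec[]
    {
        new MinMaxParameterSpec(min: 0.0001, max: 1.0), // hypothetical single parameter
    };

    var smac = new SmacOptimizer(parameters, iterations: 50);
    var best = smac.OptimizeBest(p => new OptimizerResult(p, Math.Pow(p[0] - 0.1, 2)));
}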
/// <summary>
/// Hyperband optimizer based on: https://arxiv.org/pdf/1603.06560.pdf
///
/// Hyperband controls a budget of compute for each set of hyperparameters.
/// Initially it will run each parameter set with a very small compute budget to get a taste of how they perform.
/// Then it takes the best performers and runs them on a larger budget.
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="maximumUnitsOfCompute">This indicates the maximum units of compute.
/// A unit of compute could be 5 epochs over a dataset, for instance. Consequently,
/// a unit of compute should be chosen to be the minimum amount of computation where different
/// hyperparameter configurations start to separate (or where it is clear that some settings diverge)</param>
/// <param name="eta">Controls the proportion of configurations discarded in each round.
/// Together with maximumUnitsOfCompute, it dictates how many rounds are considered</param>
/// <param name="skipLastIterationOfEachRound">True to skip the last,
/// most computationally expensive, iteration of each round. Default is false.</param>
/// <param name="seed">Seed for the random sampling of parameter sets</param>
public HyperbandOptimizer(IParameterSpec[] parameters,
    int maximumUnitsOfCompute = 81,
    int eta = 3,
    bool skipLastIterationOfEachRound = false,
    int seed = 34)
{
    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
    if (maximumUnitsOfCompute < 1) { throw new ArgumentException(nameof(maximumUnitsOfCompute) + " must be larger than 0"); }
    if (eta < 1) { throw new ArgumentException(nameof(eta) + " must be larger than 0"); }

    m_sampler = new RandomUniform(seed);

    // This is called R in the paper.
    m_maximumUnitsOfCompute = maximumUnitsOfCompute;
    m_eta = eta;

    // This is called `s max` in the paper.
    m_numberOfRounds = (int)(Math.Log(m_maximumUnitsOfCompute) / Math.Log(m_eta));
    // This is called `B` in the paper.
    m_totalUnitsOfComputePerRound = (m_numberOfRounds + 1) * m_maximumUnitsOfCompute;

    // Suggestion by fastml: http://fastml.com/tuning-hyperparams-fast-with-hyperband/
    // "One could discard the last tier (1 x 81, 2 x 81, etc.) in each round,
    // including the last round. This drastically reduces time needed."
    m_skipLastIterationOfEachRound = skipLastIterationOfEachRound;
}
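// The round arithmetic above, evaluated with the Hyperband paper's bracket formulas for
// the defaults R = 81 and eta = 3. It yields configuration counts 81, 34, 15, 8, 5 with
// starting budgets 1, 3, 9, 27, 81 units of compute. A sketch for intuition only.
public static void PrintHyperbandSchedule(int R = 81, int eta = 3)
{
    var sMax = (int)(Math.Log(R) / Math.Log(eta)); // 4, giving rounds s = 4, 3, 2, 1, 0
    var B = (sMax + 1) * R;                        // 405 total units of compute per round

    for (int s = sMax; s >= 0; s--)
    {
        // n configurations, each given r units of compute before successive halving kicks in.
        var n = (int)Math.Ceiling((double)B / R * Math.Pow(eta, s) / (s + 1));
        var r = R * Math.Pow(eta, -s);
        Console.WriteLine($"round s={s}: {n} configs at {r} units each");
    }
}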
/// <summary>
/// Globalized bounded Nelder-Mead method. This version of Nelder-Mead optimization
/// avoids some of the shortcomings of the standard implementation.
/// Specifically, it is better suited for multi-modal optimization problems through its restart property.
/// It also respects the bounds given by the provided parameter space.
/// Roughly based on:
/// http://home.ku.edu.tr/~daksen/2004-Nelder-Mead-Method-Wolff.pdf
/// and
/// http://www.emse.fr/~leriche/GBNM_SMO_1026_final.pdf
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="maxRestarts">Maximum number of restarts (default is 8)</param>
/// <param name="noImprovementThreshold">Minimum value of improvement before the improvement is accepted as an actual improvement (default is 0.001)</param>
/// <param name="maxIterationsWithoutImprovement">Maximum number of iterations without an improvement (default is 5)</param>
/// <param name="maxIterationsPrRestart">Maximum iterations pr. restart. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="maxFunctionEvaluations">Maximum function evaluations. 0 is no limit and will run to convergence (default is 0)</param>
/// <param name="alpha">Coefficient for the reflection part of the algorithm (default is 1)</param>
/// <param name="gamma">Coefficient for the expansion part of the algorithm (default is 2)</param>
/// <param name="rho">Coefficient for the contraction part of the algorithm (default is -0.5)</param>
/// <param name="sigma">Coefficient for the shrink part of the algorithm (default is 0.5)</param>
/// <param name="seed">Seed for random restarts</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent operations (default is -1 (unlimited))</param>
public GlobalizedBoundedNelderMeadOptimizer(IParameterSpec[] parameters,
    int maxRestarts = 8,
    double noImprovementThreshold = 0.001,
    int maxIterationsWithoutImprovement = 5,
    int maxIterationsPrRestart = 0,
    int maxFunctionEvaluations = 0,
    double alpha = 1,
    double gamma = 2,
    double rho = -0.5,
    double sigma = 0.5,
    int seed = 324,
    int maxDegreeOfParallelism = -1)
{
    if (maxIterationsWithoutImprovement <= 0) { throw new ArgumentException("maxIterationsWithoutImprovement must be at least 1"); }
    if (maxFunctionEvaluations < 0) { throw new ArgumentException("maxFunctionEvaluations must be at least 0"); }

    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
    m_maxRestarts = maxRestarts;
    m_maxIterationsPrRestart = maxIterationsPrRestart;
    m_alpha = alpha;
    m_gamma = gamma;
    m_rho = rho;
    m_sigma = sigma;
    m_noImprovementThreshold = noImprovementThreshold;
    m_maxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
    m_maxFunctionEvaluations = maxFunctionEvaluations;
    m_maxDegreeOfParallelism = maxDegreeOfParallelism;

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());
}
/// <summary>
/// Particle Swarm Optimizer (PSO). PSO is initialized with a group of random particles
/// and then searches for optima by updating generations. In every iteration, each particle is updated by following two "best" values.
/// The first one is the best solution found by the specific particle so far.
/// The other "best" value is the global best value obtained by any particle in the population so far.
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="maxIterations">Maximum number of iterations. maxIterations * numberOfParticles = totalFunctionEvaluations</param>
/// <param name="numberOfParticles">The number of particles to use (default is 10).
/// maxIterations * numberOfParticles = totalFunctionEvaluations</param>
/// <param name="c1">Learning factor weighting the local particle best solution (default is 2)</param>
/// <param name="c2">Learning factor weighting the global best solution (default is 2)</param>
/// <param name="seed">Seed for the random initialization and velocity corrections</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent operations (default is -1 (unlimited))</param>
public ParticleSwarmOptimizer(
    IParameterSpec[] parameters,
    int maxIterations,
    int numberOfParticles = 10,
    double c1 = 2,
    double c2 = 2,
    int seed = 42,
    int maxDegreeOfParallelism = -1)
{
    if (maxIterations <= 0) { throw new ArgumentException("maxIterations must be at least 1"); }
    if (numberOfParticles < 1) { throw new ArgumentException("numberOfParticles must be at least 1"); }

    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
    m_maxIterations = maxIterations;
    m_numberOfParticles = numberOfParticles;
    m_c1 = c1;
    m_c2 = c2;
    m_maxDegreeOfParallelism = maxDegreeOfParallelism;
    m_bestLocker = new object();

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());
}
/// <summary>
/// Random search optimizer. Initializes random parameters between the min and max of the provided parameters.
/// Roughly based on: http://www.jmlr.org/papers/volume13/bergstra12a/bergstra12a.pdf
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="iterations">The number of iterations to perform</param>
/// <param name="seed"></param>
/// <param name="runParallel">Use multi-threading to speed up execution (default is true)</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent operations (default is -1 (unlimited))</param>
public RandomSearchOptimizer(IParameterSpec[] parameters,
    int iterations,
    int seed = 42,
    bool runParallel = true,
    int maxDegreeOfParallelism = -1)
{
    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
    m_runParallel = runParallel;
    m_sampler = new RandomUniform(seed);
    m_iterations = iterations;
    m_maxDegreeOfParallelism = maxDegreeOfParallelism;
}
/// <summary>
/// Samples a value defined for the parameter.
/// </summary>
/// <param name="sampler"></param>
/// <returns></returns>
public double SampleValue(IParameterSampler sampler)
{
    // Sample a random parameter index.
    var index = (int)sampler.Sample(m_minIndex, m_maxIndex, m_parameterType);

    // Return the value at the sampled index.
    return m_parameters[index];
}
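// Sketch of what the method above means for a grid-style parameter: sampling picks a
// random index and returns the stored value at that index. GridParameterSpec is an
// assumed class name for a spec wrapping a fixed list of values, as the fields suggest.
var spec = new GridParameterSpec(10, 20, 30, 40);
var sampler = new RandomUniform(seed: 42);
var value = spec.SampleValue(sampler); // one of 10, 20, 30, 40, chosen uniformly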
/// <summary>
/// Random search optimizer. Initializes random parameters between the min and max of the provided parameters.
/// Roughly based on: http://www.jmlr.org/papers/volume13/bergstra12a/bergstra12a.pdf
/// </summary>
/// <param name="parameterRanges">A list of parameter bounds for each optimization parameter</param>
/// <param name="iterations">The number of iterations to perform</param>
/// <param name="seed"></param>
/// <param name="runParallel">Use multi-threading to speed up execution (default is true)</param>
public RandomSearchOptimizer(ParameterBounds[] parameterRanges,
    int iterations,
    int seed = 42,
    bool runParallel = true)
{
    if (parameterRanges == null) { throw new ArgumentNullException("parameterRanges"); }

    m_parameters = parameterRanges;
    m_runParallel = runParallel;
    m_sampler = new RandomUniform(seed);
    m_iterations = iterations;
}
/// <summary>
/// Samples a random parameter set.
/// </summary>
/// <param name="parameters"></param>
/// <param name="sampler"></param>
/// <returns></returns>
public static double[] SampleParameterSet(IParameterSpec[] parameters,
    IParameterSampler sampler)
{
    var parameterSet = new double[parameters.Length];

    for (var i = 0; i < parameters.Length; i++)
    {
        var parameter = parameters[i];
        parameterSet[i] = parameter.SampleValue(sampler);
    }

    return parameterSet;
}
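// A short sketch of drawing a single parameter set from two specs. MinMaxParameterSpec
// is an assumed spec type here; any IParameterSpec implementation would work.
var specs = new IParameterSpec[]
{
    new MinMaxParameterSpec(min: 1.0, max: 10.0),
    new MinMaxParameterSpec(min: 0.0001, max: 1.0),
};

var sampler = new RandomUniform(seed: 42);
var parameterSet = SampleParameterSet(specs, sampler); // one value per spec, e.g. [7.3, 0.42]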
/// <summary>
/// Bayesian optimization (BO) for global black box optimization problems. BO learns a model based on the initial parameter sets and scores.
/// This model is used to sample new promising parameter candidates which are evaluated and added to the existing parameter sets.
/// This process iterates several times. The method is computationally expensive, so it is most relevant for expensive problems
/// where each evaluation of the function to minimize takes a long time, like hyperparameter tuning of a machine learning method.
/// In that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
/// Implementation loosely based on:
/// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
/// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
/// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="iterations">Number of iterations. iterations * functionEvaluationsPerIteration = totalFunctionEvaluations</param>
/// <param name="randomStartingPointCount">Number of randomly created starting points to use for the initial model in the first iteration (default is 5)</param>
/// <param name="functionEvaluationsPerIteration">The number of function evaluations per iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="seed">Seed for the random initialization</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent operations. Default is -1 (unlimited)</param>
/// <param name="allowMultipleEvaluations">Enables re-evaluation of duplicate parameter sets for non-deterministic functions</param>
public BayesianOptimizer(IParameterSpec[] parameters,
    int iterations,
    int randomStartingPointCount = 5,
    int functionEvaluationsPerIteration = 1,
    int seed = 42,
    int maxDegreeOfParallelism = -1,
    bool allowMultipleEvaluations = false)
{
    if (iterations <= 0) { throw new ArgumentException("iterations must be at least 1"); }
    if (randomStartingPointCount < 1) { throw new ArgumentException("randomStartingPointCount must be at least 1"); }

    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
    m_iterations = iterations;
    m_randomStartingPointCount = randomStartingPointCount;
    m_functionEvaluationsPerIteration = functionEvaluationsPerIteration;

    // Run in parallel unless the max degree of parallelism is exactly 1.
    m_runParallel = maxDegreeOfParallelism != 1;
    m_parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
    m_allowMultipleEvaluations = allowMultipleEvaluations;
    m_locker = new object();

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());

    // Hyperparameters for the regression extra trees learner. These are based on the values
    // suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter, the hyperparameters for the forest model
    // should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(trees: 30,
        minimumSplitSize: 10,
        maximumTreeDepth: 2000,
        featuresPrSplit: parameters.Length,
        minimumInformationGain: 1e-6,
        subSampleRatio: 1.0,
        seed: m_random.Next(), // Use member random to seed the learner.
        runParallel: m_runParallel);

    // Optimizer for finding the maximum expectation (most promising hyperparameters) from the extra trees model.
    m_maximizer = new RandomSearchOptimizer(m_parameters, iterations: 1000,
        seed: m_random.Next(), // Use member random to seed the maximizer.
        runParallel: maxDegreeOfParallelism > 1);

    // Acquisition function to maximize.
    m_acquisitionFunc = AcquisitionFunctions.ExpectedImprovement;
}
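// Expected Improvement in sketch form: the standard closed-form expression for a
// Gaussian posterior when minimizing. An illustration, not necessarily the exact
// formula inside AcquisitionFunctions.ExpectedImprovement. mean and stdDev come from
// the surrogate model's prediction; bestScore is the best score observed so far.
public static double ExpectedImprovementSketch(double mean, double stdDev, double bestScore)
{
    if (stdDev == 0.0) { return 0.0; }
    var z = (bestScore - mean) / stdDev; // improvement is measured below bestScore
    return (bestScore - mean) * CumulativeNormal(z) + stdDev * NormalDensity(z);
}

static double NormalDensity(double z) => Math.Exp(-0.5 * z * z) / Math.Sqrt(2.0 * Math.PI);

// Abramowitz-Stegun style polynomial approximation of the standard normal CDF.
static double CumulativeNormal(double z)
{
    var t = 1.0 / (1.0 + 0.2316419 * Math.Abs(z));
    var poly = t * (0.319381530 + t * (-0.356563782 + t * (1.781477937
        + t * (-1.821255978 + t * 1.330274429))));
    var cdf = 1.0 - NormalDensity(z) * poly;
    return z >= 0.0 ? cdf : 1.0 - cdf;
}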
/// <summary>
/// Logarithmic scale. For ranges with a large difference in numerical scale, like min: 0.0001 and max: 1.0.
/// </summary>
/// <param name="min"></param>
/// <param name="max"></param>
/// <param name="sampler"></param>
/// <returns></returns>
public double Transform(double min, double max, IParameterSampler sampler)
{
    if (min <= 0 || max <= 0)
    {
        throw new ArgumentException($"logarithmic scale requires min: {min} and max: {max} to be larger than zero");
    }

    var a = Math.Log10(min);
    var b = Math.Log10(max);
    var r = sampler.Sample(a, b);
    return Math.Pow(10, r);
}
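// Worked example: with min = 0.0001 and max = 1.0, a = -4 and b = 0, so r is drawn
// uniformly from [-4, 0] and 10^r lands in each decade (0.0001-0.001, 0.001-0.01, ...)
// with equal probability. That equal-mass-per-decade behavior is the point of the
// logarithmic scale. Log10Transform is an assumed name for the class defining Transform.
var sampler = new RandomUniform(seed: 42);
var learningRate = new Log10Transform().Transform(min: 0.0001, max: 1.0, sampler);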
/// <summary>
/// ExponentialAverage scale. For ranges close to one, like min: 0.9 and max: 0.999.
/// Note that the min and max values must be smaller than 1 for this transform.
/// </summary>
/// <param name="min"></param>
/// <param name="max"></param>
/// <param name="parameterType">Selects the type of parameter. Should the parameter be sampled as discrete values, or as continuous values.</param>
/// <param name="sampler"></param>
/// <returns></returns>
public double Transform(double min, double max, ParameterType parameterType, IParameterSampler sampler)
{
    if (min >= 1 || max >= 1)
    {
        throw new ArgumentException($"ExponentialAverage scale requires min: {min} and max: {max} to be smaller than one");
    }

    var a = Math.Log10(1 - max);
    var b = Math.Log10(1 - min);
    var r = sampler.Sample(a, b, parameterType);
    return 1.0 - Math.Pow(10, r);
}
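// Worked example: with min = 0.9 and max = 0.999, a = log10(1 - 0.999) = -3 and
// b = log10(1 - 0.9) = -1, so r is uniform in [-3, -1] and 1 - 10^r spreads samples
// evenly across 0.9, 0.99, and 0.999 (handy for momentum-style parameters).
// ExponentialAverageTransform is an assumed name for the class defining Transform.
var sampler = new RandomUniform(seed: 42);
var momentum = new ExponentialAverageTransform()
    .Transform(min: 0.9, max: 0.999, ParameterType.Continuous, sampler);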
/// <summary>
/// Samples a set of random parameter sets.
/// </summary>
/// <param name="parameterSetCount"></param>
/// <param name="parameters"></param>
/// <param name="sampler"></param>
/// <returns></returns>
public static double[][] SampleRandomParameterSets(int parameterSetCount,
    IParameterSpec[] parameters,
    IParameterSampler sampler)
{
    var parameterSets = new double[parameterSetCount][];
    for (var i = 0; i < parameterSetCount; i++)
    {
        parameterSets[i] = SampleParameterSet(parameters, sampler);
    }

    return parameterSets;
}
/// <summary>
/// Bayesian optimization (BO) for global black box optimization problems. BO learns a model based on the initial parameter sets and scores.
/// This model is used to sample new promising parameter candidates, which are evaluated and added to the existing parameter sets.
/// This process iterates several times. The method is computationally expensive, so it is most relevant for expensive problems
/// where each evaluation of the function to minimize takes a long time, like hyperparameter tuning of a machine learning method.
/// In that case it can usually reduce the number of iterations required to reach a good solution compared to less sophisticated methods.
/// Implementation loosely based on:
/// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
/// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
/// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
/// </summary>
/// <param name="parameters">A list of parameter bounds for each optimization parameter</param>
/// <param name="maxIterations">Maximum number of iterations. maxIterations * numberOfCandidatesEvaluatedPrIteration = totalFunctionEvaluations</param>
/// <param name="previousParameterSets">Parameter sets from a previous run</param>
/// <param name="previousParameterSetScores">Scores from a previous run, corresponding to each parameter set</param>
/// <param name="numberOfCandidatesEvaluatedPrIteration">How many candidate parameter sets should be sampled from the model in each iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="seed">Seed for the random initialization</param>
public BayesianOptimizer(ParameterBounds[] parameters,
    int maxIterations,
    List<double[]> previousParameterSets,
    List<double> previousParameterSetScores,
    int numberOfCandidatesEvaluatedPrIteration = 1,
    int seed = 42)
{
    if (parameters == null) { throw new ArgumentNullException("parameters"); }
    if (maxIterations <= 0) { throw new ArgumentException("maxIterations must be at least 1"); }
    if (previousParameterSets == null) { throw new ArgumentNullException("previousParameterSets"); }
    if (previousParameterSetScores == null) { throw new ArgumentNullException("previousParameterSetScores"); }

    if (previousParameterSets.Count != previousParameterSetScores.Count)
    {
        throw new ArgumentException("previousParameterSets length: " + previousParameterSets.Count +
            " does not correspond with previousParameterSetScores length: " + previousParameterSetScores.Count);
    }

    if (previousParameterSets.Count < 2)
    {
        throw new ArgumentException("previousParameterSets and previousParameterSetScores must contain at least 2 elements, but contained: " + previousParameterSets.Count);
    }

    m_parameters = parameters;
    m_maxIterations = maxIterations;
    m_numberOfCandidatesEvaluatedPrIteration = numberOfCandidatesEvaluatedPrIteration;

    m_sampler = new RandomUniform(seed);

    // Hyperparameters for the regression extra trees learner. These are based on the values
    // suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter, the hyperparameters for the forest model
    // should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(30, 10, 2000, parameters.Length, 1e-6, 1.0, 42, false);

    // Optimizer for finding the maximum expectation (most promising hyperparameters) from the extra trees model.
    m_maximizer = new RandomSearchOptimizer(m_parameters, 1000, 42, false);

    // Acquisition function to maximize.
    m_acquisitionFunc = AcquisitionFunctions.ExpectedImprovement;

    m_previousParameterSets = previousParameterSets;
    m_previousParameterSetScores = previousParameterSetScores;
}
/// <summary>
/// Linear scale. For ranges with a small difference in numerical scale, like min: 64 and max: 256.
/// Returns the sampler's value directly.
/// </summary>
/// <param name="min"></param>
/// <param name="max"></param>
/// <param name="parameterType">Selects the type of parameter. Should the parameter be sampled as discrete values, or as continuous values.</param>
/// <param name="sampler"></param>
/// <returns></returns>
public double Transform(double min, double max, ParameterType parameterType, IParameterSampler sampler)
{
    return sampler.Sample(min, max, parameterType);
}
/// <summary>
/// Samples a new value within the specified parameter bounds.
/// </summary>
/// <param name="sampler"></param>
/// <returns></returns>
public double SampleValue(IParameterSampler sampler)
{
    return m_transform.Transform(Min, Max, m_parameterType, sampler);
}
/// <summary>
/// Samples a set of random parameter sets.
/// </summary>
/// <param name="parameterSetCount"></param>
/// <param name="parameters"></param>
/// <param name="sampler"></param>
/// <returns></returns>
public static IEnumerable<double[]> SampleRandomParameterSets(int parameterSetCount,
    IParameterSpec[] parameters,
    IParameterSampler sampler)
{
    var parameterSets = new List<double[]>();
    for (int i = 0; i < parameterSetCount; i++)
    {
        parameterSets.Add(SampleParameterSet(parameters, sampler));
    }

    return parameterSets;
}
/// <summary>
/// Linear scale. For ranges with a small difference in numerical scale, like min: 64 and max: 256.
/// Returns the sampler's value directly.
/// </summary>
/// <param name="min"></param>
/// <param name="max"></param>
/// <param name="sampler"></param>
/// <returns></returns>
public double Transform(double min, double max, IParameterSampler sampler)
{
    return sampler.Sample(min, max);
}
/// <summary>
/// Bayesian optimization (BO) for global black box optimization problems. BO learns a model based on the initial
/// parameter sets and scores.
/// This model is used to sample new promising parameter candidates, which are evaluated and added to the existing
/// parameter sets.
/// This process iterates several times. The method is computationally expensive, so it is most relevant for expensive
/// problems, where each evaluation of the function to minimize takes a long time, like hyperparameter tuning of a
/// machine learning method.
/// In that case it can usually reduce the number of iterations required to reach a good solution compared to less
/// sophisticated methods.
/// Implementation loosely based on:
/// http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf
/// https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf
/// https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf
/// </summary>
/// <param name="parameters">A list of parameter specs, one for each optimization parameter</param>
/// <param name="iterations">The number of iterations to perform.
/// iterations * functionEvaluationsPerIterationCount = totalFunctionEvaluations</param>
/// <param name="randomStartingPointCount">Number of random parameter sets used
/// for initialization (default is 5)</param>
/// <param name="functionEvaluationsPerIterationCount">The number of function evaluations per iteration.
/// The parameter sets are included in order of most promising outcome (default is 1)</param>
/// <param name="randomSearchPointCount">The number of random parameter sets
/// used when maximizing the expected improvement acquisition function (default is 1000)</param>
/// <param name="seed"></param>
/// <param name="runParallel">Use multi-threading to speed up execution (default is false).
/// Note that the order of results returned by the Optimize method will not be reproducible when running in parallel.
/// Results will be the same, only the order is not reproducible</param>
/// <param name="maxDegreeOfParallelism">Maximum number of concurrent operations (default is -1 (unlimited))</param>
public BayesianOptimizer(
    IParameterSpec[] parameters,
    int iterations,
    int randomStartingPointCount = 5,
    int functionEvaluationsPerIterationCount = 1,
    int randomSearchPointCount = 1000,
    int seed = 42,
    bool runParallel = false,
    int maxDegreeOfParallelism = -1)
{
    m_parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));

    if (iterations < 1)
    { throw new ArgumentException(nameof(iterations) + " must be at least 1. Was: " + iterations); }

    if (randomStartingPointCount < 1)
    { throw new ArgumentException(nameof(randomStartingPointCount) + " must be at least 1. Was: " + randomStartingPointCount); }

    if (functionEvaluationsPerIterationCount < 1)
    { throw new ArgumentException(nameof(functionEvaluationsPerIterationCount) + " must be at least 1. Was: " + functionEvaluationsPerIterationCount); }

    if (randomSearchPointCount < 1)
    { throw new ArgumentException(nameof(randomSearchPointCount) + " must be at least 1. Was: " + randomSearchPointCount); }

    m_random = new Random(seed);
    // Use member random to seed the random uniform sampler.
    m_sampler = new RandomUniform(m_random.Next());

    m_iterations = iterations;
    m_randomStartingPointsCount = randomStartingPointCount;
    m_functionEvaluationsPerIterationCount = functionEvaluationsPerIterationCount;
    m_randomSearchPointCount = randomSearchPointCount;

    // Hyperparameters for the regression extra trees learner.
    // These are based on the values suggested in http://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf.
    // However, according to the author Frank Hutter,
    // the hyperparameters for the forest model should not matter that much.
    m_learner = new RegressionExtremelyRandomizedTreesLearner(
        30,                // trees
        10,                // minimumSplitSize
        2000,              // maximumTreeDepth
        parameters.Length, // featuresPrSplit
        1e-6,              // minimumInformationGain
        1.0,               // subSampleRatio
        m_random.Next(),   // seed: use member random to seed the learner.
        false);            // runParallel

    m_runParallel = runParallel;
    m_maxDegreeOfParallelism = maxDegreeOfParallelism;
}
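// Usage sketch for this overload, including the parallel option. MinMaxParameterSpec
// and OptimizeBest(Func<double[], OptimizerResult>) are assumed APIs, as in the
// earlier sketches.
public static void ParallelBayesianOptimizerSketch()
{
    var parameters = new IParameterSpec[]
    {
        new MinMaxParameterSpec(min: 0.0001, max: 1.0), // hypothetical single parameter
    };

    var optimizer = new BayesianOptimizer(parameters, iterations: 50,
        runParallel: true, maxDegreeOfParallelism: 4);

    // Per the runParallel note above: the order of results is not reproducible when
    // running in parallel, but the results themselves are.
    var best = optimizer.OptimizeBest(p => new OptimizerResult(p, Math.Pow(p[0] - 0.1, 2)));
}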
/// <summary>
/// Samples a new point within the specified parameter bounds.
/// </summary>
/// <param name="sampler"></param>
/// <returns></returns>
public double NextValue(IParameterSampler sampler)
{
    return m_transform.Transform(Min, Max, sampler);
}