/// <summary>
/// Trains a FastForest regression model on the historical runs: each previous run's
/// parameter set (encoded as a float feature vector) maps to its observed metric value.
/// </summary>
/// <param name="previousRuns">Previously evaluated parameter sets with their metric values.</param>
/// <returns>The trained random forest regression predictor (the surrogate model).</returns>
private FastForestRegressionPredictor FitModel(IEnumerable<IRunResult> previousRuns)
{
    // Materialize once: the original enumerated previousRuns three times
    // (two Count() calls plus the foreach), re-running any deferred query each time.
    IRunResult[] runs = previousRuns.ToArray();

    Single[] targets = new Single[runs.Length];
    Single[][] features = new Single[runs.Length][];

    int i = 0;
    foreach (RunResult r in runs)
    {
        features[i] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, r.ParameterSet, true);
        targets[i] = (Float)r.MetricValue;
        i++;
    }

    ArrayDataViewBuilder dvBuilder = new ArrayDataViewBuilder(_host);
    dvBuilder.AddColumn(DefaultColumnNames.Label, NumberType.Float, targets);
    dvBuilder.AddColumn(DefaultColumnNames.Features, NumberType.Float, features);

    IDataView view = dvBuilder.GetDataView();
    _host.Assert(view.GetRowCount() == targets.Length, "This data view will have as many rows as there have been evaluations");

    RoleMappedData data = new RoleMappedData(view, DefaultColumnNames.Label, DefaultColumnNames.Features);

    using (IChannel ch = _host.Start("Single training"))
    {
        // Set relevant random forest arguments.
        FastForestRegression.Arguments args = new FastForestRegression.Arguments();
        args.FeatureFraction = _args.SplitRatio;
        args.NumTrees = _args.NumOfTrees;
        args.MinDocumentsInLeafs = _args.NMinForSplit;

        // Train random forest.
        FastForestRegression trainer = new FastForestRegression(_host, args);
        trainer.Train(data);
        FastForestRegressionPredictor predictor = trainer.CreatePredictor();

        // Return random forest predictor.
        ch.Done();
        return predictor;
    }
}
/// <summary>
/// Does a mix of greedy local search around best performing parameter sets, while throwing random parameter sets into the mix.
/// </summary>
/// <param name="parents">Beginning locations for local greedy search.</param>
/// <param name="forest">Trained random forest, used later for evaluating parameters.</param>
/// <param name="numOfCandidates">Number of candidate configurations returned by the method (top K).</param>
/// <param name="previousRuns">Historical run results.</param>
/// <returns>Array of parameter sets, which will then be evaluated.</returns>
private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForestRegressionPredictor forest, int numOfCandidates, IEnumerable<IRunResult> previousRuns)
{
    // REVIEW: The IsMetricMaximizing flag affects the comparator, so that
    // performing Max() should get the best, regardless of if it is maximizing or
    // minimizing.
    RunResult bestRun = (RunResult)previousRuns.Max();
    RunResult worstRun = (RunResult)previousRuns.Min();
    // NOTE(review): in the minimizing branch this computes (worst - best) rather than
    // the best (lowest) observed metric itself. That looks suspicious as an incumbent
    // value for the EI computation — confirm this is intentional before changing it.
    double bestVal = bestRun.IsMetricMaximizing ? bestRun.MetricValue : worstRun.MetricValue - bestRun.MetricValue;

    // Each entry pairs an EI score (Item1) with its candidate configuration (Item2).
    HashSet<Tuple<double, ParameterSet>> configurations = new HashSet<Tuple<double, ParameterSet>>();

    // Perform local search: one best child per parent.
    foreach (ParameterSet c in parents)
    {
        Tuple<double, ParameterSet> bestChildKvp = LocalSearch(c, forest, bestVal, _args.Epsilon);
        configurations.Add(bestChildKvp);
    }

    // Additional set of random configurations to choose from during local search.
    ParameterSet[] randomConfigs = _randomSweeper.ProposeSweeps(_args.NumRandomEISearchConfigurations, previousRuns);
    double[] randomEIs = EvaluateConfigurationsByEI(forest, bestVal, randomConfigs);
    _host.Assert(randomConfigs.Length == randomEIs.Length);

    // Score the random configurations by EI and pool them with the local-search results.
    for (int i = 0; i < randomConfigs.Length; i++)
    {
        configurations.Add(new Tuple<double, ParameterSet>(randomEIs[i], randomConfigs[i]));
    }

    // Keep only the numOfCandidates configurations with the highest EI.
    HashSet<ParameterSet> retainedConfigs = new HashSet<ParameterSet>();
    IOrderedEnumerable<Tuple<double, ParameterSet>> bestConfigurations = configurations.OrderByDescending(x => x.Item1);

    foreach (Tuple<double, ParameterSet> t in bestConfigurations.Take(numOfCandidates))
    {
        retainedConfigs.Add(t.Item2);
    }

    return (retainedConfigs.ToArray());
}
/// <summary>
/// Generates a set of candidate configurations to sweep through, based on a combination of random and local
/// search, as outlined in Hutter et al - Sequential Model-Based Optimization for General Algorithm Configuration.
/// Makes use of class private members which determine how many candidates are returned. This number will include
/// random configurations interleaved (per the paper), and thus will be double the specified value.
/// </summary>
/// <param name="numOfCandidates">Number of candidate solutions to return.</param>
/// <param name="previousRuns">History of previously evaluated points, with their emprical performance values.</param>
/// <param name="forest">Trained random forest ensemble. Used in evaluating the candidates.</param>
/// <returns>An array of ParamaterSets which are the candidate configurations to sweep.</returns>
private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest)
{
    // Seed the local search with the k best configurations observed so far.
    ParameterSet[] parents = GetKBestConfigurations(previousRuns, forest, _args.LocalSearchParentCount);

    // EI-guided candidates: greedy local search around the parents, using half the budget (rounded up).
    ParameterSet[] eiChallengers = GreedyPlusRandomSearch(parents, forest, (int)Math.Ceiling(numOfCandidates / 2.0F), previousRuns);

    // Spend the remaining budget on purely random configurations to interleave.
    ParameterSet[] randomChallengers = _randomSweeper.ProposeSweeps(numOfCandidates - eiChallengers.Length, previousRuns);

    // Concatenate EI-driven and random candidates. Either source may yield fewer than
    // requested, since only unique candidates are generated, so size by actual counts.
    var combined = new List<ParameterSet>(eiChallengers.Length + randomChallengers.Length);
    combined.AddRange(eiChallengers);
    combined.AddRange(randomChallengers);
    return combined.ToArray();
}
/// <summary>
/// Returns the parameter sets of the (up to) k best-performing previous runs,
/// ranked by observed metric value.
/// </summary>
/// <param name="previousRuns">History of previously evaluated configurations.</param>
/// <param name="forest">Trained forest ensemble (currently unused here; see NOTE).</param>
/// <param name="k">Maximum number of configurations to return.</param>
/// <returns>Parameter sets of the top runs, in ascending comparator order.</returns>
private ParameterSet[] GetKBestConfigurations(IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest, int k = 10)
{
    // NOTE: Should we change this to rank according to EI (using forest), instead of observed performance?
    var topRuns = new SortedSet<RunResult>();

    foreach (RunResult candidate in previousRuns)
    {
        RunResult currentWorst = topRuns.Min();

        // Admit the candidate while under capacity, or when it beats the current worst.
        if (topRuns.Count < k || candidate.CompareTo(currentWorst) > 0)
            topRuns.Add(candidate);

        // Evict the previous worst if admission pushed us over capacity.
        if (topRuns.Count > k)
            topRuns.Remove(currentWorst);
    }

    // Project out the ParameterSets (SortedSet enumerates in ascending order).
    return topRuns.Select(run => run.ParameterSet).ToArray();
}
/// <summary>
/// Scores each candidate configuration by its expected improvement (EI) over the
/// incumbent value, using the trained forest as the surrogate model.
/// </summary>
/// <param name="forest">Trained random forest regression predictor.</param>
/// <param name="bestVal">Incumbent (best observed) value that EI is measured against.</param>
/// <param name="configs">Candidate configurations to score.</param>
/// <returns>One EI score per configuration, in the same order as <paramref name="configs"/>.</returns>
private double[] EvaluateConfigurationsByEI(FastForestRegressionPredictor forest, double bestVal, ParameterSet[] configs)
{
    // Pipeline: per-tree leaf outputs -> per-config forest statistics -> EI scores.
    return ComputeEIs(bestVal, ComputeForestStats(GetForestRegressionLeafValues(forest, configs)));
}
/// <summary>
/// Generates a set of candidate configurations to sweep through, based on a combination of random and local
/// search, as outlined in Hutter et al - Sequential Model-Based Optimization for General Algorithm Configuration.
/// Makes use of class private members which determine how many candidates are returned. This number will include
/// random configurations interleaved (per the paper), and thus will be double the specified value.
/// NOTE(review): this duplicates the other GenerateCandidateConfigurations overload in this file
/// (same signature); one of the two should be removed — C# does not allow both to compile.
/// </summary>
/// <param name="numOfCandidates">Number of candidate solutions to return.</param>
/// <param name="previousRuns">History of previously evaluated points, with their emprical performance values.</param>
/// <param name="forest">Trained random forest ensemble. Used in evaluating the candidates.</param>
/// <returns>An array of ParamaterSets which are the candidate configurations to sweep.</returns>
private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest)
{
    // Get k best previous runs ParameterSets.
    ParameterSet[] bestKParamSets = GetKBestConfigurations(previousRuns, forest, _args.LocalSearchParentCount);

    // Perform local searches using the k best previous run configurations.
    ParameterSet[] eiChallengers = GreedyPlusRandomSearch(bestKParamSets, forest, (int)Math.Ceiling(numOfCandidates / 2.0F), previousRuns);

    // Generate another set of random configurations to interleave.
    ParameterSet[] randomChallengers = _randomSweeper.ProposeSweeps(numOfCandidates - eiChallengers.Length, previousRuns);

    // BUG FIX: the original wrote configs[j] = (j % 2 == 0) ? eiChallengers[j / 2]
    // : randomChallengers[j / 2] for all j < numOfCandidates, which throws
    // IndexOutOfRangeException (and leaves null slots) whenever either source returns
    // fewer candidates than requested — which both can, since they only produce
    // unique configurations. Interleave up to the shorter length, then append the rest.
    ParameterSet[] configs = new ParameterSet[eiChallengers.Length + randomChallengers.Length];
    int idx = 0;
    int pairCount = Math.Min(eiChallengers.Length, randomChallengers.Length);
    for (int j = 0; j < pairCount; j++)
    {
        configs[idx++] = eiChallengers[j];
        configs[idx++] = randomChallengers[j];
    }
    for (int j = pairCount; j < eiChallengers.Length; j++)
        configs[idx++] = eiChallengers[j];
    for (int j = pairCount; j < randomChallengers.Length; j++)
        configs[idx++] = randomChallengers[j];

    return configs;
}