Example 1
        private FastForestRegressionPredictor FitModel(IEnumerable<IRunResult> previousRuns)
        {
            Single[]   targets  = new Single[previousRuns.Count()];
            Single[][] features = new Single[previousRuns.Count()][];

            int i = 0;

            foreach (RunResult r in previousRuns)
            {
                features[i] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, r.ParameterSet, true);
                targets[i]  = (Single)r.MetricValue;
                i++;
            }

            ArrayDataViewBuilder dvBuilder = new ArrayDataViewBuilder(_host);

            dvBuilder.AddColumn(DefaultColumnNames.Label, NumberType.Float, targets);
            dvBuilder.AddColumn(DefaultColumnNames.Features, NumberType.Float, features);

            IDataView view = dvBuilder.GetDataView();

            _host.Assert(view.GetRowCount() == targets.Length, "This data view will have as many rows as there have been evaluations");
            RoleMappedData data = new RoleMappedData(view, DefaultColumnNames.Label, DefaultColumnNames.Features);

            using (IChannel ch = _host.Start("Single training"))
            {
                // Set relevant random forest arguments.
                FastForestRegression.Arguments args = new FastForestRegression.Arguments();
                args.FeatureFraction     = _args.SplitRatio;
                args.NumTrees            = _args.NumOfTrees;
                args.MinDocumentsInLeafs = _args.NMinForSplit;

                // Train random forest.
                FastForestRegression trainer = new FastForestRegression(_host, args);
                trainer.Train(data);
                FastForestRegressionPredictor predictor = trainer.CreatePredictor();

                // Return random forest predictor.
                ch.Done();
                return predictor;
            }
        }
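
For context, here is a minimal sketch of how a SMAC-style sweeper might call FitModel from its ProposeSweeps entry point. The _args.NumberInitialPopulation setting and the random fallback are assumptions for illustration; only _randomSweeper, FitModel, and GenerateCandidateConfigurations appear in these examples.

        // Sketch only: _args.NumberInitialPopulation is an assumed setting for the number of
        // purely random runs required before model-based proposals become meaningful.
        public ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResult> previousRuns = null)
        {
            // Too little history: fall back to purely random proposals.
            if (previousRuns == null || previousRuns.Count() < _args.NumberInitialPopulation)
                return _randomSweeper.ProposeSweeps(maxSweeps, previousRuns);

            // Fit a random forest to the observed (configuration, metric) pairs.
            FastForestRegressionPredictor forest = FitModel(previousRuns);

            // Use the forest to score and select the next batch of candidate configurations.
            return GenerateCandidateConfigurations(maxSweeps, previousRuns, forest);
        }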
Example 2
        /// <summary>
        /// Does a mix of greedy local search around best performing parameter sets, while throwing random parameter sets into the mix.
        /// </summary>
        /// <param name="parents">Beginning locations for local greedy search.</param>
        /// <param name="forest">Trained random forest, used later for evaluating parameters.</param>
        /// <param name="numOfCandidates">Number of candidate configurations returned by the method (top K).</param>
        /// <param name="previousRuns">Historical run results.</param>
        /// <returns>Array of parameter sets, which will then be evaluated.</returns>
        private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForestRegressionPredictor forest, int numOfCandidates, IEnumerable<IRunResult> previousRuns)
        {
            // REVIEW: The IsMetricMaximizing flag affects the comparator, so that
            // Max() returns the best run regardless of whether the metric is being
            // maximized or minimized.
            RunResult bestRun  = (RunResult)previousRuns.Max();
            RunResult worstRun = (RunResult)previousRuns.Min();
            double    bestVal  = bestRun.IsMetricMaximizing ? bestRun.MetricValue : worstRun.MetricValue - bestRun.MetricValue;

            HashSet<Tuple<double, ParameterSet>> configurations = new HashSet<Tuple<double, ParameterSet>>();

            // Perform local search.
            foreach (ParameterSet c in parents)
            {
                Tuple<double, ParameterSet> bestChildKvp = LocalSearch(c, forest, bestVal, _args.Epsilon);
                configurations.Add(bestChildKvp);
            }

            // Generate an additional set of random configurations and score them by EI.
            ParameterSet[] randomConfigs = _randomSweeper.ProposeSweeps(_args.NumRandomEISearchConfigurations, previousRuns);
            double[]       randomEIs     = EvaluateConfigurationsByEI(forest, bestVal, randomConfigs);
            _host.Assert(randomConfigs.Length == randomEIs.Length);

            for (int i = 0; i < randomConfigs.Length; i++)
            {
                configurations.Add(new Tuple<double, ParameterSet>(randomEIs[i], randomConfigs[i]));
            }

            HashSet<ParameterSet> retainedConfigs = new HashSet<ParameterSet>();
            IOrderedEnumerable<Tuple<double, ParameterSet>> bestConfigurations = configurations.OrderByDescending(x => x.Item1);

            foreach (Tuple<double, ParameterSet> t in bestConfigurations.Take(numOfCandidates))
            {
                retainedConfigs.Add(t.Item2);
            }

            return retainedConfigs.ToArray();
        }
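
The LocalSearch helper invoked above is not among these examples. A minimal sketch of such an EI hill-climb follows, assuming a hypothetical GetOneMutationNeighborhood helper that returns all configurations differing from the parent in exactly one parameter.

        // Sketch only: GetOneMutationNeighborhood is an assumed helper, not shown in these examples.
        private Tuple<double, ParameterSet> LocalSearch(ParameterSet parent, FastForestRegressionPredictor forest, double bestVal, double epsilon)
        {
            double currentBestEI = EvaluateConfigurationsByEI(forest, bestVal, new[] { parent })[0];
            ParameterSet currentBestConfig = parent;

            while (true)
            {
                // Score the one-mutation neighborhood of the current configuration by EI.
                ParameterSet[] neighborhood = GetOneMutationNeighborhood(currentBestConfig);
                double[] eis = EvaluateConfigurationsByEI(forest, bestVal, neighborhood);
                if (eis.Length == 0)
                    break;

                // Find the best-scoring neighbor.
                int bestIndex = 0;
                for (int i = 1; i < eis.Length; i++)
                {
                    if (eis[i] > eis[bestIndex])
                        bestIndex = i;
                }

                // Stop climbing once the EI improvement drops below epsilon.
                if (eis[bestIndex] - currentBestEI < epsilon)
                    break;

                currentBestConfig = neighborhood[bestIndex];
                currentBestEI = eis[bestIndex];
            }

            return new Tuple<double, ParameterSet>(currentBestEI, currentBestConfig);
        }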
Example 3
        /// <summary>
        /// Generates a set of candidate configurations to sweep through, based on a combination of random and local
        /// search, as outlined in Hutter et al., Sequential Model-Based Optimization for General Algorithm Configuration.
        /// Makes use of class private members which determine how many candidates are returned. Roughly half of the
        /// returned candidates come from EI-guided local search; the remainder are random configurations interleaved
        /// with them (per the paper).
        /// </summary>
        /// <param name="numOfCandidates">Number of candidate solutions to return.</param>
        /// <param name="previousRuns">History of previously evaluated points, with their emprical performance values.</param>
        /// <param name="forest">Trained random forest ensemble. Used in evaluating the candidates.</param>
        /// <returns>An array of ParameterSets which are the candidate configurations to sweep.</returns>
        private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest)
        {
            // Get the k best previous runs' ParameterSets.
            ParameterSet[] bestKParamSets = GetKBestConfigurations(previousRuns, forest, _args.LocalSearchParentCount);

            // Perform local searches using the k best previous run configurations.
            ParameterSet[] eiChallengers = GreedyPlusRandomSearch(bestKParamSets, forest, (int)Math.Ceiling(numOfCandidates / 2.0F), previousRuns);

            // Generate another set of random configurations to interleave.
            ParameterSet[] randomChallengers = _randomSweeper.ProposeSweeps(numOfCandidates - eiChallengers.Length, previousRuns);

            // Concatenate challenger candidates with random candidates. Either source can return fewer
            // candidates than requested, since only unique candidates are generated, and the count from
            // each method may vary considerably.
            ParameterSet[] configs = new ParameterSet[eiChallengers.Length + randomChallengers.Length];
            Array.Copy(eiChallengers, 0, configs, 0, eiChallengers.Length);
            Array.Copy(randomChallengers, 0, configs, eiChallengers.Length, randomChallengers.Length);

            return configs;
        }
Example 4
        private ParameterSet[] GetKBestConfigurations(IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest, int k = 10)
        {
            // NOTE: Should we change this to rank according to EI (using forest), instead of observed performance?

            SortedSet<RunResult> bestK = new SortedSet<RunResult>();

            foreach (RunResult r in previousRuns)
            {
                RunResult worst = bestK.Min();

                if (bestK.Count < k || r.CompareTo(worst) > 0)
                {
                    bestK.Add(r);
                }

                if (bestK.Count > k)
                {
                    bestK.Remove(worst);
                }
            }

            // Extract the ParameterSets and return.
            List<ParameterSet> outSet = new List<ParameterSet>();

            foreach (RunResult r in bestK)
            {
                outSet.Add(r.ParameterSet);
            }
            return outSet.ToArray();
        }
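
Because SortedSet deduplicates by its comparer, runs that compare equal to one already in bestK are silently dropped. If that behavior is not intended, an equivalent LINQ formulation (a sketch; it keeps ties and omits the unused forest parameter) would be:

        private ParameterSet[] GetKBestConfigurations(IEnumerable<IRunResult> previousRuns, int k = 10)
        {
            // Order best-first using RunResult's own comparison and keep the top k, ties included.
            return previousRuns.Cast<RunResult>()
                .OrderByDescending(r => r)
                .Take(k)
                .Select(r => r.ParameterSet)
                .ToArray();
        }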
Example 5
        private double[] EvaluateConfigurationsByEI(FastForestRegressionPredictor forest, double bestVal, ParameterSet[] configs)
        {
            double[][] leafPredictions  = GetForestRegressionLeafValues(forest, configs);
            double[][] forestStatistics = ComputeForestStats(leafPredictions);
            return ComputeEIs(bestVal, forestStatistics);
        }
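
Neither ComputeForestStats nor ComputeEIs appears among these examples. A minimal sketch of what they plausibly compute follows: the per-configuration mean and standard deviation over the forest's leaf predictions, then the standard expected-improvement formula EI = (m - best) * Phi(z) + s * phi(z) with z = (m - best) / s. StdNormalCdf and StdNormalPdf are assumed helpers for the standard normal CDF and PDF.

        // Sketch only: empirical mean and standard deviation of each configuration's leaf predictions.
        private double[][] ComputeForestStats(double[][] leafPredictions)
        {
            double[][] stats = new double[leafPredictions.Length][];
            for (int i = 0; i < leafPredictions.Length; i++)
            {
                double mean     = leafPredictions[i].Average();
                double variance = leafPredictions[i].Select(v => (v - mean) * (v - mean)).Average();
                stats[i] = new[] { mean, Math.Sqrt(variance) };
            }
            return stats;
        }

        // Sketch only: expected improvement over bestVal, treating each configuration's forest
        // prediction as a normal distribution. StdNormalCdf/StdNormalPdf are assumed helpers.
        private double[] ComputeEIs(double bestVal, double[][] forestStatistics)
        {
            double[] eis = new double[forestStatistics.Length];
            for (int i = 0; i < forestStatistics.Length; i++)
            {
                double mean   = forestStatistics[i][0];
                double stdDev = forestStatistics[i][1];
                double z = (mean - bestVal) / stdDev;
                eis[i] = (mean - bestVal) * StdNormalCdf(z) + stdDev * StdNormalPdf(z);
            }
            return eis;
        }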
Example 6
        /// <summary>
        /// Generates a set of candidate configurations to sweep through, based on a combination of random and local
        /// search, as outlined in Hutter et al., Sequential Model-Based Optimization for General Algorithm Configuration.
        /// Makes use of class private members which determine how many candidates are returned. Roughly half of the
        /// returned candidates come from EI-guided local search; the remainder are random configurations interleaved
        /// with them (per the paper).
        /// </summary>
        /// <param name="numOfCandidates">Number of candidate solutions to return.</param>
        /// <param name="previousRuns">History of previously evaluated points, with their emprical performance values.</param>
        /// <param name="forest">Trained random forest ensemble. Used in evaluating the candidates.</param>
        /// <returns>An array of ParameterSets which are the candidate configurations to sweep.</returns>
        private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IEnumerable<IRunResult> previousRuns, FastForestRegressionPredictor forest)
        {
            // Get the k best previous runs' ParameterSets.
            ParameterSet[] bestKParamSets = GetKBestConfigurations(previousRuns, forest, _args.LocalSearchParentCount);

            // Perform local searches using the k best previous run configurations.
            ParameterSet[] eiChallengers = GreedyPlusRandomSearch(bestKParamSets, forest, (int)Math.Ceiling(numOfCandidates / 2.0F), previousRuns);

            // Generate another set of random configurations to interleave.
            ParameterSet[] randomChallengers = _randomSweeper.ProposeSweeps(numOfCandidates - eiChallengers.Length, previousRuns);

            // Interleave challenger candidates with random candidates. Either source may return fewer
            // candidates than requested (only unique candidates are generated), so size the result to
            // the actual totals and guard both indices.
            ParameterSet[] configs = new ParameterSet[eiChallengers.Length + randomChallengers.Length];
            int ei = 0, rand = 0;
            for (int j = 0; j < configs.Length; j++)
            {
                bool takeEi = rand >= randomChallengers.Length || (j % 2 == 0 && ei < eiChallengers.Length);
                configs[j] = takeEi ? eiChallengers[ei++] : randomChallengers[rand++];
            }

            return configs;
        }
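
Relative to Example 3, which concatenates the two pools, this variant alternates model-guided and random candidates, matching the interleaving described in the SMAC paper; the index guards matter because either pool may hold fewer candidates than requested.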