Example #1
        /// <summary>
        /// REVIEW: This was the original CategoriesToWeights function. It should be deprecated once the new function is
        /// validated to work better. It contains a subtle issue: categories with poor performance but which are seen
        /// often will still receive high weight. The new function addresses this issue, while also improving the
        /// exploration capability of the algorithm.
        /// </summary>
        /// <param name="param">The categorical sweep parameter whose values are being weighted.</param>
        /// <param name="previousRuns">Previously observed runs used to derive the weights.</param>
        /// <returns>Array of normalized weights, one per categorical value.</returns>
        private double[] CategoriesToWeightsOld(DiscreteValueGenerator param, IEnumerable<IRunResult> previousRuns)
        {
            double[] weights = new double[param.Count];
            Dictionary<string, int> labelToIndex = new Dictionary<string, int>();

            // Map categorical values to their index.
            for (int j = 0; j < param.Count; j++)
            {
                labelToIndex[param[j].ValueText] = j;
            }

            // Add pseudo-observations, to account for unobserved parameter settings.
            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] = 0.1;
            }

            // Sum up the results for each category value.
            bool isMaximizing = true;

            foreach (RunResult r in previousRuns)
            {
                weights[labelToIndex[r.ParameterSet[param.Name].ValueText]] += r.MetricValue;
                isMaximizing = r.IsMetricMaximizing;
            }

            // Normalize weights to sum to one and return
            return isMaximizing ? SweeperProbabilityUtils.Normalize(weights) : SweeperProbabilityUtils.InverseNormalize(weights);
        }
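
To make the subtle issue mentioned in the REVIEW note concrete, here is a hypothetical two-category calculation (the numbers are illustrative, and it assumes SweeperProbabilityUtils.Normalize simply divides each weight by the total):

    double pseudo = 0.1;
    double weightA = pseudo + 10 * 0.3;   // 3.1: ten mediocre runs at metric 0.3
    double weightB = pseudo + 2 * 0.9;    // 1.9: two strong runs at metric 0.9
    double total = weightA + weightB;     // 5.0
    // weightA / total = 0.62, weightB / total = 0.38:
    // the frequently-seen but worse category A receives the larger share of the mass.
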
Example #2
        /// <summary>
        /// Converts a set of history into a set of weights, one for each run in the history.
        /// </summary>
        /// <param name="history">Input set of historical runs.</param>
        /// <param name="n">Number of total runs (history may be truncated)</param>
        /// <param name="rMean">Mean metric value of previous random runs.</param>
        /// <param name="rVar">Metric value empirical variance of previous random runs.</param>
        /// <returns>Array of weights.</returns>
        private double[] HistoryToWeights(IRunResult[] history, int n, double rMean, double rVar)
        {
            // Extract weights and normalize.
            double[] weights = new double[history.Length];

            for (int i = 0; i < history.Length; i++)
            {
                weights[i] = (double)history[i].MetricValue;
            }

            // Fitness proportional scaling constant.
            bool   isMinimizing   = history.Length > 0 && !history[0].IsMetricMaximizing;
            double currentMaxPerf = isMinimizing ? SweeperProbabilityUtils.NormalCdf(2 * rMean - weights.Min(), rMean, rVar) : SweeperProbabilityUtils.NormalCdf(weights.Max(), rMean, rVar);

            // Normalize weights to sum to one. This automatically takes care of the case where all weights are zero.
            weights = isMinimizing ? SweeperProbabilityUtils.InverseNormalize(weights) : SweeperProbabilityUtils.Normalize(weights);

            // Scale weights. (Concentrates mass on good points, depending on how good the best currently is.)
            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] = _args.Simple ? Math.Pow(weights[i], Math.Min(Math.Sqrt(n), 100)) : Math.Pow(weights[i], _args.WeightRescalingPower * currentMaxPerf);
            }

            weights = SweeperProbabilityUtils.Normalize(weights);

            return weights;
        }
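
The scaling loop above raises each normalized weight to a power before re-normalizing; a hypothetical two-run example (again assuming a divide-by-sum Normalize) shows how this concentrates mass on the better run. The actual exponent depends on _args.Simple, n, _args.WeightRescalingPower and currentMaxPerf.

    double[] w = { 0.7, 0.3 };          // normalized weights of two runs
    double power = 3.0;                 // illustrative exponent
    double a = Math.Pow(w[0], power);   // 0.343
    double b = Math.Pow(w[1], power);   // 0.027
    // After re-normalizing: a = 0.93, b = 0.07. Larger exponents concentrate
    // the proposal mass more sharply on the best runs seen so far.
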
Example #3
        private double ComputeEI(double bestVal, double[] forestStatistics)
        {
            double empMean   = forestStatistics[0];
            double empStdDev = forestStatistics[1];
            double centered  = empMean - bestVal;
            double ztrans    = centered / empStdDev;

            return centered * SweeperProbabilityUtils.StdNormalCdf(ztrans) + empStdDev * SweeperProbabilityUtils.StdNormalPdf(ztrans);
        }
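
ComputeEI evaluates the standard expected-improvement acquisition function for a maximized metric, with empMean and empStdDev coming from the forest's per-configuration leaf statistics:

$$\mathrm{EI} = (\mu - f_{\text{best}})\,\Phi(z) + \sigma\,\phi(z), \qquad z = \frac{\mu - f_{\text{best}}}{\sigma},$$

where $\mu$ = empMean, $\sigma$ = empStdDev, $f_{\text{best}}$ = bestVal, and $\Phi$, $\phi$ are the standard normal CDF and PDF.
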
Example #4
 public KdoSweeper(Arguments args)
 {
     _args             = args;
     _sweepParameters  = args.SweptParameters.ToArray();
     _randomSweeper    = new UniformRandomSweeper(new SweeperBase.ArgumentsBase(), _sweepParameters);
     _redundantSweeper = new UniformRandomSweeper(new SweeperBase.ArgumentsBase {
         Retries = 0
     }, _sweepParameters);
     _spu = new SweeperProbabilityUtils();
     _alreadySeenConfigs = new SortedSet<Float[]>(new FloatArrayComparer());
     _randomParamSets    = new List<ParameterSet>();
 }
Example #5
        /// <summary>
        /// New version of the CategoriesToWeights method, which fixes an issue where a lot of
        /// mass could potentially be assigned to bad categories.
        /// </summary>
        /// <param name="param">The categorical sweep parameter whose values are being weighted.</param>
        /// <param name="previousRuns">Previously observed runs used to derive the weights.</param>
        /// <returns>Array of normalized weights, one per categorical value.</returns>
        private double[] CategoriesToWeights(DiscreteValueGenerator param, IRunResult[] previousRuns)
        {
            double[] weights = new double[param.Count];
            Dictionary<string, int> labelToIndex = new Dictionary<string, int>();

            int[] counts = new int[param.Count];

            // Map categorical values to their index.
            for (int j = 0; j < param.Count; j++)
            {
                labelToIndex[param[j].ValueText] = j;
            }

            // Add mass according to performance
            bool isMaximizing = true;

            foreach (RunResult r in previousRuns)
            {
                weights[labelToIndex[r.ParameterSet[param.Name].ValueText]] += r.MetricValue;
                counts[labelToIndex[r.ParameterSet[param.Name].ValueText]]++;
                isMaximizing = r.IsMetricMaximizing;
            }

            // Take average mass for each category
            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] /= (counts[i] > 0 ? counts[i] : 1);
            }

            // If any learner has not been seen, default its average to
            // best value to encourage exploration of untried algorithms.
            double bestVal = isMaximizing ?
                             previousRuns.Cast<RunResult>().Where(r => r.HasMetricValue).Max(r => r.MetricValue) :
                             previousRuns.Cast<RunResult>().Where(r => r.HasMetricValue).Min(r => r.MetricValue);

            for (int i = 0; i < weights.Length; i++)
            {
                weights[i] += counts[i] == 0 ? bestVal : 0;
            }

            // Normalize weights to sum to one and return
            return isMaximizing ? SweeperProbabilityUtils.Normalize(weights) : SweeperProbabilityUtils.InverseNormalize(weights);
        }
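
Continuing the hypothetical numbers from Example #1, averaging the mass per category removes the frequency bias, and the best-value default keeps untried categories in play (again assuming a divide-by-sum Normalize):

    double avgA = 3.0 / 10;               // 0.30: ten mediocre runs at metric 0.3
    double avgB = 1.8 / 2;                // 0.90: two strong runs at metric 0.9
    double unseenC = 0.9;                 // never-tried category defaults to the best observed value
    double total = avgA + avgB + unseenC; // 2.1
    // Normalized: A = 0.14, B = 0.43, C = 0.43. The better category B now outweighs the
    // frequently-seen A, and the untried C remains attractive enough to be explored.
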
Example #6
        /// <summary>
        /// Goes through forest to extract the set of leaf values associated with filtering each configuration.
        /// </summary>
        /// <param name="forest">Trained forest predictor, used for filtering configs.</param>
        /// <param name="configs">Parameter configurations.</param>
        /// <returns>2D array where rows correspond to configurations, and columns to the predicted leaf values.</returns>
        private double[][] GetForestRegressionLeafValues(FastForestRegressionModelParameters forest, ParameterSet[] configs)
        {
            List<double[]> datasetLeafValues = new List<double[]>();

            foreach (ParameterSet config in configs)
            {
                List<double> leafValues = new List<double>();
                // The transformed feature vector does not depend on the tree, so compute it once per configuration.
                Float[] transformedParams = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, config, true);
                VBuffer<Float> features = new VBuffer<Float>(transformedParams.Length, transformedParams);

                for (var treeId = 0; treeId < _args.NumOfTrees; treeId++)
                {
                    List<int> path = null;
                    var leafId = forest.GetLeaf(treeId, features, ref path);
                    var leafValue = forest.GetLeafValue(treeId, leafId);
                    leafValues.Add(leafValue);
                }
                datasetLeafValues.Add(leafValues.ToArray());
            }
            return datasetLeafValues.ToArray();
        }
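
The per-configuration rows returned here are presumably reduced to the two-element forestStatistics array consumed by ComputeEI in Example #3. A hypothetical helper (not part of the source) that would perform that reduction:

    // Hypothetical: reduce one row of per-tree leaf values to { empirical mean, empirical stddev }.
    private static double[] ComputeForestStatistics(double[] leafValues)
    {
        double mean = leafValues.Average();
        double variance = leafValues.Sum(v => (v - mean) * (v - mean)) / leafValues.Length;
        return new[] { mean, Math.Sqrt(variance) };
    }
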
Example #7
        private FastForestRegressionModelParameters FitModel(IEnumerable<IRunResult> previousRuns)
        {
            Single[]   targets  = new Single[previousRuns.Count()];
            Single[][] features = new Single[previousRuns.Count()][];

            int i = 0;

            foreach (RunResult r in previousRuns)
            {
                features[i] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, r.ParameterSet, true);
                targets[i]  = (Float)r.MetricValue;
                i++;
            }

            ArrayDataViewBuilder dvBuilder = new ArrayDataViewBuilder(_context);

            dvBuilder.AddColumn(DefaultColumnNames.Label, NumberType.Float, targets);
            dvBuilder.AddColumn(DefaultColumnNames.Features, NumberType.Float, features);

            IDataView data = dvBuilder.GetDataView();

            AutoMlUtils.Assert(data.GetRowCount() == targets.Length, "This data view will have as many rows as there have been evaluations");

            // Set relevant random forest arguments.
            // Train random forest.
            var trainer = new FastForestRegression(_context, DefaultColumnNames.Label, DefaultColumnNames.Features, advancedSettings: s =>
            {
                s.FeatureFraction     = _args.SplitRatio;
                s.NumTrees            = _args.NumOfTrees;
                s.MinDocumentsInLeafs = _args.NMinForSplit;
            });
            var predictor = trainer.Train(data).Model;

            // Return random forest predictor.
            return predictor;
        }
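
A hypothetical sketch of how the pieces in Examples #3, #6 and #7 could fit together (candidateConfigs, previousRuns and bestObservedMetric are assumed to be supplied by the surrounding proposal loop, and ComputeForestStatistics is the hypothetical helper sketched after Example #6):

    var forest = FitModel(previousRuns);
    double[][] leafValues = GetForestRegressionLeafValues(forest, candidateConfigs);
    double[] ei = new double[candidateConfigs.Length];
    for (int k = 0; k < candidateConfigs.Length; k++)
    {
        double[] stats = ComputeForestStatistics(leafValues[k]);
        ei[k] = ComputeEI(bestObservedMetric, stats);
    }
    // Candidates with the highest expected improvement would then be proposed for the next sweep.
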
Example #8
        /// <summary>
        /// Computes a single-mutation neighborhood (one param at a time) for a given configuration. For
        /// numeric parameters, samples K mutations (i.e., creates K neighbors based on that parameter).
        /// </summary>
        /// <param name="parent">Starting configuration.</param>
        /// <returns>A set of configurations that each differ from parent in exactly one parameter.</returns>
        private ParameterSet[] GetOneMutationNeighborhood(ParameterSet parent)
        {
            List<ParameterSet> neighbors = new List<ParameterSet>();
            SweeperProbabilityUtils spu = new SweeperProbabilityUtils();

            for (int i = 0; i < _sweepParameters.Length; i++)
            {
                // This allows us to query possible values of this parameter.
                IValueGenerator sweepParam = _sweepParameters[i];

                // This holds the actual value for this parameter, chosen in this parameter set.
                IParameterValue pset = parent[sweepParam.Name];

                AutoMlUtils.Assert(pset != null);

                DiscreteValueGenerator parameterDiscrete = sweepParam as DiscreteValueGenerator;
                if (parameterDiscrete != null)
                {
                    // Create one neighbor for every discrete parameter.
                    Float[] neighbor = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, parent, false);

                    int hotIndex = -1;
                    for (int j = 0; j < parameterDiscrete.Count; j++)
                    {
                        if (parameterDiscrete[j].Equals(pset))
                        {
                            hotIndex = j;
                            break;
                        }
                    }

                    AutoMlUtils.Assert(hotIndex >= 0);

                    Random r           = new Random();
                    int    randomIndex = r.Next(0, parameterDiscrete.Count - 1);
                    randomIndex += randomIndex >= hotIndex ? 1 : 0;
                    neighbor[i]  = randomIndex;
                    neighbors.Add(SweeperProbabilityUtils.FloatArrayAsParameterSet(_sweepParameters, neighbor, false));
                }
                else
                {
                    INumericValueGenerator parameterNumeric = sweepParam as INumericValueGenerator;
                    AutoMlUtils.Assert(parameterNumeric != null, "SMAC sweeper can only sweep over discrete and numeric parameters");

                    // Create k neighbors (typically 4) for every numerical parameter.
                    for (int j = 0; j < _args.NumNeighborsForNumericalParams; j++)
                    {
                        Float[] neigh  = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, parent, false);
                        double  newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                        while (newVal <= 0.0 || newVal >= 1.0)
                        {
                            newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                        }
                        neigh[i] = (Float)newVal;
                        ParameterSet neighbor = SweeperProbabilityUtils.FloatArrayAsParameterSet(_sweepParameters, neigh, false);
                        neighbors.Add(neighbor);
                    }
                }
            }
            return neighbors.ToArray();
        }
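
The two lines that compute randomIndex implement a uniform draw over every category except the currently selected one: the range is one smaller than the category count, and any draw at or above hotIndex is shifted up by one.

    // Illustration: with parameterDiscrete.Count = 4 and hotIndex = 2, r.Next(0, 3) returns 0, 1 or 2;
    // shifting results >= hotIndex up by one maps them to { 0, 1, 3 },
    // i.e. each of the three non-current categories is chosen with probability 1/3.
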
Example #9
        /// <summary>
        /// Samples a child configuration from a distribution centered at the parent, using fitness-proportional mutation.
        /// </summary>
        /// <param name="parent">Starting parent configuration (used as the mean of the multivariate Gaussian).</param>
        /// <param name="fitness">Numeric value indicating how good the parent configuration is.</param>
        /// <param name="n">Count of items currently in the history.</param>
        /// <param name="previousRuns">Run history.</param>
        /// <param name="rMean">Mean metric value of previous random runs.</param>
        /// <param name="rVar">Metric value empirical variance of previous random runs.</param>
        /// <param name="isMetricMaximizing">Flag indicating whether the metric is being maximized (true) or minimized (false).</param>
        /// <returns>A mutated version of parent (i.e., point sampled near parent).</returns>
        private ParameterSet SampleChild(ParameterSet parent, double fitness, int n, IRunResult[] previousRuns, double rMean, double rVar, bool isMetricMaximizing)
        {
            Float[] child = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, parent, false);
            List<int> numericParamIndices = new List<int>();
            List<double> numericParamValues = new List<double>();
            int loopCount = 0;

            // Interleave uniform random samples, according to proportion defined.
            if (_spu.SampleUniform() <= _args.ProportionRandom)
            {
                ParameterSet ps = _randomSweeper.ProposeSweeps(1)[0];
                _randomParamSets.Add(ps);
                return ps;
            }

            do
            {
                for (int i = 0; i < _sweepParameters.Length; i++)
                {
                    // This allows us to query possible values of this parameter.
                    var sweepParam = _sweepParameters[i];

                    if (sweepParam is DiscreteValueGenerator parameterDiscrete)
                    {
                        // Sample categorical parameter.
                        double[] categoryWeights = _args.LegacyDpBehavior
                            ? CategoriesToWeightsOld(parameterDiscrete, previousRuns)
                            : CategoriesToWeights(parameterDiscrete, previousRuns);
                        child[i] = SampleCategoricalDist(1, categoryWeights)[0];
                    }
                    else
                    {
                        // Numeric parameter: record its index and current value so that all numeric
                        // dimensions can be sampled jointly below.
                        numericParamIndices.Add(i);
                        numericParamValues.Add(child[i]);
                    }
                }

                if (numericParamIndices.Count > 0)
                {
                    if (!_args.Beta)
                    {
                        // Sample point from multivariate Gaussian, centered on parent values, with mutation proportional to fitness.
                        double[] mu           = numericParamValues.ToArray();
                        double   correctedVal = isMetricMaximizing
                            ? 1.0 - SweeperProbabilityUtils.NormalCdf(fitness, rMean, rVar)
                            : 1.0 - SweeperProbabilityUtils.NormalCdf(2 * rMean - fitness, rMean, rVar);
                        double     bandwidthScale  = Math.Max(_args.MinimumMutationSpread, correctedVal);
                        double[]   stddevs         = Enumerable.Repeat(_args.Simple ? 0.2 : bandwidthScale, mu.Length).ToArray();
                        double[][] bandwidthMatrix = BuildBandwidthMatrix(n, stddevs);
                        double[]   sampledPoint    = SampleDiagonalCovMultivariateGaussian(1, mu, bandwidthMatrix)[0];
                        for (int j = 0; j < sampledPoint.Length; j++)
                        {
                            child[numericParamIndices[j]] = (Float)Corral(sampledPoint[j]);
                        }
                    }
                    else
                    {
                        // If Beta flag set, sample from independent Beta distributions instead.
                        double alpha = 1 + 15 * fitness;
                        foreach (int index in numericParamIndices)
                        {
                            const double epsCutoff = 1e-10;
                            double       eps       = Math.Min(Math.Max(child[index], epsCutoff), 1 - epsCutoff);
                            double       beta      = alpha / eps - alpha;
                            child[index] = (Float)Stats.SampleFromBeta(alpha, beta);
                        }
                    }
                }

                // Don't get stuck at a local point.
                loopCount++;
                if (loopCount > 10)
                {
                    return _randomSweeper.ProposeSweeps(1, null)[0];
                }
            } while (_alreadySeenConfigs.Contains(child));

            _alreadySeenConfigs.Add(child);
            return SweeperProbabilityUtils.FloatArrayAsParameterSet(_sweepParameters, child, false);
        }
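
For the Beta branch, the chosen parameterization keeps the child centered on the parent's clamped value while tightening the distribution as fitness grows. With $\alpha = 1 + 15 \cdot \text{fitness}$ and $\beta = \alpha/\varepsilon - \alpha$ (where $\varepsilon$ is the clamped parent value):

$$\mathbb{E}[X] = \frac{\alpha}{\alpha + \beta} = \frac{\alpha}{\alpha/\varepsilon} = \varepsilon,$$

so the sampled value has mean equal to the parent's value, and a larger fitness increases $\alpha + \beta$, concentrating the Beta distribution more tightly around it.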