protected PipelineOptimizerBase(IHostEnvironment env, IHost host)
{
    // Keep the environment and host around for assertions, channels and sampling.
    Env = env;
    Host = host;

    // Start with empty bookkeeping state; both collections are filled as the
    // optimizer explores pipelines.
    TransformsMaskValidity = new Dictionary<long, bool>();
    VisitedPipelines = new HashSet<string>();

    // Shared helper for drawing from probability distributions during sweeping.
    ProbUtils = new SweeperProbabilityUtils(host);
}
// Example #2
        /// <summary>
        /// Computes the expected improvement (EI) acquisition value for a candidate
        /// configuration from the forest's predictive statistics.
        /// </summary>
        /// <param name="bestVal">Best metric value observed so far.</param>
        /// <param name="forestStatistics">Two-element array: [0] empirical mean, [1] empirical standard deviation of the forest's predictions.</param>
        /// <returns>The expected improvement; larger values indicate more promising configurations.</returns>
        private double ComputeEI(double bestVal, double[] forestStatistics)
        {
            double empMean   = forestStatistics[0];
            double empStdDev = forestStatistics[1];
            double centered  = empMean - bestVal;

            // Fix: a zero standard deviation (all trees agree) previously produced a
            // division by zero and a NaN score. The EI limit as stddev -> 0 is
            // max(centered, 0), so return that directly.
            if (empStdDev == 0)
                return centered > 0 ? centered : 0;

            double ztrans = centered / empStdDev;
            return centered * SweeperProbabilityUtils.StdNormalCdf(ztrans) + empStdDev * SweeperProbabilityUtils.StdNormalPdf(ztrans);
        }
        /// <summary>
        /// Goes through forest to extract the set of leaf values associated with filtering each configuration.
        /// </summary>
        /// <param name="forest">Trained forest predictor, used for filtering configs.</param>
        /// <param name="configs">Parameter configurations.</param>
        /// <returns>2D array where rows correspond to configurations, and columns to the predicted leaf values.</returns>
        private double[][] GetForestRegressionLeafValues(FastForestRegressionPredictor forest, ParameterSet[] configs)
        {
            List <double[]> datasetLeafValues = new List <double[]>();
            var             e = forest.TrainedEnsemble;

            foreach (ParameterSet config in configs)
            {
                // Collect one leaf value per tree for this configuration.
                List <double> leafValues = new List <double>();
                foreach (RegressionTree t in e.Trees)
                {
                    // NOTE(review): transformedParams depends only on 'config', not on
                    // 't', so this conversion is recomputed once per tree unnecessarily.
                    Float[]         transformedParams = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, config, true);
                    VBuffer <Float> features          = new VBuffer <Float>(transformedParams.Length, transformedParams);
                    // Route the feature vector down the tree and record its leaf value.
                    leafValues.Add((Float)t.LeafValues[t.GetLeaf(in features)]);
                    // NOTE(review): this method is truncated in this excerpt -- the
                    // loop/method closing braces and return statement are not visible.
        /// <summary>
        /// Converts the run history into a normalized weight per available learner,
        /// based on each learner's observed metric performance. Learners never seen
        /// in the history receive weight 1.0 before normalization, to encourage
        /// exploration of untried algorithms.
        /// </summary>
        /// <param name="history">Previously evaluated pipelines with their metrics.</param>
        /// <param name="isMaximizingMetric">Whether larger metric values are better.</param>
        /// <returns>Weights over AvailableLearners that sum to one.</returns>
        protected double[] LearnerHistoryToWeights(PipelinePattern[] history, bool isMaximizingMetric)
        {
            int learnerCount = AvailableLearners.Length;
            var mass = new double[learnerCount];
            var seen = new int[learnerCount];

            // Map each learner name to its slot in AvailableLearners.
            var indexOf = new Dictionary<string, int>();
            for (int idx = 0; idx < learnerCount; idx++)
                indexOf[AvailableLearners[idx].LearnerName] = idx;

            double best = history.Length > 0 ? history.Max(p => p.PerformanceSummary.MetricValue) : 0d;

            // Accumulate performance-derived mass for every pipeline whose learner
            // is one of the available ones; unknown learners are skipped.
            foreach (var pipeline in history)
            {
                int slot;
                if (!indexOf.TryGetValue(pipeline.Learner.LearnerName, out slot))
                    continue;

                mass[slot] += AutoMlUtils.ProcessWeight(pipeline.PerformanceSummary.MetricValue,
                                                        best, isMaximizingMetric);
                seen[slot]++;
            }

            // Average the accumulated mass; a learner with no observations defaults
            // to 1.0 (its accumulated mass is necessarily zero in that case).
            for (int idx = 0; idx < learnerCount; idx++)
            {
                if (seen[idx] > 0)
                    mass[idx] /= seen[idx];
                else
                    mass[idx] = 1.0;
            }

            // Normalize weights to sum to one and return.
            return SweeperProbabilityUtils.Normalize(mass);
        }
// Example #5
        /// <summary>
        /// Trains a fast-forest regression model over all previously evaluated runs,
        /// using each run's parameter set as the features and its metric value as the label.
        /// </summary>
        /// <param name="previousRuns">Completed sweep runs to learn from.</param>
        /// <returns>The trained forest model.</returns>
        private FastForestRegressionModelParameters FitModel(IEnumerable <IRunResult> previousRuns)
        {
            int runCount = previousRuns.Count();
            var labels = new Single[runCount];
            var featureRows = new Single[runCount][];

            // Encode every historical run as a (features, label) pair.
            int row = 0;
            foreach (RunResult run in previousRuns)
            {
                featureRows[row] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, run.ParameterSet, true);
                labels[row] = (float)run.MetricValue;
                row++;
            }

            // Assemble the in-memory training view.
            var builder = new ArrayDataViewBuilder(_host);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single, labels);
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single, featureRows);
            IDataView trainingView = builder.GetDataView();

            _host.Assert(trainingView.GetRowCount() == labels.Length, "This data view will have as many rows as there have been evaluations");

            using (IChannel ch = _host.Start("Single training"))
            {
                // Set relevant random forest arguments.
                // Train random forest.
                var trainer = new FastForestRegressionTrainer(_host,
                                                              new FastForestRegressionTrainer.Options
                {
                    FeatureFraction            = _args.SplitRatio,
                    NumberOfTrees              = _args.NumOfTrees,
                    MinimumExampleCountPerLeaf = _args.NMinForSplit,
                    LabelColumnName            = DefaultColumnNames.Label,
                    FeatureColumnName          = DefaultColumnNames.Features,
                });

                // Return random forest predictor.
                return trainer.Fit(trainingView).Model;
            }
        }
        /// <summary>
        /// Trains a fast-forest regression predictor over all previously evaluated runs
        /// (legacy API variant), using each run's parameter set as the features and its
        /// metric value as the label.
        /// </summary>
        /// <param name="previousRuns">Completed sweep runs to learn from.</param>
        /// <returns>The trained forest predictor.</returns>
        private FastForestRegressionPredictor FitModel(IEnumerable <IRunResult> previousRuns)
        {
            int runCount = previousRuns.Count();
            var labels = new Single[runCount];
            var featureRows = new Single[runCount][];

            // Encode every historical run as a (features, label) pair.
            int row = 0;
            foreach (RunResult run in previousRuns)
            {
                featureRows[row] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, run.ParameterSet, true);
                labels[row] = (Float)run.MetricValue;
                row++;
            }

            // Assemble the in-memory training view and its role mappings.
            var builder = new ArrayDataViewBuilder(_host);
            builder.AddColumn("Label", NumberType.Float, labels);
            builder.AddColumn("Features", NumberType.Float, featureRows);
            IDataView trainingView = builder.GetDataView();

            _host.Assert(trainingView.GetRowCount() == labels.Length, "This data view will have as many rows as there have been evaluations");
            RoleMappedData data = TrainUtils.CreateExamples(trainingView, "Label", "Features");

            using (IChannel ch = _host.Start("Single training"))
            {
                // Set relevant random forest arguments.
                var args = new FastForestRegression.Arguments
                {
                    FeatureFraction     = _args.SplitRatio,
                    NumTrees            = _args.NumOfTrees,
                    MinDocumentsInLeafs = _args.NMinForSplit,
                };

                // Train random forest.
                var trainer = new FastForestRegression(_host, args);
                trainer.Train(data);
                FastForestRegressionPredictor predictor = trainer.CreatePredictor();

                // Return random forest predictor.
                ch.Done();
                return predictor;
            }
        }
        /// <summary>
        /// Samples a fresh set of hyperparameter values for the given learner via the
        /// supplied sweeper and writes them back onto the learner's sweepable parameters.
        /// </summary>
        /// <param name="learner">Learner whose hyperparameters are updated in place.</param>
        /// <param name="sweeper">Sweeper that proposes the new parameter values.</param>
        /// <param name="isMaximizingMetric">Whether larger metric values are better.</param>
        /// <param name="history">Previously evaluated pipelines, used to condition the proposal.</param>
        protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, ISweeper sweeper,
                                             bool isMaximizingMetric, PipelinePattern[] history)
        {
            // Make sure there are hyperparameters to sweep over.
            var hyperParams = learner.PipelineNode.SweepParams;

            if (hyperParams.Length == 0)
            {
                return;
            }

            // Get new set of hyperparameter values, conditioned on past results.
            var proposedParamSet = sweeper.ProposeSweeps(1, AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric)).First();

            Env.Assert(proposedParamSet != null && proposedParamSet.All(ps => hyperParams.Any(hp => hp.Name == ps.Name)));

            // Associate proposed param set with learner, so that smart hyperparam
            // sweepers (like KDO) can map them back.
            learner.PipelineNode.HyperSweeperParamSet = proposedParamSet;

            var generatorSet = hyperParams.Select(AutoMlUtils.ToIValueGenerator).ToArray();
            var values       = SweeperProbabilityUtils.ParameterSetAsFloatArray(Host, generatorSet, proposedParamSet, false);

            // Update hyperparameters. Discrete parameters store the integer index of
            // the chosen category; everything else takes the raw float value.
            // Fix: the pattern variable previously bound here ('dp') was never used,
            // so a plain type pattern suffices.
            for (int i = 0; i < hyperParams.Length; i++)
            {
                if (hyperParams[i] is TlcModule.SweepableDiscreteParamAttribute)
                {
                    hyperParams[i].RawValue = (int)values[i];
                }
                else
                {
                    hyperParams[i].RawValue = values[i];
                }
            }
        }
        /// <summary>
        /// Computes a single-mutation neighborhood (one param at a time) for a given configuration. For
        /// numeric parameters, samples K mutations (i.e., creates K neighbors based on that parameter).
        /// </summary>
        /// <param name="parent">Starting configuration.</param>
        /// <returns>A set of configurations that each differ from parent in exactly one parameter.</returns>
        private ParameterSet[] GetOneMutationNeighborhood(ParameterSet parent)
        {
            List <ParameterSet>     neighbors = new List <ParameterSet>();
            SweeperProbabilityUtils spu       = new SweeperProbabilityUtils(_host);

            // Fix: the RNG for discrete mutations was previously constructed inside the
            // parameter loop. Time-seeded Random instances created in quick succession
            // can share a seed and yield correlated (identical) draws, so create one
            // instance per call instead.
            Random rand = new Random();

            for (int i = 0; i < _sweepParameters.Length; i++)
            {
                // This allows us to query possible values of this parameter.
                IValueGenerator sweepParam = _sweepParameters[i];

                // This holds the actual value for this parameter, chosen in this parameter set.
                IParameterValue pset = parent[sweepParam.Name];

                _host.AssertValue(pset);

                DiscreteValueGenerator parameterDiscrete = sweepParam as DiscreteValueGenerator;
                if (parameterDiscrete != null)
                {
                    // Create one neighbor for every discrete parameter: flip the chosen
                    // category to a uniformly chosen *different* category.
                    Float[] neighbor = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, parent, false);

                    // Locate the index of the currently selected category.
                    int hotIndex = -1;
                    for (int j = 0; j < parameterDiscrete.Count; j++)
                    {
                        if (parameterDiscrete[j].Equals(pset))
                        {
                            hotIndex = j;
                            break;
                        }
                    }

                    _host.Assert(hotIndex >= 0);

                    // Sample from [0, Count-1) and shift indices at or past the hot one,
                    // so the mutated value always differs from the current category.
                    int randomIndex = rand.Next(0, parameterDiscrete.Count - 1);
                    randomIndex += randomIndex >= hotIndex ? 1 : 0;
                    neighbor[i]  = randomIndex;
                    neighbors.Add(SweeperProbabilityUtils.FloatArrayAsParameterSet(_host, _sweepParameters, neighbor, false));
                }
                else
                {
                    INumericValueGenerator parameterNumeric = sweepParam as INumericValueGenerator;
                    _host.Check(parameterNumeric != null, "SMAC sweeper can only sweep over discrete and numeric parameters");

                    // Create k neighbors (typically 4) for every numerical parameter by
                    // sampling a Gaussian around the current normalized value, rejecting
                    // draws outside the open interval (0, 1).
                    for (int j = 0; j < _args.NumNeighborsForNumericalParams; j++)
                    {
                        Float[] neigh  = SweeperProbabilityUtils.ParameterSetAsFloatArray(_host, _sweepParameters, parent, false);
                        double  newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                        while (newVal <= 0.0 || newVal >= 1.0)
                        {
                            newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                        }
                        neigh[i] = (Float)newVal;
                        ParameterSet neighbor = SweeperProbabilityUtils.FloatArrayAsParameterSet(_host, _sweepParameters, neigh, false);
                        neighbors.Add(neighbor);
                    }
                }
            }
            return(neighbors.ToArray());
        }