/// <summary>
/// Creates a named float parameter value. NaN is rejected up front because it cannot
/// participate in range checks or normalization.
/// </summary>
public FloatParameterValue(string name, Float value)
{
    AutoMlUtils.Assert(!Float.IsNaN(value));
    _name = name;
    _value = value;
    // Round-trip ("R") format so the textual form parses back to the exact same float.
    _valueText = value.ToString("R");
}
/// <summary>
/// Checks whether the given parameter value falls within this generator's [Min, Max] range.
/// </summary>
public bool InRange(IParameterValue value)
{
    AutoMlUtils.Assert(value is FloatParameterValue, "Parameter should be of type FloatParameterValue");
    var valueTyped = value as FloatParameterValue;
    return _args.Min <= valueTyped.Value && valueTyped.Value <= _args.Max;
}
/// <summary>
/// Encodes a <see cref="ParameterSet"/> as a flat float array: discrete parameters become
/// either a one-hot block (when <paramref name="expandCategoricals"/> is true) or the chosen
/// category index; numeric parameters are normalized to the [0,1] range.
/// </summary>
/// <param name="sweepParams">Generators describing the sweepable parameters, in order.</param>
/// <param name="ps">The concrete parameter assignment to encode.</param>
/// <param name="expandCategoricals">Whether discrete parameters expand to one-hot blocks.</param>
/// <returns>The encoded feature vector.</returns>
public static Float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, ParameterSet ps, bool expandCategoricals = true)
{
    AutoMlUtils.Assert(ps.Count == sweepParams.Length);

    var encoded = new List<Float>();
    foreach (var sweepParam in sweepParams)
    {
        // The actual value chosen for this parameter in the given parameter set.
        var pset = ps[sweepParam.Name];
        AutoMlUtils.Assert(pset != null);

        if (sweepParam is DiscreteValueGenerator parameterDiscrete)
        {
            // Locate which category was chosen.
            int hotIndex = -1;
            for (int j = 0; j < parameterDiscrete.Count; j++)
            {
                if (parameterDiscrete[j].Equals(pset))
                {
                    hotIndex = j;
                    break;
                }
            }
            AutoMlUtils.Assert(hotIndex >= 0);

            if (expandCategoricals)
            {
                // One-hot: 1 at the chosen category, 0 elsewhere.
                for (int j = 0; j < parameterDiscrete.Count; j++)
                    encoded.Add(j == hotIndex ? 1 : 0);
            }
            else
                encoded.Add(hotIndex);
        }
        else if (sweepParam is LongValueGenerator lvg)
        {
            // Normalizing all numeric parameters to [0,1] range.
            // NOTE(review): Parse uses the current culture; assumes ValueText was produced
            // with the same culture — confirm if sets can cross culture boundaries.
            encoded.Add(lvg.NormalizeValue(new LongParameterValue(pset.Name, long.Parse(pset.ValueText))));
        }
        else if (sweepParam is FloatValueGenerator fvg)
        {
            // Normalizing all numeric parameters to [0,1] range.
            encoded.Add(fvg.NormalizeValue(new FloatParameterValue(pset.Name, float.Parse(pset.ValueText))));
        }
        else
            throw new InvalidOperationException("Smart sweeper can only sweep over discrete and numeric parameters");
    }
    return encoded.ToArray();
}
/// <summary>
/// Validates the sweep-range arguments (ordering, log-scale preconditions, step size) and stores them.
/// </summary>
public FloatValueGenerator(FloatParamArguments args)
{
    AutoMlUtils.Assert(args.Min < args.Max, "min must be less than max");
    // REVIEW: this condition can be relaxed if we change the math below to deal with it
    AutoMlUtils.Assert(!args.LogBase || args.Min > 0, "min must be positive if log scale is used");
    // Step size constraints differ by scale: multiplicative steps must exceed 1,
    // additive steps must be positive.
    AutoMlUtils.Assert(!args.LogBase || args.StepSize == null || args.StepSize > 1, "StepSize must be greater than 1 if log scale is used");
    AutoMlUtils.Assert(args.LogBase || args.StepSize == null || args.StepSize > 0, "StepSize must be greater than 0 if linear scale is used");
    _args = args;
}
/// <summary>
/// For each sampled parent index, draws one child configuration around that parent run.
/// </summary>
private ParameterSet[] GenerateChildConfigurations(IRunResult[] history, int[] parentIndicies, double[] weights, IRunResult[] previousRuns, double rMean, double rVar)
{
    AutoMlUtils.Assert(history.Length == weights.Length && parentIndicies.Max() < history.Length);

    var children = new List<ParameterSet>(parentIndicies.Length);
    foreach (int parentIndex in parentIndicies)
    {
        var parent = (RunResult)history[parentIndex];
        children.Add(SampleChild(parent.ParameterSet, parent.MetricValue, history.Length, previousRuns, rMean, rVar, parent.IsMetricMaximizing));
    }
    return children.ToArray();
}
/// <summary>
/// Maps a parameter value into [0,1] relative to this generator's [Min, Max] range,
/// interpolating in log space when LogBase is set and linearly otherwise.
/// </summary>
public Float NormalizeValue(IParameterValue value)
{
    var valueTyped = value as FloatParameterValue;
    AutoMlUtils.Assert(valueTyped != null, "FloatValueGenerator could not normalized parameter because it is not of the correct type");
    AutoMlUtils.Assert(_args.Min <= valueTyped.Value && valueTyped.Value <= _args.Max, "Value not in correct range");

    if (!_args.LogBase)
        return (valueTyped.Value - _args.Min) / (_args.Max - _args.Min);

    // Log scale: the base is either the explicit step size or derived so that
    // NumSteps multiplicative steps span [Min, Max].
    Float logBase = (Float)(_args.StepSize ?? Math.Pow(1.0 * _args.Max / _args.Min, 1.0 / (_args.NumSteps - 1)));
    return (Float)((Math.Log(valueTyped.Value, logBase) - Math.Log(_args.Min, logBase))
        / (Math.Log(_args.Max, logBase) - Math.Log(_args.Min, logBase)));
}
/// <summary>
/// Inverse of ParameterSetAsFloatArray: decodes a flat float array back into a
/// <see cref="ParameterSet"/>.
/// </summary>
/// <param name="sweepParams">Generators describing the sweepable parameters, in order.</param>
/// <param name="array">Encoded values; one slot per parameter, or one slot per category
/// for discrete parameters when <paramref name="expandedCategoricals"/> is true.</param>
/// <param name="expandedCategoricals">Whether discrete parameters were one-hot expanded.</param>
/// <returns>The decoded parameter set.</returns>
public static ParameterSet FloatArrayAsParameterSet(IValueGenerator[] sweepParams, Float[] array, bool expandedCategoricals = true)
{
    // When categoricals are one-hot expanded the array is at least as long as the
    // parameter list; the lengths are equal only in the unexpanded case.
    AutoMlUtils.Assert(expandedCategoricals ? array.Length >= sweepParams.Length : array.Length == sweepParams.Length);

    List<IParameterValue> parameters = new List<IParameterValue>();
    int currentArrayIndex = 0;
    for (int i = 0; i < sweepParams.Length; i++)
    {
        var parameterDiscrete = sweepParams[i] as DiscreteValueGenerator;
        if (parameterDiscrete != null)
        {
            if (expandedCategoricals)
            {
                // BUGFIX: scan the one-hot block at the running array offset
                // (currentArrayIndex + j), not at the parameter index (i + j) —
                // the two diverge as soon as any earlier categorical has expanded.
                int hotIndex = -1;
                for (int j = 0; j < parameterDiscrete.Count; j++)
                {
                    if (array[currentArrayIndex + j] > 0)
                    {
                        hotIndex = j;
                        break;
                    }
                }
                // BUGFIX: hotIndex is an offset within this one-hot block, so it only
                // needs to be non-negative (was incorrectly compared against i).
                AutoMlUtils.Assert(hotIndex >= 0);
                parameters.Add(new StringParameterValue(sweepParams[i].Name, parameterDiscrete[hotIndex].ValueText));
                currentArrayIndex += parameterDiscrete.Count;
            }
            else
            {
                // Unexpanded: the slot holds the category index directly.
                parameters.Add(new StringParameterValue(sweepParams[i].Name, parameterDiscrete[(int)array[currentArrayIndex]].ValueText));
                currentArrayIndex++;
            }
        }
        else
        {
            // Numeric: the slot holds a [0,1]-normalized value.
            parameters.Add(sweepParams[i].CreateFromNormalized(array[currentArrayIndex]));
            currentArrayIndex++;
        }
    }
    return new ParameterSet(parameters);
}
/// <summary>
/// Proposes up to <paramref name="maxSweeps"/> parameter sets, retrying generation
/// (bounded by _args.Retries) to avoid duplicating prior or already-proposed configurations.
/// </summary>
public virtual ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResult> previousRuns = null)
{
    var historical = previousRuns?.Select(r => r.ParameterSet).ToList() ?? new List<ParameterSet>();
    var proposed = new HashSet<ParameterSet>();
    for (int i = 0; i < maxSweeps; i++)
    {
        ParameterSet candidate;
        int attempts = 0;
        do
        {
            candidate = CreateParamSet();
            ++attempts;
        } while (candidate != null && attempts < _args.Retries
            && (AlreadyGenerated(candidate, historical) || AlreadyGenerated(candidate, proposed)));
        AutoMlUtils.Assert(candidate != null);
        proposed.Add(candidate);
    }
    return proposed.ToArray();
}
/// <summary>
/// REVIEW: Assumes metric is between 0.0 and 1.0. Will not work with metrics that have values outside this range.
/// </summary>
private ParameterSet[] GenerateCandidateConfigurations(int numOfCandidates, IRunResult[] previousRuns)
{
    AutoMlUtils.Assert(previousRuns != null && previousRuns.Length > 1);

    // Optionally truncate the history to the configured maximum length.
    IRunResult[] history = previousRuns;
    int totalHistoryLength = history.Length;
    if (history.Length > _args.HistoryLength)
        history = TruncateHistory(history);

    // Mean/variance of the random-run metric values serve as the reference distribution.
    double[] randomVals = ExtractRandomRunValues(previousRuns);
    double rMean = VectorUtils.GetMean(randomVals);
    // Add a small amount of variance for unlikely edge cases when all values were identical
    // (i.e., zero variance). Should not happen, but a tiny floor ensures it never causes problems.
    double rVar = Math.Pow(VectorUtils.GetStandardDeviation(randomVals), 2) + 1e-10;

    // Weight each historical run, sample parents proportionally, then generate children.
    double[] weights = HistoryToWeights(history, totalHistoryLength, rMean, rVar);
    int[] parentIndicies = SampleCategoricalDist(numOfCandidates, weights);
    return GenerateChildConfigurations(history, parentIndicies, weights, previousRuns, rMean, rVar);
}
/// <summary>
/// Does a mix of greedy local search around best performing parameter sets, while throwing random parameter sets into the mix.
/// </summary>
/// <param name="parents">Beginning locations for local greedy search.</param>
/// <param name="forest">Trained random forest, used later for evaluating parameters.</param>
/// <param name="numOfCandidates">Number of candidate configurations returned by the method (top K).</param>
/// <param name="previousRuns">Historical run results.</param>
/// <returns>Array of parameter sets, which will then be evaluated.</returns>
private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForestRegressionModelParameters forest, int numOfCandidates, IEnumerable<IRunResult> previousRuns)
{
    // REVIEW: The IsMetricMaximizing flag affects the comparator, so that
    // performing Max() should get the best, regardless of if it is maximizing or
    // minimizing.
    RunResult bestRun = (RunResult)previousRuns.Max();
    RunResult worstRun = (RunResult)previousRuns.Min();
    double bestVal = bestRun.IsMetricMaximizing ? bestRun.MetricValue : worstRun.MetricValue - bestRun.MetricValue;

    var configurations = new HashSet<Tuple<double, ParameterSet>>();

    // Greedy local search starting from each parent configuration.
    foreach (ParameterSet parent in parents)
        configurations.Add(LocalSearch(parent, forest, bestVal, _args.Epsilon));

    // Mix in an additional batch of random configurations, scored by expected improvement.
    ParameterSet[] randomConfigs = _randomSweeper.ProposeSweeps(_args.NumRandomEISearchConfigurations, previousRuns);
    double[] randomEIs = EvaluateConfigurationsByEI(forest, bestVal, randomConfigs);
    AutoMlUtils.Assert(randomConfigs.Length == randomEIs.Length);
    for (int i = 0; i < randomConfigs.Length; i++)
        configurations.Add(new Tuple<double, ParameterSet>(randomEIs[i], randomConfigs[i]));

    // Retain only the top numOfCandidates configurations by score.
    var retainedConfigs = new HashSet<ParameterSet>();
    foreach (var scored in configurations.OrderByDescending(x => x.Item1).Take(numOfCandidates))
        retainedConfigs.Add(scored.Item2);

    return retainedConfigs.ToArray();
}
/// <summary>
/// Encodes a single parameter of <paramref name="parameterSet"/> as a float: the chosen
/// category index for discrete parameters, or the [0,1]-normalized value for numeric ones.
/// </summary>
/// <param name="parameterSet">The parameter assignment to read from.</param>
/// <param name="index">Position of the parameter within _sweepParameters.</param>
private Float ParameterAsFloat(ParameterSet parameterSet, int index)
{
    AutoMlUtils.Assert(parameterSet.Count == _sweepParameters.Length);
    // BUGFIX: the bound must be strict (<). The original allowed index == Length,
    // which would throw IndexOutOfRangeException on the access below.
    AutoMlUtils.Assert(index >= 0 && index < _sweepParameters.Length);

    var sweepParam = _sweepParameters[index];
    var pset = parameterSet[sweepParam.Name];
    AutoMlUtils.Assert(pset != null);

    var parameterDiscrete = sweepParam as DiscreteValueGenerator;
    if (parameterDiscrete != null)
    {
        // Discrete parameter: encode as the index of the chosen category.
        int hotIndex = -1;
        for (int j = 0; j < parameterDiscrete.Count; j++)
        {
            if (parameterDiscrete[j].Equals(pset))
            {
                hotIndex = j;
                break;
            }
        }
        AutoMlUtils.Assert(hotIndex >= 0);
        return hotIndex;
    }
    else
    {
        var parameterNumeric = sweepParam as INumericValueGenerator;
        //_host.Check(parameterNumeric != null, "SMAC sweeper can only sweep over discrete and numeric parameters");
        // Normalizing all numeric parameters to [0,1] range.
        return parameterNumeric.NormalizeValue(pset);
    }
}
/// <summary>
/// Trains a random-forest regressor that maps encoded parameter sets (features)
/// to their observed metric values (labels).
/// </summary>
/// <param name="previousRuns">Historical run results to train on.</param>
/// <returns>The trained forest model.</returns>
private FastForestRegressionModelParameters FitModel(IEnumerable<IRunResult> previousRuns)
{
    // FIX: materialize the sequence once. The original enumerated previousRuns three
    // times (two Count() calls plus the foreach), re-executing any deferred query.
    var runs = previousRuns.ToList();
    Single[] targets = new Single[runs.Count];
    Single[][] features = new Single[runs.Count][];

    int i = 0;
    foreach (RunResult r in runs)
    {
        features[i] = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, r.ParameterSet, true);
        targets[i] = (Float)r.MetricValue;
        i++;
    }

    ArrayDataViewBuilder dvBuilder = new ArrayDataViewBuilder(_context);
    dvBuilder.AddColumn(DefaultColumnNames.Label, NumberType.Float, targets);
    dvBuilder.AddColumn(DefaultColumnNames.Features, NumberType.Float, features);

    IDataView data = dvBuilder.GetDataView();
    AutoMlUtils.Assert(data.GetRowCount() == targets.Length, "This data view will have as many rows as there have been evaluations");

    // Set relevant random forest arguments.
    // Train random forest.
    var trainer = new FastForestRegression(_context, DefaultColumnNames.Label, DefaultColumnNames.Features, advancedSettings: s =>
    {
        s.FeatureFraction = _args.SplitRatio;
        s.NumTrees = _args.NumOfTrees;
        s.MinDocumentsInLeafs = _args.NMinForSplit;
    });
    var predictor = trainer.Train(data).Model;

    // Return random forest predictor.
    return predictor;
}
/// <summary>
/// Draws <paramref name="numRVs"/> samples from a multivariate Gaussian with mean
/// <paramref name="mu"/> and the given diagonal covariance matrix.
/// </summary>
private double[][] SampleDiagonalCovMultivariateGaussian(int numRVs, double[] mu, double[][] diagonalCovariance)
{
    // Perform checks to ensure covariance has correct form (square, diagonal,
    // non-negative entries, dimension d).
    int d = mu.Length;
    AutoMlUtils.Assert(d > 0 && diagonalCovariance.Length == d);
    for (int i = 0; i < d; i++)
    {
        AutoMlUtils.Assert(diagonalCovariance[i].Length == d);
        for (int j = 0; j < d; j++)
            AutoMlUtils.Assert((i == j && diagonalCovariance[i][j] >= 0) || diagonalCovariance[i][j] == 0);
    }

    // Build the transform matrix with sqrt(var_i) placed on the ANTI-diagonal.
    // NOTE(review): this looks like it should be the main diagonal (i == j), but since the
    // standard-normal draws below are i.i.d., the resulting distribution is the same either
    // way — confirm intent before "fixing", as changing it would alter seeded outputs.
    double[][] transform = new double[d][];
    for (int i = 0; i < d; i++)
    {
        transform[i] = new double[d];
        for (int j = 0; j < d; j++)
            transform[i][j] = i + j == d - 1 ? Math.Sqrt(diagonalCovariance[i][i]) : 0;
    }

    // Each sample is mu + A * z, with z a vector of independent standard normal RVs.
    double[][] points = new double[numRVs][];
    for (int k = 0; k < points.Length; k++)
        points[k] = VectorTransformAdd(mu, _spu.NormalRVs(mu.Length, 0, 1), transform);
    return points;
}
/// <summary>
/// Computes a single-mutation neighborhood (one param at a time) for a given configuration. For
/// numeric parameters, samples K mutations (i.e., creates K neighbors based on that paramater).
/// </summary>
/// <param name="parent">Starting configuration.</param>
/// <returns>A set of configurations that each differ from parent in exactly one parameter.</returns>
private ParameterSet[] GetOneMutationNeighborhood(ParameterSet parent)
{
    List<ParameterSet> neighbors = new List<ParameterSet>();
    SweeperProbabilityUtils spu = new SweeperProbabilityUtils();
    // BUGFIX: create the Random once per call instead of once per discrete parameter.
    // Instantiating Random inside the loop risks identical time-based seeds (and thus
    // identical "random" category picks) on runtimes that seed from the clock.
    Random r = new Random();

    for (int i = 0; i < _sweepParameters.Length; i++)
    {
        // This allows us to query possible values of this parameter.
        IValueGenerator sweepParam = _sweepParameters[i];

        // This holds the actual value for this parameter, chosen in this parameter set.
        IParameterValue pset = parent[sweepParam.Name];
        AutoMlUtils.Assert(pset != null);

        DiscreteValueGenerator parameterDiscrete = sweepParam as DiscreteValueGenerator;
        if (parameterDiscrete != null)
        {
            // Create one neighbor for every discrete parameter: replace the current
            // category with a uniformly chosen different one.
            // NOTE(review): assumes at least two categories; with a single category the
            // shifted index would fall out of range — confirm upstream guarantees this.
            Float[] neighbor = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, parent, false);
            int hotIndex = -1;
            for (int j = 0; j < parameterDiscrete.Count; j++)
            {
                if (parameterDiscrete[j].Equals(pset))
                {
                    hotIndex = j;
                    break;
                }
            }
            AutoMlUtils.Assert(hotIndex >= 0);
            // Draw from [0, Count-1) then skip over hotIndex to exclude the current category.
            int randomIndex = r.Next(0, parameterDiscrete.Count - 1);
            randomIndex += randomIndex >= hotIndex ? 1 : 0;
            neighbor[i] = randomIndex;
            neighbors.Add(SweeperProbabilityUtils.FloatArrayAsParameterSet(_sweepParameters, neighbor, false));
        }
        else
        {
            INumericValueGenerator parameterNumeric = sweepParam as INumericValueGenerator;
            AutoMlUtils.Assert(parameterNumeric != null, "SMAC sweeper can only sweep over discrete and numeric parameters");

            // Create k neighbors (typically 4) for every numerical parameter: perturb the
            // normalized value with Gaussian noise, rejecting samples outside (0, 1).
            for (int j = 0; j < _args.NumNeighborsForNumericalParams; j++)
            {
                Float[] neigh = SweeperProbabilityUtils.ParameterSetAsFloatArray(_sweepParameters, parent, false);
                double newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                while (newVal <= 0.0 || newVal >= 1.0)
                    newVal = spu.NormalRVs(1, neigh[i], 0.2)[0];
                neigh[i] = (Float)newVal;
                ParameterSet neighbor = SweeperProbabilityUtils.FloatArrayAsParameterSet(_sweepParameters, neigh, false);
                neighbors.Add(neighbor);
            }
        }
    }
    return neighbors.ToArray();
}
/// <summary>
/// Draws <paramref name="numSamples"/> indices from the categorical distribution
/// defined by the (unnormalized, strictly positive-sum) weights.
/// </summary>
private int[] SampleCategoricalDist(int numSamples, double[] weights)
{
    AutoMlUtils.Assert(weights != null && weights.Any());
    AutoMlUtils.Assert(weights.Sum() > 0);
    return _spu.SampleCategoricalDistribution(numSamples, weights);
}