/// <summary>
/// REVIEW: Original CategoriesToWeights implementation, kept until the replacement is
/// validated. Known weakness: a category that is sampled often but performs poorly still
/// accumulates a large weight. The newer function fixes this and explores better.
/// </summary>
/// <param name="param">The discrete parameter whose categories are being weighted.</param>
/// <param name="previousRuns">Results of earlier sweeps to derive the weights from.</param>
/// <returns>A normalized weight per category value.</returns>
private double[] CategoriesToWeightsOld(DiscreteValueGenerator param, IEnumerable<IRunResult> previousRuns)
{
    var weights = new double[param.Count];
    var labelToIndex = new Dictionary<string, int>();

    // Build the value-text -> index map and seed every weight with a small
    // pseudo-observation so unobserved settings keep some probability mass.
    for (int i = 0; i < param.Count; i++)
    {
        labelToIndex[param[i].ValueText] = i;
        weights[i] = 0.1;
    }

    // Accumulate the observed metric value of each run onto its category.
    bool isMaximizing = true;
    foreach (RunResult run in previousRuns)
    {
        weights[labelToIndex[run.ParameterSet[param.Name].ValueText]] += run.MetricValue;
        isMaximizing = run.IsMetricMaximizing;
    }

    // Turn the accumulated mass into a probability distribution; invert when minimizing.
    return isMaximizing
        ? SweeperProbabilityUtils.Normalize(weights)
        : SweeperProbabilityUtils.InverseNormalize(weights);
}
internal Parameter(T[] candidates)
{
    // Wrap the candidate values in an option bag and back this parameter
    // with a discrete value generator over them.
    this.ValueGenerator = new DiscreteValueGenerator<T>(
        new DiscreteValueGenerator<T>.Option<T>()
        {
            Values = candidates,
        });
}
public void TestDiscreteValueSweep(double normalizedValue, string expected)
{
    // Arrange: a discrete sweep over three string values.
    var sweepArgs = new DiscreteParamArguments() { Name = "bla", Values = new[] { "foo", "bar", "baz" } };
    var paramSweep = new DiscreteValueGenerator(sweepArgs);

    // Act: map a normalized [0,1] position onto a concrete parameter value.
    var value = paramSweep.CreateFromNormalized(normalizedValue);

    // Assert: the parameter name is preserved and the value text matches.
    Assert.Equal("bla", value.Name);
    Assert.Equal(expected, value.ValueText);
}
public void DiscreteValueGenerator_should_return_one_hot_encode()
{
    // Four candidate values of mixed types.
    var candidates = new object[] { "a", 2, "c", 4 };
    var generator = new DiscreteValueGenerator<object>(
        new DiscreteValueGenerator<object>.Option<object>()
        {
            Name = "discrete",
            Values = candidates,
        });

    // Encoding the first candidate should light up only the first slot.
    var encoded = generator.OneHotEncodeValue(new ObjectParameterValue<object>("val", candidates[0]));
    encoded.Should().BeEquivalentTo(new int[] { 1, 0, 0, 0 });
}
public void DiscreteValueGenerator_should_generate_value_from_normalize(object a, object b, object c, object d)
{
    // Arrange a generator over the four supplied candidates.
    var candidates = new object[] { a, b, c, d };
    var generator = new DiscreteValueGenerator<object>(
        new DiscreteValueGenerator<object>.Option<object>()
        {
            Name = "discrete",
            Values = candidates,
        });

    // A mid-range normalized value must map onto one of the candidates,
    // and the generator must report all four of them.
    candidates.Should().Contain(generator.CreateFromNormalized(0.5).RawValue);
    generator.Count.Should().Be(4);
}
public void RandomGridSweeperReturnsDistinctValuesWhenProposeSweep()
{
    // Arrange a grid sweeper over the shared discrete generator fixture.
    DiscreteValueGenerator valueGenerator = CreateDiscreteValueGenerator();
    var env = new MLContext(42);
    var sweeper = new RandomGridSweeper(env, new RandomGridSweeper.Options(), new[] { valueGenerator });

    // Act: request three sweeps.
    var results = sweeper.ProposeSweeps(3);

    // Assert: only two distinct parameter sets are expected back
    // (presumably the grid holds just two distinct settings — duplicates collapse).
    Assert.NotNull(results);
    Assert.Equal(2, results.Length);
}
public void UniformRandomSweeperReturnsDistinctValuesWhenProposeSweep()
{
    // Arrange a uniform-random sweeper over the shared discrete generator fixture.
    DiscreteValueGenerator valueGenerator = CreateDiscreteValueGenerator();
    var env = new MLContext(42);
    var sweeper = new UniformRandomSweeper(env, new SweeperBase.ArgumentsBase(), new[] { valueGenerator });

    // Act: request three sweeps.
    var results = sweeper.ProposeSweeps(3);

    // Assert: only two distinct parameter sets are expected back
    // (presumably the generator holds just two distinct settings).
    Assert.NotNull(results);
    Assert.Equal(2, results.Length);
}
public void RandomGridSweeperReturnsDistinctValuesWhenProposeSweep()
{
    DiscreteValueGenerator valueGenerator = CreateDiscreteValueGenerator();

    // Route console output into an in-memory buffer so the test stays silent.
    using (var writer = new StreamWriter(new MemoryStream()))
    using (var env = new ConsoleEnvironment(42, outWriter: writer, errWriter: writer))
    {
        var sweeper = new RandomGridSweeper(env, new RandomGridSweeper.Arguments(), new[] { valueGenerator });

        // Request three sweeps; only two distinct parameter sets are expected back.
        var results = sweeper.ProposeSweeps(3);

        Assert.NotNull(results);
        Assert.Equal(2, results.Length);
    }
}
/// <summary>
/// Newer replacement for CategoriesToWeightsOld. Averages the observed metric per
/// category (so frequently-tried but poor categories no longer dominate) and seeds
/// never-tried categories with the best observed value to encourage exploration.
/// </summary>
private double[] CategoriesToWeights(DiscreteValueGenerator param, IRunResult[] previousRuns)
{
    var weights = new double[param.Count];
    var counts = new int[param.Count];
    var labelToIndex = new Dictionary<string, int>();

    // Map each categorical value's text to its slot in the weight array.
    for (int j = 0; j < param.Count; j++)
    {
        labelToIndex[param[j].ValueText] = j;
    }

    // Accumulate metric mass and observation counts per category.
    bool isMaximizing = true;
    foreach (RunResult r in previousRuns)
    {
        int index = labelToIndex[r.ParameterSet[param.Name].ValueText];
        weights[index] += r.MetricValue;
        counts[index]++;
        isMaximizing = r.IsMetricMaximizing;
    }

    // Best observed metric, used as the default for untried categories so that
    // unexplored settings remain attractive.
    double bestVal = isMaximizing
        ? previousRuns.Cast<RunResult>().Where(r => r.HasMetricValue).Max(r => r.MetricValue)
        : previousRuns.Cast<RunResult>().Where(r => r.HasMetricValue).Min(r => r.MetricValue);

    // Average observed categories; backfill unseen ones with the best value.
    for (int i = 0; i < weights.Length; i++)
    {
        weights[i] = counts[i] > 0 ? weights[i] / counts[i] : bestVal;
    }

    // Normalize to a probability distribution (inverted when minimizing).
    return isMaximizing
        ? SweeperProbabilityUtils.Normalize(weights)
        : SweeperProbabilityUtils.InverseNormalize(weights);
}
public void SmacQuickRunTest()
{
    // Number of random configurations SMAC samples before its surrogate model is used.
    var numInitialPopulation = 10;

    var floatValueGenerator = new FloatValueGenerator(new FloatParamArguments() { Name = "float", Min = 1, Max = 1000 });
    var floatLogValueGenerator = new FloatValueGenerator(new FloatParamArguments() { Name = "floatLog", Min = 1, Max = 1000, LogBase = true });
    var longValueGenerator = new LongValueGenerator(new LongParamArguments() { Name = "long", Min = 1, Max = 1000 });
    var longLogValueGenerator = new LongValueGenerator(new LongParamArguments() { Name = "longLog", Min = 1, Max = 1000, LogBase = true });
    var discreteValueGeneator = new DiscreteValueGenerator(new DiscreteParamArguments() { Name = "discrete", Values = new[] { "200", "400", "600", "800" } });

    var sweeper = new SmacSweeper(new MLContext(), new SmacSweeper.Arguments()
    {
        SweptParameters = new IValueGenerator[] { floatValueGenerator, floatLogValueGenerator, longValueGenerator, longLogValueGenerator, discreteValueGeneator },
        NumberInitialPopulation = numInitialPopulation
    });

    // Sanity check that each generator built a non-empty grid.
    Assert.NotNull(floatValueGenerator[0].ValueText);
    Assert.NotNull(floatLogValueGenerator[0].ValueText);
    Assert.NotNull(longValueGenerator[0].ValueText);
    Assert.NotNull(longLogValueGenerator[0].ValueText);
    Assert.NotNull(discreteValueGeneator[0].ValueText);

    List<RunResult> results = new List<RunResult>();
    RunResult bestResult = null;
    for (var i = 0; i < numInitialPopulation + 1; i++)
    {
        ParameterSet[] pars = sweeper.ProposeSweeps(1, results);
        foreach (ParameterSet p in pars)
        {
            float x1 = float.Parse(p["float"].ValueText);
            float x2 = float.Parse(p["floatLog"].ValueText);
            long x3 = long.Parse(p["long"].ValueText);
            long x4 = long.Parse(p["longLog"].ValueText);
            int x5 = int.Parse(p["discrete"].ValueText);

            // Toy objective: the sum of all sampled parameters (higher is better).
            double metric = x1 + x2 + x3 + x4 + x5;

            RunResult result = new RunResult(p, metric, true);
            if (bestResult == null || bestResult.MetricValue < metric)
            {
                bestResult = result;
            }
            results.Add(result);
            Console.WriteLine($"{metric}\t{x1},{x2}");
        }
    }

    // Fix: assert non-null BEFORE dereferencing bestResult. The original logged
    // bestResult.MetricValue first, which would throw NullReferenceException instead
    // of producing a clean assertion failure when no sweeps were proposed.
    Assert.NotNull(bestResult);
    Console.WriteLine($"Best: {bestResult.MetricValue}");
    Assert.True(bestResult.MetricValue > 0);
}