/// <summary>
/// Creates a bandit: copies every argument into the property of the same name
/// and resets the per-run state (<c>Time</c>, <c>Indices</c>, <c>AverageReward</c>,
/// the random source and the probability sampler).
/// </summary>
/// <param name="kArm">Number of arms; <c>Indices</c> is filled with 0 .. kArm-1.</param>
/// <param name="epsilon">Stored in <c>Epsilon</c> (presumably the exploration probability; confirm against the action-selection code).</param>
/// <param name="initial">Stored in <c>Initial</c> (presumably the initial value estimate; confirm).</param>
/// <param name="stepSize">Stored in <c>StepSize</c>.</param>
/// <param name="sampleAverages">Stored in <c>SampleAverages</c>.</param>
/// <param name="ucbParam">Stored in <c>UcbParam</c>; <c>null</c> by default.</param>
/// <param name="gradient">Stored in <c>Gradient</c>.</param>
/// <param name="gradientBaseline">Stored in <c>GradientBaseline</c>.</param>
/// <param name="trueReward">Stored in <c>TrueReward</c>.</param>
public Bandit(
    int kArm = 4,
    double epsilon = 0.0,
    double initial = 0.0,
    double stepSize = 0.1,
    bool sampleAverages = false,
    int? ucbParam = null,
    bool gradient = false,
    bool gradientBaseline = false,
    double trueReward = 0.0)
{
    // Configuration, copied verbatim from the arguments.
    KArm = kArm;
    Epsilon = epsilon;
    Initial = initial;
    StepSize = stepSize;
    SampleAverages = sampleAverages;
    UcbParam = ucbParam;
    Gradient = gradient;
    GradientBaseline = gradientBaseline;
    TrueReward = trueReward;

    // Per-run state.
    Time = 0;
    Indices = Enumerable.Range(0, KArm).ToList();
    AverageReward = 0.0;

    // Use the runtime's default seed. The previous seed, DateTime.Now.Second,
    // can only take 60 distinct values, so bandits created within the same
    // second produced identical random sequences.
    Ran = new Random();
    ProbabilityRandom = new ProbabilityRandom<int>();
}
/// <summary>
/// Configures a <c>ProbabilityRandom&lt;string&gt;</c> with keys A-F, draws 50000
/// samples from it and prints, for each key, the observed frequency next to the
/// value held in <c>DicOfProbs</c>, followed by the sum of the observed frequencies.
/// </summary>
public void Test1()
{
    var configuredProbs = new Dictionary<string, double>
    {
        { "A", 0.1 },
        { "B", 0.2 },
        { "C", 0.4 },
        { "D", 0.2 },
        { "E", 0.1 },
    };

    var probabilityRandom = new ProbabilityRandom<string>();
    foreach (var entry in configuredProbs)
    {
        probabilityRandom.SetProb(entry.Key, entry.Value);
    }

    // "D" is set a second time and "F" is added afterwards.
    // NOTE(review): presumably SetProb overwrites an existing key, which would
    // make the configured values sum to 1.0 — confirm against ProbabilityRandom.
    probabilityRandom.SetProb("D", 0.1);
    probabilityRandom.SetProb("F", 0.1);

    // Hit counters; kept as double so the division below is floating-point.
    var hitCounts = new Dictionary<string, double>
    {
        { "A", 0 },
        { "B", 0 },
        { "C", 0 },
        { "D", 0 },
        { "E", 0 },
        { "F", 0 },
    };

    int maxRand = 50000;
    for (int draw = 0; draw < maxRand; draw++)
    {
        var key = probabilityRandom.Next();
        hitCounts[key] += 1;
    }

    double sum = 0;
    foreach (var item in hitCounts)
    {
        // Observed relative frequency of this key over all draws.
        double frequency = item.Value / maxRand;
        console.WriteLine($"P({item.Key}) = {frequency} ({probabilityRandom.DicOfProbs[item.Key]})");
        sum += frequency;
    }

    console.WriteLine($"P = {sum}");
}