/// <summary>
/// Manual smoke check for <c>ProbabilityRandom</c>: registers a weight per key,
/// draws 50000 samples and prints each key's observed frequency next to the
/// weight stored in <c>DicOfProbs</c>, followed by the sum of the frequencies.
/// </summary>
/// <remarks>
/// Nothing is asserted; the printed output is meant to be inspected by eye.
/// </remarks>
public void Test1()
{
    var dicsOfProbs = new Dictionary<string, double>
    {
        { "A", 0.1 },
        { "B", 0.2 },
        { "C", 0.4 },
        { "D", 0.2 },
        { "E", 0.1 },
    };

    var probabilityRandom = new ProbabilityRandom<string>();
    foreach (var item in dicsOfProbs)
    {
        probabilityRandom.SetProb(item.Key, item.Value);
    }

    // Set "D" a second time and add a new key "F".
    // NOTE(review): presumably SetProb replaces D's earlier 0.2, which would make
    // the weights total 1.0 - confirm against ProbabilityRandom.SetProb.
    probabilityRandom.SetProb("D", 0.1);
    probabilityRandom.SetProb("F", 0.1);

    // Hit counters per key. Kept as double so the divisions below are
    // floating-point rather than integer divisions.
    var actualProbs = new Dictionary<string, double>
    {
        { "A", 0 },
        { "B", 0 },
        { "C", 0 },
        { "D", 0 },
        { "E", 0 },
        { "F", 0 },
    };

    int maxRand = 50000;
    for (int i = 0; i < maxRand; i++)
    {
        var num = probabilityRandom.Next();
        actualProbs[num] = actualProbs[num] + 1;
    }

    // Report observed frequency vs. configured weight for every key.
    double sum = 0;
    foreach (var item in actualProbs)
    {
        console.WriteLine($"P({item.Key}) = {item.Value / maxRand} ({probabilityRandom.DicOfProbs[item.Key]})");
        sum += item.Value / maxRand;
    }

    console.WriteLine($"P = {sum}");
}
/// <summary>
/// Selects the next action (arm index) to take.
/// </summary>
/// <remarks>
/// The strategies are tried in this order:
/// <list type="number">
/// <item>With probability <c>Epsilon</c>, a random entry of <c>Indices</c>.</item>
/// <item>If <c>UcbParam</c> has a value, the arm with the highest upper-confidence-bound
/// score; ties are broken at random. Updates <c>UcbEstimation</c> and <c>QBest</c>.</item>
/// <item>If <c>Gradient</c> is set, a draw from the softmax of <c>QEstimation</c>
/// through <c>ProbabilityRandom</c>. Updates <c>ExpEst</c> and <c>ActionProb</c>.</item>
/// <item>Otherwise, the first arm whose <c>QEstimation</c> equals the maximum.</item>
/// </list>
/// </remarks>
/// <returns>
/// The chosen arm index, and a flag that is <c>true</c> for the UCB and greedy
/// branches and <c>false</c> for the epsilon and gradient branches.
/// </returns>
public (int, bool) Act()
{
    if (Ran.NextDouble() < Epsilon)
    {
        return (Indices[Ran.Next(KArm)], false);
    }

    if (UcbParam.HasValue)
    {
        var ucbWeight = UcbParam.Value;
        var logTime = Math.Log(Time + 1);

        // 1e-5 keeps the denominator non-zero for arms that were never pulled.
        // (The previous "10 ^ -5" was an integer XOR evaluating to -15, which made
        // the square root NaN for every arm pulled fewer than 15 times.)
        UcbEstimation = QEstimation
            .Select((data, index) => data + ucbWeight * Math.Sqrt(logTime / (ActionCount[index] + 1e-5)))
            .ToList();
        QBest = UcbEstimation.Max();

        // Attach the arm index BEFORE filtering so that the index that survives
        // is the arm's position in UcbEstimation, not its position among the ties.
        var ucbEstMaxList = UcbEstimation
            .Select((data, index) => new { Index = index, Data = data })
            .Where(entry => entry.Data == QBest)
            .ToList();
        return (ucbEstMaxList[Ran.Next(ucbEstMaxList.Count)].Index, true);
    }

    if (Gradient)
    {
        ExpEst = QEstimation.Select(qEst => Math.Exp(qEst)).ToList();

        // Normalising constant computed once instead of once per element.
        var expSum = ExpEst.Sum();
        ActionProb = ExpEst.Select(expEst => expEst / expSum).ToList();

        ProbabilityRandom.Reset();
        for (int i = 0; i < ActionProb.Count; i++)
        {
            ProbabilityRandom.SetProb(i, ActionProb[i]);
        }

        return (ProbabilityRandom.Next(), false);
    }

    // Greedy fallback: IndexOf yields the first arm holding the maximum estimate.
    return (QEstimation.IndexOf(QEstimation.Max()), true);
}