예제 #1
0
        /// <summary>
        /// Configures a <c>ProbabilityRandom&lt;string&gt;</c> with per-key probabilities, draws
        /// from it 50,000 times, and writes the observed frequency of every key next to
        /// the probability stored in <c>DicOfProbs</c>, followed by the sum of the
        /// observed frequencies.
        /// </summary>
        /// <remarks>
        /// Output goes through the <c>console</c> member, which is declared outside this method.
        /// The method performs no assertions; results are meant to be inspected manually.
        /// </remarks>
        public void Test1()
        {
            const int maxRand = 50000;

            var dicsOfProbs = new Dictionary <string, double>
            {
                { "A", 0.1 },
                { "B", 0.2 },
                { "C", 0.4 },
                { "D", 0.2 },
                { "E", 0.1 },
            };

            var probabilityRandom = new ProbabilityRandom <string>();

            foreach (var item in dicsOfProbs)
            {
                probabilityRandom.SetProb(item.Key, item.Value);
            }

            // Overwrite "D" and add "F" after the initial setup; the configured
            // values still add up to 1.0 (0.1 + 0.2 + 0.4 + 0.1 + 0.1 + 0.1).
            probabilityRandom.SetProb("D", 0.1);
            probabilityRandom.SetProb("F", 0.1);

            // Hit counters per key; kept as double so the division below is floating-point.
            var actualProbs = new Dictionary <string, double>
            {
                { "A", 0 },
                { "B", 0 },
                { "C", 0 },
                { "D", 0 },
                { "E", 0 },
                { "F", 0 },
            };

            for (int i = 0; i < maxRand; i++)
            {
                var num = probabilityRandom.Next();

                actualProbs[num] += 1;
            }

            double sum = 0;

            foreach (var item in actualProbs)
            {
                // Observed relative frequency of this key over all draws.
                double frequency = item.Value / maxRand;

                console.WriteLine($"P({item.Key}) = {frequency} ({probabilityRandom.DicOfProbs[item.Key]})");

                sum += frequency;
            }

            console.WriteLine($"P = {sum}");
        }
예제 #2
0
        /// <summary>
        /// Selects the next action index using, in priority order: random exploration,
        /// an upper-confidence-bound estimate, a softmax ("gradient") draw, or the
        /// greedy choice over <c>QEstimation</c>.
        /// </summary>
        /// <returns>
        /// A tuple of the chosen index and a flag. The flag is <c>false</c> for the
        /// random-exploration and gradient branches and <c>true</c> for the UCB and
        /// greedy branches.
        /// </returns>
        public (int, bool) Act()
        {
            // Exploration: taken when the random draw falls below Epsilon.
            if (Ran.NextDouble() < Epsilon)
            {
                return (Indices[Ran.Next(KArm)], false);
            }

            if (UcbParam.HasValue)
            {
                var ucbParam = UcbParam.Value;
                var logTime  = Math.Log(Time + 1);

                // 1e-5 keeps the denominator strictly positive when a count is zero.
                // (The previous `10 ^ -5` was integer XOR, i.e. -15, not a power of ten.)
                UcbEstimation = QEstimation
                                .Select((data, index) => data + ucbParam * Math.Sqrt(logTime / (ActionCount[index] + 1e-5)))
                                .ToList();

                QBest = UcbEstimation.Max();

                // Capture the original position BEFORE filtering; indexing after Where()
                // would number the survivors 0..k-1 and lose the real action index.
                var ucbEstMaxList = UcbEstimation
                                    .Select((data, index) => new { Index = index, Data = data })
                                    .Where(entry => entry.Data == QBest)
                                    .ToList();

                // Break ties between equally good actions at random.
                return (ucbEstMaxList[Ran.Next(ucbEstMaxList.Count)].Index, true);
            }

            if (Gradient)
            {
                // Softmax over the estimates: exp(q_i) / sum_j exp(q_j).
                ExpEst = QEstimation.Select(qEst => Math.Exp(qEst)).ToList();

                // Sum once instead of re-summing the whole list for every element.
                var expSum = ExpEst.Sum();
                ActionProb = ExpEst.Select(expEst => expEst / expSum).ToList();

                // Rebuild the sampler from the fresh probabilities and draw from it.
                ProbabilityRandom.Reset();
                for (int i = 0; i < ActionProb.Count; i++)
                {
                    ProbabilityRandom.SetProb(i, ActionProb[i]);
                }

                return (ProbabilityRandom.Next(), false);
            }

            // Greedy: position of the maximum estimate as reported by IndexOf.
            return (QEstimation.IndexOf(QEstimation.Max()), true);
        }