Ejemplo n.º 1
0
        // Draws 3000 selections from a Softmax policy (tau = 200, Random seeded with 1337)
        // over four values in ascending order, tallies how often each action index is
        // returned, and asserts that sorting the tallies by ascending count yields the
        // actions in the order 0, 1, 2, 3 -- i.e. selection frequency rises with the value.
        public void TestRandomProbability()
        {
            var random = new Random(1337);
            int tau = 200;
            var softmax = new Softmax(tau, random);
            var qValue = new QValue(new double[] { 121, 231, 425, 676 });

            // The result is not used by this test. The call is kept because it is handed
            // the same Random instance and may advance it (not visible from here).
            PolicyHelpers.SelectMax(qValue, random);

            // One tally entry per action, initially indexed by action number.
            TestInstance[] tallies = Enumerable
                .Range(0, qValue.Count)
                .Select(index => new TestInstance { Action = index })
                .ToArray();

            const int numTests = 3000;
            int remaining = numTests;

            while (remaining-- > 0)
            {
                int chosen = softmax.Select(qValue);
                tallies[chosen].Count++;
            }

            // Least-selected action first, most-selected action last.
            TestInstance[] ranked = tallies.OrderBy(entry => entry.Count).ToArray();

            int[] expectedRanking = { 0, 1, 2, 3 };

            for (int rank = 0; rank < expectedRanking.Length; rank++)
            {
                Assert.AreEqual(expectedRanking[rank], ranked[rank].Action);
            }
        }