Esempio n. 1
0
        /// <summary>
        /// Draws 3000 actions from an <c>EGreedy</c> policy (epsilon = 0.4, seeded random) and
        /// checks that the fraction of draws equal to the action returned by
        /// <c>PolicyHelpers.SelectMax</c> is within 0.05 of (1 - epsilon) + epsilon / actionCount.
        /// </summary>
        public void TestRandomProbability()
        {
            double[] actionValues =
            {
                121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015
            };

            var    random  = new Random(1337);
            double epsilon = 0.4;
            var    eGreedy = new EGreedy(epsilon, random);
            var    qValue  = new QValue(actionValues);

            // Reference action that every draw is compared against.
            var bestAction = PolicyHelpers.SelectMax(qValue, random);

            int trialCount = 3000;
            int hits       = 0;

            for (int trial = 0; trial < trialCount; trial++)
            {
                int chosen = eGreedy.Select(qValue);
                hits += chosen == bestAction ? 1 : 0;
            }

            // The expected rate combines the non-random share (1 - epsilon) with the
            // share of random picks that land on the reference action by chance.
            double expectedRate = (1 - epsilon) + epsilon * (1.0 / qValue.Count);
            double observedRate = hits / (double)trialCount;

            Assert.AreEqual(expectedRate, observedRate, 0.05);
        }
Esempio n. 2
0
        /// <summary>
        /// Applies one learning step to the Q-value table for an observed transition.
        /// The entry for (<paramref name="currentState"/>, <paramref name="action"/>) is moved
        /// towards <c>reward + gamma * (selected Q-value of newState)</c> by a fraction <c>alpha</c>.
        /// </summary>
        /// <param name="currentState">The state in which the action was executed</param>
        /// <param name="newState">The state that was reached afterwards</param>
        /// <param name="action">The action that was executed</param>
        /// <param name="reward">The reward that was received</param>
        public void Update(TState currentState, TState newState, int action, double reward)
        {
            // Read the current estimate first, before touching the entry of the new state.
            double previousEstimate = this.qValueTable[currentState][action];

            // Value of the action that PolicyHelpers.SelectMax picks for the new state.
            var    successorValues = this.qValueTable[newState];
            var    selectedNext    = PolicyHelpers.SelectMax(successorValues, this.random);
            double successorValue  = successorValues[selectedNext];

            // Difference between the discounted target and the current estimate.
            double correction = reward + this.gamma * successorValue - previousEstimate;

            this.qValueTable[currentState, action] = previousEstimate + this.alpha * correction;
        }
Esempio n. 3
0
 /// <summary>
 /// Chooses the action to execute in the given state.
 /// When <c>FollowPolicy</c> is set, the action comes from <c>PolicyHelpers.SelectMax</c>;
 /// otherwise it is delegated to the configured selection policy.
 /// </summary>
 /// <param name="state">The state to choose an action for</param>
 /// <returns>The index of the chosen action</returns>
 public int SelectAction(TState state)
 {
     if (this.FollowPolicy)
     {
         var learnedValues = this.qValueTable[state];
         return PolicyHelpers.SelectMax(learnedValues, this.random);
     }

     // Not following the learned policy: let the selection policy decide.
     return this.selectionPolicy.Select(this.qValueTable[state]);
 }
Esempio n. 4
0
 /// <summary>
 /// Chooses the action to execute in the given state.
 /// When <c>FollowPolicy</c> is set, the action comes from <c>PolicyHelpers.SelectMax</c>.
 /// Otherwise the stored <c>possibleAction</c> is returned if <c>possibleState</c> equals
 /// <paramref name="state"/>, and the selection policy is consulted in every other case.
 /// </summary>
 /// <param name="state">The state to choose an action for</param>
 /// <returns>The index of the chosen action</returns>
 public int SelectAction(TState state)
 {
     if (this.FollowPolicy)
     {
         return PolicyHelpers.SelectMax(this.qValueTable[state], this.random);
     }

     // The null check runs first so Equals is only called on a non-null stored state.
     bool matchesStoredState = this.possibleState != null && this.possibleState.Equals(state);

     if (matchesStoredState)
     {
         return this.possibleAction;
     }

     return this.selectionPolicy.Select(this.qValueTable[state]);
 }
Esempio n. 5
0
        /// <summary>
        /// Draws 3000 actions from a <c>Softmax</c> policy (tau = 200, seeded random) and checks
        /// that, when actions are ordered by how often they were drawn (ascending), the order
        /// is action 0, 1, 2, 3.
        /// </summary>
        public void TestRandomProbability()
        {
            double[] actionValues = { 121, 231, 425, 676 };

            var random  = new Random(1337);
            var tau     = 200;
            var softmax = new Softmax(tau, random);
            var qValue  = new QValue(actionValues);

            // NOTE(review): the result is unused, but the call receives the shared Random and
            // may advance it, so it is kept to leave the drawn sequence unchanged - confirm.
            PolicyHelpers.SelectMax(qValue, random);

            // One counter per action, indexed by the action number.
            var counters = Enumerable.Range(0, qValue.Count)
                                     .Select(index => new TestInstance() { Action = index })
                                     .ToArray();

            int remaining = 3000;

            while (remaining > 0)
            {
                int chosen = softmax.Select(qValue);
                counters[chosen].Count++;
                remaining--;
            }

            var byFrequency = counters.OrderBy(counter => counter.Count).ToArray();

            // Least drawn first: position k is expected to hold action k.
            for (int rank = 0; rank < actionValues.Length; rank++)
            {
                Assert.AreEqual(rank, byFrequency[rank].Action);
            }
        }