/// <summary>
/// Runs the epsilon-greedy policy 3000 times on a fixed Q-value vector with a
/// seeded random generator and checks that the fraction of selections equal to
/// the action returned by PolicyHelpers.SelectMax is within 0.05 of
/// (1 - epsilon) + epsilon * (1 / number of actions).
/// </summary>
public void TestRandomProbability()
{
    var random = new Random(1337);
    var epsilon = 0.4;
    var policy = new EGreedy(epsilon, random);
    var qValue = new QValue(new double[] { 121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015 });

    // Reference action, determined once before sampling starts.
    var bestAction = PolicyHelpers.SelectMax(qValue, random);

    int numTests = 3000;
    int bestHits = 0;
    for (int trial = 0; trial < numTests; trial++)
    {
        if (policy.Select(qValue) == bestAction)
        {
            bestHits++;
        }
    }

    double expectedFrequency = (1 - epsilon) + epsilon * (1.0 / qValue.Count);
    double observedFrequency = bestHits / (double)numTests;
    Assert.AreEqual(expectedFrequency, observedFrequency, 0.05);
}
/// <summary>
/// Updates the stored value for (currentState, action) using the received
/// reward and the values stored for newState:
/// new = old + alpha * (reward + gamma * best - old),
/// where "best" is the newState entry at the index chosen by PolicyHelpers.SelectMax.
/// </summary>
/// <param name="currentState">The state in which the action was executed</param>
/// <param name="newState">The state reached after the action</param>
/// <param name="action">The action that was executed</param>
/// <param name="reward">The reward that was received</param>
public void Update(TState currentState, TState newState, int action, double reward)
{
    double previousEstimate = this.qValueTable[currentState][action];

    // Value of the successor state at the index picked by SelectMax.
    var successorValues = this.qValueTable[newState];
    var bestSuccessorAction = PolicyHelpers.SelectMax(successorValues, this.random);
    double bestSuccessorValue = successorValues[bestSuccessorAction];

    // Difference between the discounted target and the current estimate.
    double temporalDifference = reward + this.gamma * bestSuccessorValue - previousEstimate;

    this.qValueTable[currentState, action] = previousEstimate + this.alpha * temporalDifference;
}
/// <summary>
/// Returns the action to execute in the given state.
/// When FollowPolicy is set, the action comes from PolicyHelpers.SelectMax;
/// otherwise it comes from the configured selection policy.
/// </summary>
/// <param name="state">The state to choose an action for</param>
/// <returns>The index of the chosen action</returns>
public int SelectAction(TState state)
{
    return this.FollowPolicy
        ? PolicyHelpers.SelectMax(this.qValueTable[state], this.random)
        : this.selectionPolicy.Select(this.qValueTable[state]);
}
/// <summary>
/// Returns the action to execute in the given state.
/// When FollowPolicy is set, the action comes from PolicyHelpers.SelectMax.
/// Otherwise, if the stored possibleState equals the given state, the stored
/// possibleAction is returned; in every other case the configured selection
/// policy chooses.
/// </summary>
/// <param name="state">The state to choose an action for</param>
/// <returns>The index of the chosen action</returns>
public int SelectAction(TState state)
{
    if (this.FollowPolicy)
    {
        return PolicyHelpers.SelectMax(this.qValueTable[state], this.random);
    }

    // Reuse the stored action only when it was recorded for exactly this state.
    bool storedActionApplies = this.possibleState != null && this.possibleState.Equals(state);
    if (storedActionApplies)
    {
        return this.possibleAction;
    }

    return this.selectionPolicy.Select(this.qValueTable[state]);
}
/// <summary>
/// Samples the softmax policy 3000 times on a four-entry Q-value vector with a
/// seeded random generator and checks that, ordered by ascending selection
/// count, the actions appear as 0, 1, 2, 3.
/// </summary>
public void TestRandomProbability()
{
    var random = new Random(1337);
    int temperature = 200;
    var policy = new Softmax(temperature, random);
    var qValue = new QValue(new double[] { 121, 231, 425, 676 });

    // The result is not used by this test. The call is kept because it receives
    // the shared generator and may advance it (assumption; verify against
    // PolicyHelpers.SelectMax), which would affect the samples drawn below.
    PolicyHelpers.SelectMax(qValue, random);

    // One counter per action, remembering which action it belongs to.
    var tallies = Enumerable.Range(0, qValue.Count)
        .Select(index => new TestInstance() { Action = index })
        .ToArray();

    int numTests = 3000;
    for (int trial = 0; trial < numTests; trial++)
    {
        tallies[policy.Select(qValue)].Count++;
    }

    var leastToMostSelected = tallies.OrderBy(entry => entry.Count).ToArray();

    int[] expectedOrder = { 0, 1, 2, 3 };
    for (int rank = 0; rank < expectedOrder.Length; rank++)
    {
        Assert.AreEqual(expectedOrder[rank], leastToMostSelected[rank].Action);
    }
}