/// <summary>
/// Runs policy iteration. Creates a fresh state-value table and policy, stores them in
/// <c>m_valueTable</c> and <c>m_piPolicy</c>, and then alternates between evaluating the
/// current policy and greedily improving it until an improvement pass leaves the action
/// of every state unchanged.
/// </summary>
private void PolicyIteration()
{
    // Table dimensions: my position (rows, cols), opponent position (rows, cols),
    // ball ownership status (2 values).
    StateValueTable values = new StateValueTable(
        m_initMode, m_initValue,
        this.EnvRows, this.EnvCols,
        this.EnvRows, this.EnvCols,
        2);
    m_valueTable = values;

    // The policy table uses the same dimensions as the value table.
    Policy currentPolicy = new Policy(
        m_initMode,
        m_initMode == ValueTableInitModes.Constant ? 0 : m_numActions,
        this.EnvRows, this.EnvCols,
        this.EnvRows, this.EnvCols,
        2);
    m_piPolicy = currentPolicy;

    long stateCounts = m_valueTable.NumStates;

    bool policyStable;
    do
    {
        // Policy evaluation: sweep all states in place until the largest change
        // seen in a single sweep is no longer greater than Theta.
        bool sweepAgain;
        do
        {
            double largestChange = 0.0;
            for (int state = 0; state < stateCounts; state++)
            {
                double previous = values.GetValueLinear(state);
                double updated = EstimateNewValueUsingPolicy(state, currentPolicy);
                values.SetValueLinear(updated, state);

                double change = Math.Abs(previous - updated);
                largestChange = Math.Max(largestChange, change);
            }

            sweepAgain = largestChange > Theta;
        }
        while (sweepAgain);

        // Policy improvement: store the best action for every state and remember
        // whether any state ended up with a different action than before.
        policyStable = true;
        for (int state = 0; state < stateCounts; state++)
        {
            int oldAction = currentPolicy.GetValueLinear(state);

            int greedyAction;
            EstimateNewValue(state, out greedyAction);
            currentPolicy.SetValueLinear(greedyAction, state);

            policyStable = policyStable && (oldAction == greedyAction);
        }
    }
    while (!policyStable);
}
/// <summary>
/// Computes the one-step backed-up value of state <paramref name="s"/> when taking the
/// action that <paramref name="policy"/> stores for that state.
/// </summary>
/// <param name="s">Linear index of the state to evaluate.</param>
/// <param name="policy">Policy whose <c>GetValueLinear(s)</c> supplies the action.</param>
/// <returns>
/// The sum, over the next states reported by <c>GetPossibleNextStates</c>, of
/// probability * (reward + Gamma * current value of the next state).
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>m_valueTable</c> is null or is not a <c>StateValueTable</c>.
/// </exception>
private double EstimateNewValueUsingPolicy(int s, Policy policy)
{
    StateValueTable valueTable = m_valueTable as StateValueTable;
    if (valueTable == null)
    {
        // A specific exception type instead of the reserved System.Exception base;
        // it still derives from Exception, so existing catch blocks keep working.
        throw new InvalidOperationException("A ValueTable needed!");
    }

    int a = policy.GetValueLinear(s);

    List<int> nextStates;
    List<double> nextStateProbs;
    List<double> nextStateRew;
    GetPossibleNextStates(s, a, out nextStates, out nextStateProbs, out nextStateRew);

    // The three lists are indexed in parallel: entry i describes one possible outcome.
    double nextV = 0.0;
    for (int i = 0; i < nextStates.Count; i++)
    {
        double successorValue = valueTable.GetValueLinear(nextStates[i]);
        nextV += nextStateProbs[i] * (nextStateRew[i] + (Gamma * successorValue));
    }

    return nextV;
}
/// <summary>
/// Runs value iteration. Creates a fresh state-value table, stores it in
/// <c>m_valueTable</c>, and repeatedly overwrites every state's value with the result of
/// <c>EstimateNewValue</c> until the largest change seen in a full sweep is no longer
/// greater than <c>Theta</c>.
/// </summary>
private void ValueIteration()
{
    // Table dimensions: my position (rows, cols), opponent position (rows, cols),
    // ball ownership status (2 values).
    StateValueTable values = new StateValueTable(
        m_initMode, m_initValue,
        this.EnvRows, this.EnvCols,
        this.EnvRows, this.EnvCols,
        2);
    m_valueTable = values;

    long stateCounts = m_valueTable.NumStates;

    while (true)
    {
        double largestChange = 0.0;

        // Update every state in place; later states in the same sweep already see
        // the values written for earlier ones.
        for (int state = 0; state < stateCounts; state++)
        {
            double previous = values.GetValueLinear(state);
            double updated = EstimateNewValue(state);
            values.SetValueLinear(updated, state);

            double change = Math.Abs(previous - updated);
            largestChange = Math.Max(largestChange, change);
        }

        if (largestChange > Theta)
        {
            continue;
        }

        break;
    }
}
/// <summary>
/// Evaluates every action index in <c>[0, m_numActions)</c> for state
/// <paramref name="s"/> and reports the one with the highest one-step backed-up value.
/// </summary>
/// <param name="s">Linear index of the state to evaluate.</param>
/// <param name="maxA">
/// Receives the index of the best action. Ties keep the lowest index because a later
/// action must be strictly better to replace the current best. Stays -1 when no action
/// produced a value greater than <c>Double.MinValue</c> (for example when
/// <c>m_numActions</c> is zero).
/// </param>
/// <returns>
/// The best action's value: the sum, over the next states reported by
/// <c>GetPossibleNextStates</c>, of probability * (reward + Gamma * current value of the
/// next state). Returns <c>Double.MinValue</c> when <paramref name="maxA"/> is -1.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>m_valueTable</c> is null or is not a <c>StateValueTable</c>.
/// </exception>
private double EstimateNewValue(int s, out int maxA)
{
    StateValueTable valueTable = m_valueTable as StateValueTable;
    if (valueTable == null)
    {
        // A specific exception type instead of the reserved System.Exception base;
        // it still derives from Exception, so existing catch blocks keep working.
        throw new InvalidOperationException("A ValueTable needed!");
    }

    double maxV = Double.MinValue;
    maxA = -1;

    for (int a = 0; a < m_numActions; a++)
    {
        List<int> nextStates;
        List<double> nextStateProbs;
        List<double> nextStateRew;
        GetPossibleNextStates(s, a, out nextStates, out nextStateProbs, out nextStateRew);

        // The three lists are indexed in parallel: entry i describes one possible outcome.
        double nextV = 0.0;
        for (int i = 0; i < nextStates.Count; i++)
        {
            double successorValue = valueTable.GetValueLinear(nextStates[i]);
            nextV += nextStateProbs[i] * (nextStateRew[i] + (Gamma * successorValue));
        }

        if (nextV > maxV)
        {
            maxV = nextV;
            maxA = a;
        }
    }

    return maxV;
}