/// <summary>
/// Creates a fresh state-action value table, stores it in <c>m_valueTable</c>,
/// and sweeps every (state, action) entry repeatedly, overwriting each one with
/// the result of <c>EstimateNewQValue</c>. Sweeping stops after the first pass
/// in which the largest absolute change is not greater than <c>Theta</c>.
/// </summary>
private void QValueIteration()
{
    StateActionValueTable table = new StateActionValueTable(
        m_initMode,
        m_initValue,
        5,                          // actions
        this.EnvRows, this.EnvCols, // my pos
        this.EnvRows, this.EnvCols, // opp pos
        2);                         // ball ownership status

    // Published before the sweeps start, so the table is already reachable
    // through m_valueTable while it is being filled in.
    m_valueTable = table;

    long numStates = table.NumStates;
    int numActions = table.NumActions;

    bool converged = false;
    while (!converged)
    {
        // Largest |old - new| seen during this sweep.
        double largestChange = 0.0;

        for (int state = 0; state < numStates; state++)
        {
            for (int action = 0; action < numActions; action++)
            {
                // Read the old value first; the entry is overwritten in place below.
                double previous = table.GetValueLinear(action, state);
                double updated = EstimateNewQValue(state, action);
                table.SetValueLinear(updated, action, state);

                largestChange = Math.Max(largestChange, Math.Abs(previous - updated));
            }
        }

        // Written as a negated '>' so a NaN change also ends the loop,
        // exactly as a 'while (delta > Theta)' condition would.
        converged = !(largestChange > Theta);
    }
}
/// <summary>
/// Scans the first <c>m_numActions</c> actions of one state in the Q-table and
/// returns the largest stored value together with the action that holds it.
/// </summary>
/// <param name="linStateInd">Linear index of the state to inspect.</param>
/// <param name="maxA">
/// Receives the index of the first action with the largest value, or -1 when no
/// action's value is strictly greater than <see cref="Double.MinValue"/>
/// (for example when <c>m_numActions</c> is zero).
/// </param>
/// <returns>
/// The largest value found, or <see cref="Double.MinValue"/> when
/// <paramref name="maxA"/> is -1.
/// </returns>
/// <exception cref="InvalidOperationException">
/// <c>m_valueTable</c> is not a <c>StateActionValueTable</c>.
/// </exception>
/// <exception cref="OverflowException">
/// <paramref name="linStateInd"/> does not fit in an <see cref="int"/>.
/// </exception>
private double GetMaxQForState(long linStateInd, out int maxA)
{
    StateActionValueTable qTable = m_valueTable as StateActionValueTable;
    if (qTable == null)
    {
        throw new InvalidOperationException("value table is not a Q-Table");
    }

    // Narrow once, with overflow checking: an out-of-range index must fail
    // loudly instead of wrapping around and silently reading another state.
    int stateIndex = checked((int)linStateInd);

    maxA = -1;
    double maxQ = Double.MinValue;

    // NOTE(review): iterates m_numActions rather than qTable.NumActions;
    // presumably the two agree - confirm against where the table is built.
    for (int a = 0; a < m_numActions; a++)
    {
        double curQ = qTable.GetValueLinear(a, stateIndex);

        // Strict '>' keeps the earliest action on ties.
        if (curQ > maxQ)
        {
            maxQ = curQ;
            maxA = a;
        }
    }

    return maxQ;
}