/// <summary>
/// Applies one Q-learning update for the observed transition and accumulates its reward.
/// </summary>
/// <remarks>
/// Adds <c>transition.reward</c> to <c>stats.cumulativeReward</c>, makes sure both the new and
/// the old state have a row in <c>Qtable</c> (one entry per element of
/// <c>availableActions</c>, initialised to <c>defaultQ</c>), and then moves
/// Q(oldState, action) towards <c>reward + gamma * max Q(newState, *)</c> by the step size
/// <c>alpha</c>.
/// </remarks>
/// <param name="transition">
/// The observed transition; its <c>oldState</c>, <c>action</c>, <c>reward</c> and
/// <c>newState</c> members are read.
/// </param>
/// <returns>The absolute difference between the new and the previous Q(oldState, action).</returns>
public override double update(StateTransition<stateType, actionType> transition)
{
    stats.cumulativeReward += transition.reward;

    // Previous estimate, obtained through value() before any table rows are created below.
    double q_s_a = value(transition.oldState, transition.action);

    if (!Qtable.ContainsKey(transition.newState))
    {
        // Build the row completely before adding it, so the table never holds a
        // partially initialised row and is not re-indexed for every action.
        var newStateRow = new Dictionary<actionType, double>(actionComparer);
        foreach (actionType act in availableActions)
        {
            newStateRow.Add(act, defaultQ);
        }

        Qtable.Add(transition.newState, newStateRow);
    }

    if (!Qtable.ContainsKey(transition.oldState))
    {
        var oldStateRow = new Dictionary<actionType, double>(actionComparer);
        foreach (actionType act in availableActions)
        {
            oldStateRow.Add(act, defaultQ);
        }

        Qtable.Add(transition.oldState, oldStateRow);
    }

    // Greedy value of the successor state. Max() throws on an empty row, i.e. when
    // availableActions was empty at the time the row was created.
    double maxNewQ = Qtable[transition.newState].Values.Max();

    double updatedQ = q_s_a + alpha * (transition.reward + gamma * maxNewQ - q_s_a);
    Qtable[transition.oldState][transition.action] = updatedQ;

    // Magnitude of the change; the stored value is exactly updatedQ, so no read-back is needed.
    return Math.Abs(updatedQ - q_s_a);
}
/// <summary>
/// Recomputes Q(state, action) as an expectation over the successor states recorded in
/// <c>T</c> for that state/action pair.
/// </summary>
/// <remarks>
/// Each successor <c>s2</c> is weighted by <c>T.Get(state, action, s2)</c> divided by the sum
/// of all values in <c>T.GetStateValueTable(state, action)</c>, and contributes
/// <c>average(R.Get(state, action, s2)) + gamma * max Q(s2, *)</c>. Rows missing from
/// <c>Qtable</c> (for a successor or for <paramref name="state"/>) are created with one
/// entry per element of <c>availableActions</c>, initialised to <c>defaultQ</c>.
/// If the sum of the table values is zero the method returns without touching
/// <c>Qtable</c>.
/// NOTE(review): T presumably holds transition counts and R the observed rewards - confirm
/// against their declarations.
/// </remarks>
/// <param name="state">State whose Q-value is recomputed.</param>
/// <param name="action">Action whose Q-value is recomputed.</param>
private void updateQ(stateType state, actionType action)
{
    // Loop-invariant: fetch the successor table once and reuse it for the total and the keys.
    var successorTable = T.GetStateValueTable(state, action);

    double P = successorTable.Values.Sum();
    if (P == 0)
    {
        // Nothing recorded for this pair; also guards the division by P below.
        return;
    }

    double newQ = 0;
    foreach (stateType s2 in successorTable.Keys)
    {
        if (!Qtable.ContainsKey(s2))
        {
            // Build the row completely before adding it, so the table never holds a
            // partially initialised row and is not re-indexed for every action.
            var successorRow = new Dictionary<actionType, double>(actionComparer);
            foreach (actionType act in availableActions)
            {
                successorRow.Add(act, defaultQ);
            }

            Qtable.Add(s2, successorRow);
        }

        double maxQ = Qtable[s2].Values.Max();
        double thisT = T.Get(state, action, s2);
        double thisR = R.Get(state, action, s2).Average();
        double thisProb = thisT / P;

        newQ += thisProb * (thisR + gamma * maxQ);
    }

    if (!Qtable.ContainsKey(state))
    {
        var stateRow = new Dictionary<actionType, double>(actionComparer);
        foreach (actionType act in availableActions)
        {
            stateRow.Add(act, defaultQ);
        }

        Qtable.Add(state, stateRow);
    }

    Qtable[state][action] = newQ;
}