/// <summary>
/// Folds one observed transition into the Q table and reports how far the
/// estimate for (oldState, action) moved.
/// </summary>
/// <param name="transition">
/// The observed step; its oldState, action, reward and newState members are read.
/// </param>
/// <returns>
/// Absolute difference between the updated and the previous Q value of
/// (oldState, action).
/// </returns>
public override double update(StateTransition <stateType, actionType> transition) {
    stats.cumulativeReward += transition.reward;

    // Previous estimate, read before any missing rows are created.
    double previous = value(transition.oldState, transition.action);

    // Ensure the successor state has a row, seeded with defaultQ for every available action.
    Dictionary <actionType, double> successorRow;
    if (!Qtable.TryGetValue(transition.newState, out successorRow)) {
        successorRow = new Dictionary <actionType, double>(actionComparer);
        Qtable.Add(transition.newState, successorRow);
        foreach (actionType candidate in availableActions) {
            successorRow.Add(candidate, defaultQ);
        }
    }

    // Same for the state the transition started from.
    Dictionary <actionType, double> originRow;
    if (!Qtable.TryGetValue(transition.oldState, out originRow)) {
        originRow = new Dictionary <actionType, double>(actionComparer);
        Qtable.Add(transition.oldState, originRow);
        foreach (actionType candidate in availableActions) {
            originRow.Add(candidate, defaultQ);
        }
    }

    // Best value reachable from the successor state (taken before the write below,
    // which matters when oldState and newState share a row).
    double bestSuccessor = successorRow.Values.Max();

    // Move the old estimate toward reward + gamma * bestSuccessor by step size alpha.
    originRow[transition.action] = previous + alpha * (transition.reward + gamma * bestSuccessor - previous);

    double updated = originRow[transition.action];
    return(Math.Abs(updated - previous));
}
/// <summary>
/// Looks up the current Q estimates of several actions in a single state.
/// </summary>
/// <param name="state">State whose row of the Q table is consulted.</param>
/// <param name="actions">Actions to evaluate; the result is index-aligned with this list.</param>
/// <returns>
/// One value per action: the stored estimate, or defaultQ when the state or the
/// action has no entry in the Q table.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when actions is null.</exception>
public override double[] value(stateType state, List <actionType> actions) {
    if (actions == null) {
        throw new ArgumentNullException("actions");
    }

    double[] response = new double[actions.Count];
    if (response.Length == 0) {
        // Nothing to look up; returning here also means an empty request never probes the table.
        return(response);
    }

    // The row for this state cannot change while we iterate, so fetch it once
    // rather than once per action.
    Dictionary <actionType, double> stateTable;
    bool stateKnown = Qtable.TryGetValue(state, out stateTable);

    for (int i = 0; i < response.Length; i++) {
        double stored;
        if (stateKnown && stateTable.TryGetValue(actions[i], out stored)) {
            response[i] = stored;
        } else {
            // Unknown state or unknown action: fall back to the default estimate.
            response[i] = defaultQ;
        }
    }
    return(response);
}
/// <summary>
/// Recomputes the Q value of (state, action) from the model held in T and R.
/// </summary>
/// <remarks>
/// Each successor s2 listed by T.GetStateValueTable(state, action) contributes
/// (T.Get(state, action, s2) / P) * (average of R.Get(state, action, s2) + gamma * max Q(s2, .)),
/// where P is the sum of that table's values. Successor states missing from the Q
/// table get a row seeded with defaultQ for every available action. If P is zero
/// the Q table is left untouched.
/// NOTE(review): T presumably stores transition counts and R observed rewards - confirm
/// against their declarations.
/// </remarks>
/// <param name="state">State of the entry to recompute.</param>
/// <param name="action">Action of the entry to recompute.</param>
private void updateQ(stateType state, actionType action) {
    // Fetch the successor table once; it supplies both the normaliser and the successors to visit.
    var successorTable = T.GetStateValueTable(state, action);

    double P = successorTable.Values.Sum();
    if (P == 0) {
        // No recorded mass for this pair; bail out before dividing by zero below.
        return;
    }

    double newQ = 0;
    foreach (stateType s2 in successorTable.Keys) {
        // Make sure the successor has a row so its best value can be read.
        Dictionary <actionType, double> successorRow;
        if (!Qtable.TryGetValue(s2, out successorRow)) {
            successorRow = new Dictionary <actionType, double>(actionComparer);
            Qtable.Add(s2, successorRow);
            foreach (actionType act in availableActions) {
                successorRow.Add(act, defaultQ);
            }
        }

        double maxQ = successorRow.Values.Max();
        double thisT = T.Get(state, action, s2);
        double thisR = R.Get(state, action, s2).Average();
        double thisProb = thisT / P;
        newQ += thisProb * (thisR + gamma * maxQ);
    }

    // Create the row for the updated state only now, after the successors were processed.
    Dictionary <actionType, double> stateRow;
    if (!Qtable.TryGetValue(state, out stateRow)) {
        stateRow = new Dictionary <actionType, double>(actionComparer);
        Qtable.Add(state, stateRow);
        foreach (actionType act in availableActions) {
            stateRow.Add(act, defaultQ);
        }
    }
    stateRow[action] = newQ;
}
/// <summary>
/// Returns the current Q estimate for a single state-action pair.
/// </summary>
/// <param name="state">State whose row of the Q table is consulted.</param>
/// <param name="action">Action to look up within that row.</param>
/// <returns>
/// The stored estimate, or defaultQ when either the state or the action has no
/// entry in the Q table.
/// </returns>
public double value(stateType state, actionType action) {
    // Retrieve the row of Q values for this state; an unseen state gets the default.
    Dictionary <actionType, double> stateTable;
    if (!Qtable.TryGetValue(state, out stateTable)) {
        return(defaultQ);
    }

    // Retrieve the Q value for this action, again falling back to the default.
    double stored;
    if (stateTable.TryGetValue(action, out stored)) {
        return(stored);
    }
    return(defaultQ);
}