/// <summary>
/// Refreshes the value table <c>V_s</c> and then picks an action for <paramref name="state"/>.
/// </summary>
/// <remarks>
/// Every sweep visits each non-final state in <c>States</c> and overwrites its <c>V_s</c> entry
/// with the largest value <c>GetRewardForAction</c> returns over that state's actions. Sweeps
/// repeat until the biggest change seen during a sweep is below <c>EPSILON</c>.
/// NOTE(review): this has the shape of value iteration; confirm against GetRewardForAction.
/// </remarks>
/// <param name="state">State the agent currently has to act in.</param>
/// <returns>
/// <c>0</c> when <paramref name="state"/> is final, otherwise the result of
/// <c>ArgMaxAction(state)</c> computed after the sweeps have settled.
/// </returns>
public override int Think(IState state)
{
    if (state.IsFinal)
    {
        return 0;
    }

    IState reached; // only there to satisfy the out parameter; never read in this method
    float largestChange;

    // The exit test is spelled !(x < EPSILON) instead of x >= EPSILON so that a NaN change
    // keeps the loop running, exactly like the "break when below EPSILON" formulation.
    do
    {
        largestChange = 0;

        // Iterate over a copy of States (presumably because it can change during a sweep; verify).
        foreach (var candidate in States.ToList())
        {
            if (candidate.IsFinal)
            {
                continue; // final states keep their V_s entry untouched
            }

            if (!V_s.ContainsKey(candidate))
            {
                AddNewStateToVS(candidate);
            }

            float previous = V_s[candidate];

            // Stays at -INFINITY when the state exposes no actions.
            float best = -INFINITY;
            foreach (var action in candidate.Actions)
            {
                float reward = GetRewardForAction(candidate, action, out reached);
                if (best < reward)
                {
                    best = reward;
                }
            }

            V_s[candidate] = best;
            largestChange = Math.Max(largestChange, Math.Abs(previous - V_s[candidate]));
        }
    }
    while (!(largestChange < EPSILON));

    return ArgMaxAction(state);
}
/// <summary>
/// Updates <c>V_s</c> for the current policy.
/// </summary>
/// <remarks>
/// Every sweep visits each non-final state in <c>States</c> and replaces its <c>V_s</c> entry
/// with what <c>GetRewardForAction</c> returns for the action stored in <c>policy</c>. States
/// missing from <c>V_s</c> or <c>policy</c> are registered on the fly; a state that is not in
/// <c>policy</c> and has no actions is skipped. Sweeps repeat until the biggest change seen
/// during a sweep is below <c>EPSILON</c>.
/// </remarks>
private void PolicyEvaluation()
{
    IState reached; // only there to satisfy the out parameter; never read in this method
    float largestChange;

    // The exit test is spelled !(x < EPSILON) instead of x >= EPSILON so that a NaN change
    // keeps the loop running, exactly like the "break when below EPSILON" formulation.
    do
    {
        largestChange = 0;

        // Iterate over a copy of States (presumably because it can change during a sweep; verify).
        foreach (var current in States.ToList())
        {
            if (current.IsFinal)
            {
                continue; // final states keep their V_s entry untouched
            }

            if (!V_s.ContainsKey(current))
            {
                AddNewStateToVS(current);
            }

            if (!policy.ContainsKey(current))
            {
                if (!current.HasActions)
                {
                    continue; // nothing to evaluate for a state without actions
                }

                AddNewStateToPolicy(current);
            }

            float previous = V_s[current];
            V_s[current] = GetRewardForAction(current, (int)policy[current], out reached);
            largestChange = Math.Max(largestChange, Math.Abs(previous - V_s[current]));
        }
    }
    while (!(largestChange < EPSILON));
}