/// <summary>
/// Registers <paramref name="state"/> in <c>States</c> when it is not already
/// there, then adds an entry for it to <c>V_s</c> with an initial value of 0.
/// </summary>
/// <param name="state">State to start tracking.</param>
/// <remarks>
/// NOTE(review): <c>V_s.Add</c> is called unconditionally. If <c>V_s</c> is a
/// standard dictionary this throws when the state already has an entry, so
/// callers are expected to check <c>V_s.ContainsKey</c> first — confirm.
/// </remarks>
protected override void AddNewStateToVS(IState state)
{
    bool alreadyTracked = States.Contains(state);
    if (!alreadyTracked)
    {
        States.Add(state);
    }

    // Every newly tracked state starts with a value estimate of zero.
    V_s.Add(state, 0f);
}
/// <summary>
/// Sweeps over every known state, replacing each non-final state's entry in
/// <c>V_s</c> with the best value returned by <c>GetRewardForAction</c> over
/// that state's actions, and repeats until the largest change seen during a
/// sweep is below <c>EPSILON</c>. Then returns the action selected by
/// <c>ArgMaxAction</c> for <paramref name="state"/>.
/// </summary>
/// <param name="state">State for which an action is requested.</param>
/// <returns>
/// 0 when <paramref name="state"/> is final; otherwise the result of
/// <c>ArgMaxAction(state)</c>.
/// </returns>
public override int Think(IState state)
{
    if (state.IsFinal)
    {
        return 0;
    }

    while (true)
    {
        float delta = 0;

        // Enumerate a copy so that States may be modified during the sweep.
        foreach (var s in States.ToList())
        {
            if (s.IsFinal)
            {
                continue; // never modify V_s for a final state
            }

            if (!V_s.ContainsKey(s))
            {
                AddNewStateToVS(s);
            }

            float previous = V_s[s];
            float best = -INFINITY;
            bool anyAction = false;

            foreach (var action in s.Actions)
            {
                anyAction = true;

                // The successor state is required by the signature but unused here.
                IState successor;
                float current = GetRewardForAction(s, action, out successor);
                if (current > best)
                {
                    best = current;
                }
            }

            if (!anyAction)
            {
                // A non-final state without actions has nothing to maximise over.
                // Writing the -INFINITY sentinel into V_s would (when INFINITY is
                // a true floating-point infinity) make the next sweep compute
                // -inf - (-inf) = NaN; Math.Max propagates NaN and the
                // "delta < EPSILON" exit test would then never succeed.
                // Leave the current value untouched instead.
                continue;
            }

            V_s[s] = best;
            delta = Math.Max(delta, Math.Abs(previous - V_s[s]));
        }

        if (delta < EPSILON)
        {
            break;
        }
    }

    return ArgMaxAction(state);
}
/// <summary>
/// Registers <paramref name="state"/> in <c>States</c> when it is not already
/// there, then adds an entry for it to <c>V_s</c>: 0 for a final state,
/// otherwise the value drawn from <c>RandomFloat(0, 1)</c>.
/// </summary>
/// <param name="state">State to start tracking.</param>
protected override void AddNewStateToVS(IState state)
{
    if (!States.Contains(state))
    {
        States.Add(state);
    }

    // The random draw happens for every state, final or not, so the number
    // of values consumed from the random source does not depend on the state.
    float initialValue = RandomFloat(0, 1);
    if (state.IsFinal)
    {
        // Final states always start at zero.
        initialValue = 0;
    }

    V_s.Add(state, initialValue);
}
/// <summary>
/// Updates <c>V_s</c> for the current policy: repeatedly sweeps all known
/// states, replacing each state's value with what <c>GetRewardForAction</c>
/// returns for the action stored in <c>policy</c>, until the largest change
/// observed during a sweep is below <c>EPSILON</c>.
/// </summary>
private void PolicyEvaluation()
{
    float largestChange;

    do
    {
        largestChange = 0;

        // Enumerate a copy so that States may be modified during the sweep.
        foreach (var state in States.ToList())
        {
            if (state.IsFinal)
            {
                continue; // never modify V_s for a final state
            }

            if (!V_s.ContainsKey(state))
            {
                AddNewStateToVS(state);
            }

            if (!policy.ContainsKey(state))
            {
                if (!state.HasActions)
                {
                    continue; // a state without actions cannot be given a policy entry
                }

                AddNewStateToPolicy(state);
            }

            float previous = V_s[state];

            // The successor state is required by the signature but unused here.
            IState successor;
            V_s[state] = GetRewardForAction(state, (int)policy[state], out successor);

            largestChange = Math.Max(largestChange, Math.Abs(previous - V_s[state]));
        }

        // Written as a negated "<" so a NaN change keeps looping, exactly
        // like a "break when below EPSILON" test would.
    } while (!(largestChange < EPSILON));
}