Пример #1
0
 // Registers a state with this agent: adds it to States when it is not
 // already there, then creates its V_s entry with an initial value of 0.
 //
 // NOTE(review): V_s.Add is not guarded here; the visible callers check
 // V_s.ContainsKey(state) before calling — confirm every caller does.
 protected override void AddNewStateToVS(IState state)
 {
     bool alreadyTracked = States.Contains(state);

     if (!alreadyTracked)
     {
         States.Add(state);
     }

     // Every newly registered state starts with a value estimate of zero.
     V_s.Add(state, 0f);
 }
Пример #2
0
    // Repeatedly sweeps over all known states, replacing each non-final
    // state's V_s entry with the largest value GetRewardForAction yields over
    // that state's actions, until the largest change seen in a sweep is below
    // EPSILON. Then returns ArgMaxAction(state).
    //
    // Returns 0 immediately, without touching V_s, when the given state is final.
    public override int Think(IState state)
    {
        if (state.IsFinal)
        {
            return 0;
        }

        // Only needed to satisfy the out parameter; its value is never read here.
        IState successor;
        float largestChange;

        do
        {
            largestChange = 0;

            // Iterate over a snapshot of States.
            // NOTE(review): presumably because States can grow during the sweep — confirm.
            foreach (var current in States.ToList())
            {
                if (current.IsFinal)
                {
                    continue;            // V_s is left untouched for final states
                }

                if (!V_s.ContainsKey(current))
                {
                    AddNewStateToVS(current);
                }

                float previousValue = V_s[current];

                // NOTE(review): if current.Actions is empty, bestValue stays at
                // -INFINITY and that is what gets stored — confirm this is intended.
                float bestValue = -INFINITY;

                foreach (var action in current.Actions)
                {
                    float candidate = GetRewardForAction(current, action, out successor);
                    bestValue = candidate > bestValue ? candidate : bestValue;
                }

                // The update is in place: states visited later in this sweep
                // already see the new value.
                V_s[current] = bestValue;

                largestChange = Math.Max(largestChange, Math.Abs(previousValue - bestValue));
            }
        }
        while (!(largestChange < EPSILON));

        return ArgMaxAction(state);
    }
Пример #3
0
    // Registers a state with this agent: adds it to States when it is not
    // already there, then creates its V_s entry. The entry is seeded with
    // RandomFloat(0, 1) (per the original comment, a random number between
    // 0 and 1), except for final states, whose value is set to 0.
    protected override void AddNewStateToVS(IState state)
    {
        bool alreadyTracked = States.Contains(state);

        if (!alreadyTracked)
        {
            States.Add(state);
        }

        // RandomFloat is called for every state, final ones included,
        // before the IsFinal check below.
        float initialValue = RandomFloat(0, 1);
        V_s.Add(state, initialValue);

        if (!state.IsFinal)
        {
            return;
        }

        // A final state always ends up with a value of 0.
        V_s[state] = 0;
    }
Пример #4
0
    // Updates V_s for the current policy: repeatedly sweeps over all known
    // states, replacing each non-final state's value with what
    // GetRewardForAction returns for the action stored in policy, until the
    // largest change seen in a sweep is below EPSILON.
    private void PolicyEvaluation()
    {
        // Only needed to satisfy the out parameter; its value is never read here.
        IState successor;
        float largestChange;

        do
        {
            largestChange = 0;

            // Iterate over a snapshot of States.
            // NOTE(review): presumably because States can grow during the sweep — confirm.
            foreach (var current in States.ToList())
            {
                if (current.IsFinal)
                {
                    continue;                // V_s is left untouched for final states
                }

                if (!V_s.ContainsKey(current))
                {
                    AddNewStateToVS(current);
                }

                if (!policy.ContainsKey(current))
                {
                    if (!current.HasActions)
                    {
                        continue;  // a state without actions gets no policy entry; skip it
                    }

                    AddNewStateToPolicy(current);
                }

                float previousValue = V_s[current];
                float updatedValue = GetRewardForAction(current, (int)policy[current], out successor);

                // The update is in place: states visited later in this sweep
                // already see the new value.
                V_s[current] = updatedValue;

                largestChange = Math.Max(largestChange, Math.Abs(previousValue - updatedValue));
            }
        }
        while (!(largestChange < EPSILON));
    }