Example #1
0
    /// <summary>
    /// Iterative policy evaluation: estimates the state-value function V(s) for policy π by
    /// repeatedly sweeping the state space and applying the Bellman expectation backup until
    /// the largest per-sweep change Δ drops below the convergence threshold θ.
    /// For each state, for each action:
    /// - query the environment dynamics for the resulting transition and its probability,
    /// - weigh that by the probability that π actually selects the action in this state,
    /// - accumulate the expected return: immediate reward plus the discounted (γ) value of the successor state.
    /// The state's value is the expectation over all actions under π, not the maximum.
    /// </summary>
    /// <typeparam name="S">The state type.</typeparam>
    /// <typeparam name="A">The action type.</typeparam>
    /// <param name="π">The policy being evaluated; maps a state to a distribution over actions.</param>
    /// <param name="mdp">The environment (MDP) supplying states, actions, dynamics, rewards and the discount factor γ.</param>
    /// <param name="stateValues">Initial per-state value estimates; updated in place during the sweeps.</param>
    /// <param name="θ">Convergence threshold: iteration stops once the largest value change in a full sweep falls below θ.</param>
    /// <returns>The (mutated) dictionary of converged state values.</returns>
    public static Dictionary <S, double> Evaluate <S, A>(this Policy <S, A> π, Environment <S, A> mdp, Dictionary <S, double> stateValues, double θ) where S : notnull where A : notnull
    {
        double Δ;

        do
        {
            Δ = 0.0;
            foreach (S state in mdp.States)
            {
                double oldValue = stateValues[state];
                double value    = 0.0;

                foreach (A action in mdp.Actions)
                {
                    (Transition <S> transition, Probability.Probability transitionProbability) = mdp.Dynamics(state, action);
                    // NOTE(review): this line was garbled in the original ("[email protected]" — an
                    // email-obfuscation scraping artifact); restored as matching the event against the
                    // action. Confirm the tuple member is named "@event" (its sibling ".probability" is
                    // read below). If π never selects this action, FirstOrDefault yields the default
                    // Probability (presumably zero), so the term contributes nothing.
                    Probability.Probability actionProbability = π(state).EventProbabilities.FirstOrDefault(eventProbability => eventProbability.@event.Equals(action)).probability;
                    // Bellman expectation backup term: π(a|s) · p(s'|s,a) · (r + γ·V(s')).
                    value += actionProbability * transitionProbability * (mdp.Reward(transition.Origin) + mdp.γ * stateValues[transition.Destination]);
                }

                stateValues[state] = value;
                // Track the largest per-state change this sweep to test convergence.
                Δ = Max(Δ, Abs(oldValue - value));
            }
        } while (Δ >= θ);

        return(stateValues);
    }
Example #2
0
 /// <summary>
 /// Builds a Bernoulli (two-point) distribution over the two given values, where
 /// <paramref name="value1"/> is drawn with probability <paramref name="bias"/> and
 /// <paramref name="value2"/> with the complementary probability 1 − bias.
 /// </summary>
 /// <typeparam name="T">The element type</typeparam>
 /// <param name="value1">The value assigned probability <paramref name="bias"/>.</param>
 /// <param name="value2">The value assigned the complementary probability.</param>
 /// <param name="bias">The probability of drawing <paramref name="value1"/>.</param>
 /// <returns>A biased distribution.</returns>
 public static Dist <T> OneOf <T>(T value1, T value2, Probability bias)
 {
     Probability complement = new Probability(1M - bias);
     ProbValue <T>[] outcomes =
     {
         new ProbValue <T>(value1, bias),
         new ProbValue <T>(value2, complement)
     };

     return new Dist <T>(outcomes);
 }
Example #3
0
 /// <summary>
 /// Bernoulli distribution: takes the value 1 with probability <paramref name="p"/>
 /// and the value 0 with probability 1 − p.
 /// </summary>
 /// <param name="p">The probability of drawing 1.</param>
 /// <returns>Bernoulli distribution with the specified probability.</returns>
 public static Dist <int> Bernoulli(Probability p) => Distribution.OneOf(1, 0, p);