/// <summary>
/// Updates the existing policy using the observed transition
/// (semi-gradient TD update of the function approximator's parameters).
/// </summary>
/// <param name="PastState">The state that the agent transitioned from.</param>
/// <param name="CurrentState">The state that the agent has transitioned to.</param>
/// <param name="Action">The action taken by the agent that caused the transition.</param>
/// <param name="Reward">The reward received for the transition.</param>
public void UpdatePolicy(List<int> PastState, List<int> CurrentState, int Action, double Reward)
{
    // Temporal-difference prediction error for this transition.
    double tdError = TD(PastState, CurrentState, Action, Reward);

    // Feature vector of the past state (the approximator works on doubles).
    List<double> pastFeatures = PastState.ConvertAll(v => Convert.ToDouble(v));

    // Parameter increments: gradient of Q at (past state, action),
    // scaled by the prediction error and the learning rate Epsilon.
    List<double> increments = new List<double>();
    foreach (double gradientComponent in Qfunc.Gradient(pastFeatures, Action))
    {
        increments.Add(gradientComponent * tdError * Epsilon);
    }

    // Apply the increments to the approximator's parameters for this action.
    Qfunc.update(increments, Action);
}
/// <summary>
/// Calculates the predicted Q value for the given state/action pair.
/// </summary>
/// <param name="State">The current state, assumed to exhibit the Markov property.</param>
/// <param name="Action">The selected action.</param>
/// <returns>The predicted Q value.</returns>
public double GetQValue(List<int> State, int Action)
{
    // Actions always come first, as it is assumed the action space is constant.
    // The approximator operates on double-valued features, so the int state
    // is converted element-wise before evaluation.
    return Qfunc.Value(State.ConvertAll(v => Convert.ToDouble(v)), Action);
}