Example #1
        /// <summary>
        /// Update the existing policy based on the available information
        /// </summary>
        /// <param name="PastState">The state that the agent transistion from</param>
        /// <param name="CurrentState">The state that the agent has transitioned to</param>
        /// <param name="Action">The action taken by the agent to cause transition</param>
        /// <param name="Reward">The reward recieved from its transition</param>
        public void UpdatePolicy(List<int> PastState, List<int> CurrentState, int Action, double Reward)
        {
            /*prediction error*/
            double error = TD(PastState, CurrentState, Action, Reward);
            /*parameter increments: the gradient of the approximator at the
              past state-action feature set, scaled by the TD error and the step size*/
            List<double> parameters = Qfunc.Gradient(PastState.ConvertAll(v => Convert.ToDouble(v)), Action).ConvertAll(v => v * error * Epsilon);

            /*update parameters in approximator*/
            Qfunc.update(parameters, Action);
        }
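
The TD helper called above is not shown in either example. The sketch below is one plausible, minimal reading of it, assuming a one-step Q-learning target (reward plus the discounted best value of the next state) and using the GetQValue method from Example #2; the Gamma and ActionCount fields are hypothetical names, not part of the original code.

        /// <summary>
        /// Sketch of a one-step TD error: bootstrapped target minus current prediction.
        /// Gamma (discount factor) and ActionCount are assumed fields.
        /// </summary>
        private double TD(List<int> PastState, List<int> CurrentState, int Action, double Reward)
        {
            /*best predicted value reachable from the next state*/
            double maxNext = double.NegativeInfinity;
            for (int a = 0; a < ActionCount; a++)
            {
                maxNext = Math.Max(maxNext, GetQValue(CurrentState, a));
            }

            /*prediction error: target minus the current estimate*/
            return Reward + Gamma * maxNext - GetQValue(PastState, Action);
        }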
Example #2
 /// <summary>
 /// Calculates the predicted Q value for the state action pair
 /// </summary>
 ///   <param name="State">The current state assumed to exibit the markov property</typeparam>
 /// <param name="Action">The select action</typeparam>
 /// <returns>predicted Q value</returns>
 public double GetQValue(List<int> State, int Action)
 {
     /*actions always come first, as the action space is assumed to be constant*/
     return Qfunc.Value(State.ConvertAll(v => Convert.ToDouble(v)), Action);
 }
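 }

Taken together, the two methods support a simple act-then-learn loop: query GetQValue to pick the greedy action, take it, then pass the observed transition to UpdatePolicy. The usage sketch below is hypothetical; agent, env, and actionCount are stand-in names that do not appear in the original code.

 /*greedy action selection over an assumed constant action space*/
 List<int> state = env.Observe();
 int best = 0;
 for (int a = 1; a < actionCount; a++)
 {
     if (agent.GetQValue(state, a) > agent.GetQValue(state, best))
     {
         best = a;
     }
 }

 /*apply the action, observe the outcome, and update the policy*/
 double reward = env.Step(best);
 List<int> next = env.Observe();
 agent.UpdatePolicy(state, next, best, reward);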