예제 #1
0
        /// <summary>
        /// Applies one observed transition to the Q table: adds the reward to
        /// <c>stats.cumulativeReward</c>, makes sure both the new and the old state have a row
        /// (seeded with <c>defaultQ</c> for every entry of <c>availableActions</c>), and moves the
        /// entry for (oldState, action) towards <c>reward + gamma * max(Q[newState])</c> by a
        /// fraction <c>alpha</c>.
        /// </summary>
        /// <param name="transition">The transition to learn from; its oldState, action, newState and reward are read.</param>
        /// <returns>The absolute difference between the updated estimate and the previous one.</returns>
        public override double update(StateTransition <stateType, actionType> transition)
        {
            stats.cumulativeReward += transition.reward;

            // Previous estimate, read before any rows are created below.
            // NOTE(review): value(...) is defined outside this block; presumably it falls back to
            // defaultQ for unseen state/action pairs -- confirm.
            double q_s_a = value(transition.oldState, transition.action);

            // Get-or-create the row for the new state. The row is registered in Qtable before it
            // is filled, and the new state is handled before the old one, exactly as before.
            Dictionary <actionType, double> newStateRow;
            if (!Qtable.TryGetValue(transition.newState, out newStateRow))
            {
                newStateRow = new Dictionary <actionType, double>(actionComparer);
                Qtable.Add(transition.newState, newStateRow);
                foreach (actionType act in availableActions)
                {
                    newStateRow.Add(act, defaultQ);
                }
            }

            // Get-or-create the row for the old state. When oldState equals newState the lookup
            // simply finds the row created above.
            Dictionary <actionType, double> oldStateRow;
            if (!Qtable.TryGetValue(transition.oldState, out oldStateRow))
            {
                oldStateRow = new Dictionary <actionType, double>(actionComparer);
                Qtable.Add(transition.oldState, oldStateRow);
                foreach (actionType act in availableActions)
                {
                    oldStateRow.Add(act, defaultQ);
                }
            }

            // Best value currently stored for the new state.
            double maxNewQ = newStateRow.Values.Max();

            // Blend the old estimate with the one-step target; keep the result in a local so the
            // table does not have to be read back to compute the return value.
            double newVal = q_s_a + alpha * (transition.reward + gamma * maxNewQ - q_s_a);
            oldStateRow[transition.action] = newVal;

            return(Math.Abs(newVal - q_s_a));
        }
예제 #2
0
        /// <summary>
        /// Looks up the stored Q value of <paramref name="state"/> for each of the given actions.
        /// </summary>
        /// <param name="state">State whose row in <c>Qtable</c> is consulted.</param>
        /// <param name="actions">Actions to look up; the result is in the same order.</param>
        /// <returns>
        /// An array with one entry per action: the stored value when both the state and the action
        /// have an entry in <c>Qtable</c>, otherwise <c>defaultQ</c>.
        /// </returns>
        public override double[] value(stateType state, List <actionType> actions)
        {
            double[] response = new double[actions.Count];

            // The row for this state is the same for every action, so fetch it once instead of
            // once per iteration (and without allocating a placeholder dictionary each time).
            Dictionary <actionType, double> stateTable;
            bool stateKnown = Qtable.TryGetValue(state, out stateTable);

            for (int i = 0; i < response.Length; i++)
            {
                double stored;
                if (stateKnown && stateTable.TryGetValue(actions[i], out stored))
                {
                    response[i] = stored;
                }
                else
                {
                    // Unknown state or unknown action: fall back to the default estimate.
                    response[i] = defaultQ;
                }
            }
            return(response);
        }
예제 #3
0
        /// <summary>
        /// Recomputes the Q table entry for (<paramref name="state"/>, <paramref name="action"/>)
        /// as a weighted sum over the successor states listed by
        /// <c>T.GetStateValueTable(state, action)</c>. Each successor contributes
        /// <c>(T.Get(state, action, s2) / P) * (average of R.Get(state, action, s2) + gamma * max(Q[s2]))</c>,
        /// where P is the sum of the values in that table. Rows missing from <c>Qtable</c> are
        /// created and seeded with <c>defaultQ</c> for every entry of <c>availableActions</c>.
        /// </summary>
        /// <param name="state">State of the entry to recompute.</param>
        /// <param name="action">Action of the entry to recompute.</param>
        private void updateQ(stateType state, actionType action)
        {
            // Fetch the successor table once; it supplies both the normaliser and the keys to visit.
            var successors = T.GetStateValueTable(state, action);

            double P = successors.Values.Sum();

            // A zero total would make thisT / P below a division by zero; leave the entry untouched.
            if (P == 0)
            {
                return;
            }

            double newQ = 0;

            foreach (stateType s2 in successors.Keys)
            {
                // Get-or-create the successor's row so that a maximum can be taken over it.
                Dictionary <actionType, double> successorRow;
                if (!Qtable.TryGetValue(s2, out successorRow))
                {
                    successorRow = new Dictionary <actionType, double>(actionComparer);
                    Qtable.Add(s2, successorRow);
                    foreach (actionType act in availableActions)
                    {
                        successorRow.Add(act, defaultQ);
                    }
                }
                double maxQ = successorRow.Values.Max();

                double thisT    = T.Get(state, action, s2);
                double thisR    = R.Get(state, action, s2).Average();
                double thisProb = thisT / P;
                newQ += thisProb * (thisR + gamma * maxQ);
            }

            // The row for 'state' is resolved only after the loop: it may have been created there
            // if 'state' is one of its own successors.
            Dictionary <actionType, double> stateRow;
            if (!Qtable.TryGetValue(state, out stateRow))
            {
                stateRow = new Dictionary <actionType, double>(actionComparer);
                Qtable.Add(state, stateRow);
                foreach (actionType act in availableActions)
                {
                    stateRow.Add(act, defaultQ);
                }
            }

            stateRow[action] = newQ;
        }
예제 #4
0
        /// <summary>
        /// Returns the current Q table estimate for a single state/action pair.
        /// </summary>
        /// <param name="state">State whose row in <c>Qtable</c> is consulted.</param>
        /// <param name="action">Action to look up within that row.</param>
        /// <returns>
        /// The stored value when both the state and the action have an entry in <c>Qtable</c>,
        /// otherwise <c>defaultQ</c>.
        /// </returns>
        public double value(stateType state, actionType action)
        {
            Dictionary <actionType, double> stateTable;
            double stored;

            // One lookup per level; no placeholder dictionary is allocated for unknown states.
            if (Qtable.TryGetValue(state, out stateTable) && stateTable.TryGetValue(action, out stored))
            {
                return(stored);
            }

            // Unknown state or unknown action: fall back to the default estimate.
            return(defaultQ);
        }