예제 #1
0
        /// <summary>
        /// Returns the softmax probability of <paramref name="action"/> in <paramref name="state"/>:
        /// exp(Q(state, action)) divided by the sum of exp(Q(state, a)) over every action
        /// returned by <c>actionValueFunction.GetActionValues(state)</c>.
        /// </summary>
        /// <param name="action">The action whose selection probability is requested.</param>
        /// <param name="state">The state in which the action values are looked up.</param>
        /// <param name="actionValueFunction">Source of the action values for <paramref name="state"/>.</param>
        /// <returns>
        /// The ratio described above. If <c>GetActionValues(state)</c> yields no entries the
        /// denominator is zero and the result is not a finite probability.
        /// </returns>
        public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
        {
            var actionValues = actionValueFunction.GetActionValues(state);
            double requestedValue = actionValueFunction.GetValue(state, action);

            // Math.Exp overflows to PositiveInfinity for arguments above ~709, which would turn
            // the ratio into Infinity / Infinity = NaN. Shifting every exponent by the largest
            // value leaves the ratio mathematically unchanged but keeps each term in (0, 1].
            double maxValue = requestedValue;
            foreach (var actionValue in actionValues)
            {
                double candidate = actionValueFunction.GetValue(state, actionValue.Key);
                if (candidate > maxValue)
                {
                    maxValue = candidate;
                }
            }

            double sumOfShiftedExponentials = 0;
            foreach (var actionValue in actionValues)
            {
                sumOfShiftedExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
            }

            return Math.Exp(requestedValue - maxValue) / sumOfShiftedExponentials;
        }
예제 #2
0
        /// <summary>
        /// Returns the probability of selecting <paramref name="action"/> in <paramref name="state"/>.
        /// The current implementation is a softmax over the action values:
        /// exp(Q(state, action)) divided by the sum of exp(Q(state, a)) over every action
        /// returned by <c>actionValueFunction.GetActionValues(state)</c>.
        /// </summary>
        /// <param name="action">The action whose selection probability is requested.</param>
        /// <param name="state">The state in which the action values are looked up.</param>
        /// <param name="actionValueFunction">Source of the action values for <paramref name="state"/>.</param>
        /// <returns>
        /// The ratio described above. If <c>GetActionValues(state)</c> yields no entries the
        /// denominator is zero and the result is not a finite probability.
        /// </returns>
        public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
        {
            // TODO: the commented-out sketch below appears to describe a different scheme
            // (a `greedy` share split among the highest-valued actions, the remainder spread
            // over all actions). `greedy` is not visible here; until that is finished, the
            // method falls back to the softmax computed underneath. Confirm which is intended.

            /*
             * int highestCount = ListUtils.GetAllHighest(actionValueFunction.GetActionValues(state), (x, y) => (x.Value - y.Value)).Count;
             * int allCount = actionValueFunction.GetActionValues(state).Count;
             * if (actionValueFunction.GetValue(state, action) == ListUtils.GetHighest(actionValueFunction.GetActionValues(state), (x, y) => x.Value > y.Value).Value)
             *  return greedy / highestCount;
             * return (1 - greedy) / allCount ;
             */
            var actionValues = actionValueFunction.GetActionValues(state);
            double requestedValue = actionValueFunction.GetValue(state, action);

            // Math.Exp overflows to PositiveInfinity for arguments above ~709, which would turn
            // the ratio into Infinity / Infinity = NaN. Shifting every exponent by the largest
            // value leaves the ratio mathematically unchanged but keeps each term in (0, 1].
            double maxValue = requestedValue;
            foreach (var actionValue in actionValues)
            {
                double candidate = actionValueFunction.GetValue(state, actionValue.Key);
                if (candidate > maxValue)
                {
                    maxValue = candidate;
                }
            }

            double sumOfShiftedExponentials = 0;
            foreach (var actionValue in actionValues)
            {
                sumOfShiftedExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
            }

            return Math.Exp(requestedValue - maxValue) / sumOfShiftedExponentials;
        }
예제 #3
0
        /// <summary>
        /// Moves the stored value of (<paramref name="state"/>, <paramref name="action"/>) toward
        /// <c>reward + discount * E[Q(nextState, a')]</c>, where the expectation weights each
        /// action value in <paramref name="nextState"/> by the probability the policy assigns
        /// to that action. The move is scaled by <c>stepSize</c>.
        /// </summary>
        /// <param name="state">The state in which <paramref name="action"/> was taken.</param>
        /// <param name="nextState">The state reached after taking <paramref name="action"/>.</param>
        /// <param name="action">The action whose value is being updated.</param>
        /// <param name="reward">The reward observed for the transition.</param>
        private void UpdateActionValueFunction(State state, State nextState, AIAction action, double reward)
        {
            double currentValue = ActionValueFunction.GetValue(state, action);

            // Policy-weighted average of the action values available in the next state.
            double expectedNextValue = 0;
            foreach (var nextActionValue in ActionValueFunction.GetActionValues(nextState))
            {
                double probability = policy.GetProbabilityOfAction(nextActionValue.Key, nextState, ActionValueFunction);
                expectedNextValue += probability * ActionValueFunction.GetValue(nextState, nextActionValue.Key);
            }

            // Reuse the value read above instead of looking up (state, action) a second time.
            double target = reward + discount * expectedNextValue;
            double updatedValue = currentValue + stepSize * (target - currentValue);

            // UpdateValue returns the function to use from now on, so store its result back.
            ActionValueFunction = ActionValueFunction.UpdateValue(state, action, updatedValue);
        }