示例#1
0
 /// <summary>
 /// Selects an action for <paramref name="state"/>. Depending on a random draw compared
 /// against <c>greedy</c>, the action is picked at random either from every action value
 /// of the state or from the entries that <c>ListUtils.GetAllHighest</c> selects out of
 /// the action probabilities.
 /// </summary>
 /// <param name="state">State for which an action is chosen.</param>
 /// <param name="actionValueFunction">Provides the action values of the state.</param>
 /// <returns>The action produced by <c>ChooseRandomAction</c> for the chosen candidates.</returns>
 public AIAction ChooseAction(State state, ActionValueFunction actionValueFunction)
 {
     // NOTE(review): presumably Random() is uniform on [0, 1), making `greedy` the
     // probability of taking the greedy branch - confirm against its definition.
     bool explore = Random() > greedy;

     if (!explore)
     {
         var probabilities = GetActionProbabilities(state, actionValueFunction);
         var topCandidates = ListUtils.GetAllHighest(probabilities, (x, y) => (x.Value - y.Value));
         return ChooseRandomAction(topCandidates);
     }

     // Exploration branch: every action of the state is a candidate.
     var allCandidates = actionValueFunction.GetActionValues(state);
     return ChooseRandomAction(allCandidates);
 }
示例#2
0
        /// <summary>
        /// Returns the softmax probability of <paramref name="action"/> in
        /// <paramref name="state"/>: exp(value of action) divided by the sum of exp(value)
        /// over every action listed by <c>GetActionValues(state)</c>.
        /// </summary>
        /// <param name="action">Action whose probability is requested.</param>
        /// <param name="state">State in which the action values are looked up.</param>
        /// <param name="actionValueFunction">Provides the action values.</param>
        /// <returns>
        /// The softmax weight of the action. If the state lists no actions the divisor is
        /// zero and the result is positive infinity, as before.
        /// </returns>
        public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
        {
            var actionValues = actionValueFunction.GetActionValues(state);
            double targetValue = actionValueFunction.GetValue(state, action);

            // Math.Exp overflows to +Infinity for arguments above ~709, which used to turn
            // the result into Infinity / Infinity = NaN. Subtracting the largest value from
            // every exponent leaves the ratio mathematically unchanged while keeping each
            // exponent <= 0, so nothing can overflow.
            double maxValue = targetValue;
            foreach (var actionValue in actionValues)
            {
                maxValue = Math.Max(maxValue, actionValueFunction.GetValue(state, actionValue.Key));
            }

            double sumOfExponentials = 0;
            foreach (var actionValue in actionValues)
            {
                sumOfExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
            }

            return Math.Exp(targetValue - maxValue) / sumOfExponentials;
        }
示例#3
0
        /// <summary>
        /// Moves the value of (<paramref name="state"/>, <paramref name="action"/>) towards
        /// <c>reward + discount * E</c>, where <c>E</c> is the policy-weighted average of the
        /// action values in <paramref name="nextState"/>, and stores the resulting function
        /// back into <c>ActionValueFunction</c>.
        /// </summary>
        /// <param name="state">State in which the action was taken.</param>
        /// <param name="nextState">State reached after taking the action.</param>
        /// <param name="action">Action that was taken.</param>
        /// <param name="reward">Reward received for the transition.</param>
        private void UpdateActionValueFunction(State state, State nextState, AIAction action, double reward)
        {
            double currentValue = ActionValueFunction.GetValue(state, action);

            // Expected value of the next state under the current policy:
            // sum over actions of probability(action) * value(nextState, action).
            double expectedNextValue = 0;
            foreach (var entry in ActionValueFunction.GetActionValues(nextState))
            {
                double probability = policy.GetProbabilityOfAction(entry.Key, nextState, ActionValueFunction);
                double nextValue = ActionValueFunction.GetValue(nextState, entry.Key);
                expectedNextValue += probability * nextValue;
            }

            // Difference between the bootstrapped target and the present estimate.
            double target = reward + discount * expectedNextValue;
            double error = target - ActionValueFunction.GetValue(state, action);
            double updatedValue = currentValue + stepSize * error;

            // UpdateValue returns the function to keep, so the result is reassigned.
            ActionValueFunction = ActionValueFunction.UpdateValue(state, action, updatedValue);
        }
示例#4
0
        /// <summary>
        /// Returns the probability of <paramref name="action"/> in <paramref name="state"/>.
        /// Currently computed as a softmax over the action values: exp(value of action)
        /// divided by the sum of exp(value) over every action listed by
        /// <c>GetActionValues(state)</c>.
        /// </summary>
        /// <param name="action">Action whose probability is requested.</param>
        /// <param name="state">State in which the action values are looked up.</param>
        /// <param name="actionValueFunction">Provides the action values.</param>
        /// <returns>
        /// The softmax weight of the action. If the state lists no actions the divisor is
        /// zero and the result is positive infinity, as before.
        /// </returns>
        public double GetProbabilityOfAction(AIAction action, State state, ActionValueFunction actionValueFunction)
        {
            // TODO: finish the `greedy`-based probability sketched in the draft below;
            // until then the softmax computation underneath is what this method returns.

            /*
             * int highestCount = ListUtils.GetAllHighest(actionValueFunction.GetActionValues(state), (x, y) => (x.Value - y.Value)).Count;
             * int allCount = actionValueFunction.GetActionValues(state).Count;
             * if (actionValueFunction.GetValue(state, action) == ListUtils.GetHighest(actionValueFunction.GetActionValues(state), (x, y) => x.Value > y.Value).Value)
             *  return greedy / highestCount;
             * return (1 - greedy) / allCount ;
             */
            var actionValues = actionValueFunction.GetActionValues(state);
            double targetValue = actionValueFunction.GetValue(state, action);

            // Math.Exp overflows to +Infinity for arguments above ~709, which used to turn
            // the result into Infinity / Infinity = NaN. Subtracting the largest value from
            // every exponent leaves the ratio mathematically unchanged while keeping each
            // exponent <= 0, so nothing can overflow.
            double maxValue = targetValue;
            foreach (var actionValue in actionValues)
            {
                maxValue = Math.Max(maxValue, actionValueFunction.GetValue(state, actionValue.Key));
            }

            double sumOfExponentials = 0;
            foreach (var actionValue in actionValues)
            {
                sumOfExponentials += Math.Exp(actionValueFunction.GetValue(state, actionValue.Key) - maxValue);
            }

            return Math.Exp(targetValue - maxValue) / sumOfExponentials;
        }
示例#5
0
 /// <summary>
 /// Builds a list that pairs every action listed for <paramref name="state"/> with the
 /// probability reported by <c>GetProbabilityOfAction</c>, in the same order as
 /// <c>GetActionValues(state)</c>.
 /// </summary>
 /// <param name="state">State whose actions are evaluated.</param>
 /// <param name="actionValueFunction">Provides the action values of the state.</param>
 /// <returns>A new list of (action, probability) pairs.</returns>
 private List <KeyValuePair <AIAction, double> > GetActionProbabilities(State state, ActionValueFunction actionValueFunction)
 {
     var actionValues = actionValueFunction.GetActionValues(state);
     var probabilities = new List <KeyValuePair <AIAction, double> >(actionValues.Count);

     foreach (var actionValue in actionValues)
     {
         double probability = GetProbabilityOfAction(actionValue.Key, state, actionValueFunction);
         probabilities.Add(new KeyValuePair <AIAction, double>(actionValue.Key, probability));
     }

     return probabilities;
 }
示例#6
0
 /// <summary>
 /// Replaces <c>ActionValueFunction</c> with the result of calling
 /// <c>InitActionValues</c> for <paramref name="currentState"/>.
 /// </summary>
 /// <param name="currentState">State handed to <c>InitActionValues</c>.</param>
 private void InitState(State currentState)
 {
     // InitActionValues returns the function to keep, so its result is stored back.
     var initialised = ActionValueFunction.InitActionValues(currentState);

     ActionValueFunction = initialised;
 }