Example #1
0
        /// <summary>
        /// Chooses the next action for one step of a trial: seeds the utility
        /// and reward model for states seen for the first time, runs one
        /// utility update when a previous step exists, and rolls the
        /// previous-step bookkeeping forward (or clears it at a terminal state).
        /// </summary>
        /// <param name="perception">the latest state/reward percept</param>
        /// <returns>the action chosen for the current state (the default
        /// ACTION_TYPE value when the current state is terminal)</returns>
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            // First visit to this state: record its observed reward as the
            // initial utility estimate and register it with the MDP model.
            if (!utilityFunction.hasUtilityFor(perception.getState()))
            {
                utilityFunction.setUtility(perception.getState(), perception.getReward());
                mdp.setReward(perception.getState(), perception.getReward());
            }

            // A non-null previous state means this is not the first step of a
            // trial: count the visit and run one utility update (argument 1.0
            // is passed through to updateUtilityFunction unchanged).
            if (previousState != null)
            {
                stateCount.incrementFor(previousState);
                utilityFunction = updateUtilityFunction(1.0);
            }

            if (mdp.isTerminalState(currentState))
            {
                // End of trial: reset the previous-step bookkeeping.
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
                previousReward = double.MinValue;
            }
            else
            {
                // Roll the window forward; the policy picks the next action.
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
                previousReward = currentReward;
            }
            return previousAction;
        }
Example #2
0
        /// <summary>
        /// Q-learning step: records the latest percept, explores randomly at
        /// the start of a trial, and otherwise counts the previous
        /// state-action pair and performs one Q update (learning-rate
        /// argument 0.8) before returning the chosen action.
        /// </summary>
        /// <param name="perception">the latest state/reward percept</param>
        /// <returns>the action stored in previousAction after the update</returns>
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            // Cache the percept for the update methods below.
            currentState  = perception.getState();
            currentReward = perception.getReward();

            // First step of a trial: explore with a random action.
            if (startingTrial())
            {
                updateLearnerState(selectRandomAction());
                return previousAction;
            }

            // Both remaining branches count the previous state-action pair.
            incrementStateActionCount(previousState, previousAction);

            if (mdp.isTerminalState(currentState))
            {
                // Final backup for the trial; the returned action is unused
                // because the trial is over, so previous-step state is cleared.
                updateQ(0.8);
                previousAction = default(ACTION_TYPE);
                previousState  = default(STATE_TYPE);
                previousReward = double.MinValue;
            }
            else
            {
                // Backup and advance: the action chosen by updateQ becomes
                // the learner's next previous action.
                updateLearnerState(updateQ(0.8));
            }
            return previousAction;
        }
Example #3
0
        // Adaptive-dynamic-programming step: learns transition probabilities
        // from observed visit counts, re-estimates utilities under the current
        // policy, then picks the next action.  Returns the default
        // ACTION_TYPE value when the current state is terminal.
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            if (!(utilityFunction.hasUtilityFor(perception.getState())))
            { // first visit to this state: seed its utility with the observed
                // reward and register the reward with the MDP model
                utilityFunction.setUtility(perception.getState(), perception
                                           .getReward());
                mdp.setReward(perception.getState(), perception.getReward());
            }
            if (!(previousState == null))
            {
                // N(s,a): how many times previousAction was taken in
                // previousState.
                Pair <STATE_TYPE, ACTION_TYPE> prevState = new Pair <STATE_TYPE, ACTION_TYPE>(previousState, previousAction);

                if (!nsa.ContainsKey(prevState))
                {
                    nsa.Add(prevState, 1.0);
                }
                else
                {
                    nsa[prevState]++;
                }
                // N(s,a,s'): how many times the transition
                // (previousState, previousAction) -> currentState was observed.
                MDPTransition <STATE_TYPE, ACTION_TYPE> prevTransition = new MDPTransition <STATE_TYPE, ACTION_TYPE>(
                    previousState, previousAction, currentState);

                if (!nsasdash.ContainsKey(prevTransition))
                {
                    nsasdash.Add(prevTransition, 1.0);
                }
                else
                {
                    nsasdash[prevTransition]++;
                }
                // Re-estimate every recorded transition probability as
                // N(s,a,s') / N(s,a).
                // NOTE(review): the nsa lookup with a freshly constructed Pair
                // relies on Pair (and MDPTransition as a dictionary key)
                // implementing value-based Equals/GetHashCode — confirm.
                foreach (MDPTransition <STATE_TYPE, ACTION_TYPE> transition in nsasdash
                         .Keys)
                {
                    if (nsasdash[transition] != 0.0)
                    {
                        double newValue = nsasdash[transition]
                                          / nsa[new Pair <STATE_TYPE, ACTION_TYPE>(
                                                    transition.getInitialState(), transition
                                                    .getAction())];
                        mdp.setTransitionProbability(transition, newValue);
                    }
                }
                // Re-estimate utilities from the transitions available in the
                // previous state under the current policy (second argument 1
                // is passed through to valueDetermination unchanged).
                List <MDPTransition <STATE_TYPE, ACTION_TYPE> > validTransitions = mdp
                                                                                   .getTransitionsWith(previousState, policy
                                                                                                       .getAction(previousState));
                utilityFunction = valueDetermination(validTransitions, 1);
            }

            if (mdp.isTerminalState(currentState))
            {
                // End of trial: clear the previous-step bookkeeping.
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
            }
            else
            {
                // Advance one step; the policy picks the next action.
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
            }
            return(previousAction);
        }
Example #4
0
 /// <summary>
 /// Caches the state and reward carried by the given percept so that a
 /// later decision step can read them from currentState/currentReward.
 /// </summary>
 /// <param name="perception">the latest state/reward percept</param>
 public void updateFromPerception(MDPPerception <STATE_TYPE> perception)
 {
     currentReward = perception.getReward();
     currentState  = perception.getState();
 }