Exemple #1
0
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            // Passive TD step: fold the latest percept into the utility
            // estimates and return the policy's action for the current state
            // (null once a terminal state is reached).
            STATE_TYPE perceivedState = perception.getState();

            // Unseen state: seed its utility and the reward model with the
            // observed reward.
            if (!utilityFunction.hasUtilityFor(perceivedState))
            {
                double observedReward = perception.getReward();
                utilityFunction.setUtility(perceivedState, observedReward);
                mdp.setReward(perceivedState, observedReward);
            }

            // Past the first step of a trial: apply one temporal-difference
            // update for the transition just observed.
            if (previousState != null)
            {
                stateCount.incrementFor(previousState);
                utilityFunction = updateUtilityFunction(1.0);
            }

            if (mdp.isTerminalState(currentState))
            {
                // End of trial: clear the trace so the next trial starts fresh.
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
                previousReward = double.MinValue;
            }
            else
            {
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
                previousReward = currentReward;
            }
            return(previousAction);
        }
Exemple #2
0
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            // Learning rate used for every Q update made by this agent.
            const double learningRate = 0.8;

            currentState  = perception.getState();
            currentReward = perception.getReward();

            // First percept of a trial: nothing to learn from yet, just act.
            if (startingTrial())
            {
                updateLearnerState(selectRandomAction());
                return(previousAction);
            }

            // Every non-initial step records the (state, action) visit before
            // updating Q.
            incrementStateActionCount(previousState, previousAction);

            if (mdp.isTerminalState(currentState))
            {
                // Terminal state: perform a final Q update, then clear the
                // trace so the next trial starts from scratch.
                updateQ(learningRate);
                previousAction = default(ACTION_TYPE);
                previousState  = default(STATE_TYPE);
                previousReward = double.MinValue;
            }
            else
            {
                updateLearnerState(updateQ(learningRate));
            }
            return(previousAction);
        }
Exemple #3
0
        public MDPPerception <TState> Execute(TAction action, IRandomizer r)
        {
            // Apply the action in the underlying MDP from the current state,
            // record the resulting percept, and hand it back to the caller.
            var result = this.MDP.Execute(this.CurrentState, action, r);
            this.UpdateFromPerception(result);
            return(result);
        }
Exemple #4
0
        public override TAction DecideAction(MDPPerception <TState> perception)
        {
            // Learning rate used for every Q update made by this agent.
            const double learningRate = 0.8;

            CurrentState  = perception.GetState();
            CurrentReward = perception.GetReward();

            // First percept of a trial: nothing to learn from yet, just act.
            if (this.StartingTrial())
            {
                this.UpdateLearnerState(this.SelectRandomAction());
                return(PreviousAction);
            }

            // Every non-initial step records the (state, action) visit before
            // updating Q.
            this.IncrementStateActionCount(PreviousState, PreviousAction);

            if (MDP.IsTerminalState(CurrentState))
            {
                // Terminal state: perform a final Q update, then clear the
                // trace so the next trial starts from scratch.
                this.UpdateQ(learningRate);
                PreviousAction = null;
                PreviousState  = null;
                // NOTE(review): sentinel standing in for Java's null reward —
                // confirm it is never read back as a genuine reward.
                previousReward = double.NegativeInfinity;
            }
            else
            {
                this.UpdateLearnerState(this.UpdateQ(learningRate));
            }
            return(PreviousAction);
        }
        public override TAction DecideAction(MDPPerception <TState> perception)
        {
            // Passive TD step: fold the latest percept into the utility
            // estimates and return the policy's action for the current state
            // (null once a terminal state is reached).
            TState perceivedState = perception.GetState();

            // Unseen state: seed its utility and the reward model with the
            // observed reward.
            if (!utilityFunction.HasUtilityFor(perceivedState))
            {
                double observedReward = perception.GetReward();
                utilityFunction.SetUtility(perceivedState, observedReward);
                MDP.SetReward(perceivedState, observedReward);
            }

            // Past the first step of a trial: apply one temporal-difference
            // update for the transition just observed.
            if (PreviousState != null)
            {
                stateCount.IncrementFor(PreviousState);
                utilityFunction = this.UpdateUtilityFunction(1.0);
            }

            if (MDP.IsTerminalState(CurrentState))
            {
                // End of trial: clear the trace so the next trial starts fresh.
                PreviousState  = null;
                PreviousAction = null;
                // NOTE(review): 0 stands in for Java's null reward — confirm it
                // cannot be mistaken for a genuine reward of zero.
                previousReward = 0;
            }
            else
            {
                PreviousState  = CurrentState;
                PreviousAction = policy.GetAction(CurrentState);
                previousReward = CurrentReward;
            }
            return(PreviousAction);
        }
Exemple #6
0
        public MDPPerception <STATE_TYPE> execute(ACTION_TYPE action, Randomizer r)
        {
            // Apply the action in the underlying MDP from the current state,
            // record the resulting percept, and hand it back to the caller.
            MDPPerception <STATE_TYPE> result = mdp.execute(currentState, action, r);
            updateFromPerception(result);
            return(result);
        }
Exemple #7
0
        public override TAction DecideAction(MDPPerception <TState> perception)
        {
            // Active ADP learning step: fold the latest percept into the
            // transition-frequency tables, re-estimate the MDP's transition
            // probabilities, refresh utilities by value determination, and
            // return the policy's action for the current state (null once a
            // terminal state is reached).
            if (!(utilityFunction.HasUtilityFor(perception.GetState())))
            {
                // First time this state has been perceived: seed its utility
                // and the reward model with the observed reward.
                utilityFunction.SetUtility(perception.GetState(), perception.GetReward());
                MDP.SetReward(perception.GetState(), perception.GetReward());
            }
            if (!(PreviousState == null))
            {
                // Build each dictionary key once instead of allocating a fresh
                // Pair / MDPTransition for every lookup (consistent with the
                // generic STATE_TYPE/ACTION_TYPE variant of this agent).
                Pair <TState, TAction> stateAction =
                    new Pair <TState, TAction>(PreviousState, PreviousAction);
                // N(s,a): count visits to the (PreviousState, PreviousAction) pair.
                if (nsa.ContainsKey(stateAction))
                {
                    nsa[stateAction] += 1;
                }
                else
                {
                    nsa[stateAction] = 1.0;
                }

                MDPTransition <TState, TAction> observedTransition =
                    new MDPTransition <TState, TAction>(PreviousState, PreviousAction, CurrentState);
                // N(s,a,s'): count the observed transition into CurrentState.
                if (nsasdash.ContainsKey(observedTransition))
                {
                    nsasdash[observedTransition] += 1;
                }
                else
                {
                    nsasdash[observedTransition] = 1.0;
                }

                // Re-estimate P(s'|s,a) = N(s,a,s') / N(s,a) for every
                // transition observed so far.
                foreach (MDPTransition <TState, TAction> transition in nsasdash.Keys)
                {
                    if (nsasdash[transition] != 0.0)
                    {
                        double newValue = nsasdash[transition]
                                          / nsa[new Pair <TState, TAction>(
                                                    transition.GetInitialState(), transition.GetAction())];
                        MDP.SetTransitionProbability(transition, newValue);
                    }
                }

                // Refresh utilities along the transitions the current policy
                // takes from the previous state.
                IList <MDPTransition <TState, TAction> > validTransitions = MDP
                                                                            .GetTransitionsWith(PreviousState, policy.GetAction(PreviousState));
                utilityFunction = this.ValueDetermination(validTransitions, 1);
            }

            if (MDP.IsTerminalState(CurrentState))
            {
                // End of trial: clear the trace so the next trial starts fresh.
                PreviousState  = null;
                PreviousAction = null;
            }
            else
            {
                PreviousState  = CurrentState;
                PreviousAction = policy.GetAction(CurrentState);
            }
            return(PreviousAction);
        }
Exemple #8
0
        public void executeTrial(Randomizer r)
        {
            // Run one complete trial: start from the MDP's initial state and
            // keep feeding percepts to decideAction until it returns null,
            // which signals that a terminal state was reached.
            currentState   = mdp.getInitialState();
            currentReward  = mdp.getRewardFor(currentState);
            previousState  = default(STATE_TYPE);
            previousAction = default(ACTION_TYPE);

            MDPPerception <STATE_TYPE> perception =
                new MDPPerception <STATE_TYPE>(currentState, currentReward);
            while (true)
            {
                ACTION_TYPE action = decideAction(perception);
                if (action == null)
                {
                    break;
                }
                perception = execute(action, r);
            }
        }
Exemple #9
0
        public void ExecuteTrial(IRandomizer r)
        {
            // Run one complete trial: start from the MDP's initial state and
            // keep feeding percepts to DecideAction until it returns null,
            // which signals that a terminal state was reached.
            this.CurrentState   = this.MDP.GetInitialState();
            this.CurrentReward  = this.MDP.GetRewardFor(this.CurrentState);
            this.PreviousState  = null;
            this.PreviousAction = null;

            MDPPerception <TState> perception =
                new MDPPerception <TState>(this.CurrentState, this.CurrentReward);
            while (true)
            {
                TAction action = this.DecideAction(perception);
                if (action == null)
                {
                    break;
                }
                perception = this.Execute(action, r);
            }
        }
Exemple #10
0
        public override ACTION_TYPE decideAction(MDPPerception <STATE_TYPE> perception)
        {
            // Active ADP learning step: update the transition-frequency tables
            // from the latest percept, re-estimate transition probabilities,
            // refresh the utility function, and return the policy's action for
            // the current state (null once a terminal state is reached).
            if (!(utilityFunction.hasUtilityFor(perception.getState())))
            { // first time this state has been perceived: seed its utility and
                // the reward model with the observed reward
                utilityFunction.setUtility(perception.getState(), perception
                                           .getReward());
                mdp.setReward(perception.getState(), perception.getReward());
            }
            if (!(previousState == null))
            {
                // N(s,a): count visits to the (previousState, previousAction) pair.
                Pair <STATE_TYPE, ACTION_TYPE> prevState = new Pair <STATE_TYPE, ACTION_TYPE>(previousState, previousAction);

                if (!nsa.ContainsKey(prevState))
                {
                    nsa.Add(prevState, 1.0);
                }
                else
                {
                    nsa[prevState]++;
                }
                // N(s,a,s'): count the observed transition into currentState.
                MDPTransition <STATE_TYPE, ACTION_TYPE> prevTransition = new MDPTransition <STATE_TYPE, ACTION_TYPE>(
                    previousState, previousAction, currentState);

                if (!nsasdash.ContainsKey(prevTransition))
                {
                    nsasdash.Add(prevTransition, 1.0);
                }
                else
                {
                    nsasdash[prevTransition]++;
                }
                // Re-estimate P(s'|s,a) = N(s,a,s') / N(s,a) for every
                // transition observed so far.
                foreach (MDPTransition <STATE_TYPE, ACTION_TYPE> transition in nsasdash
                         .Keys)
                {
                    if (nsasdash[transition] != 0.0)
                    {
                        double newValue = nsasdash[transition]
                                          / nsa[new Pair <STATE_TYPE, ACTION_TYPE>(
                                                    transition.getInitialState(), transition
                                                    .getAction())];
                        mdp.setTransitionProbability(transition, newValue);
                    }
                }
                // Refresh utilities along the transitions the current policy
                // takes from the previous state.
                List <MDPTransition <STATE_TYPE, ACTION_TYPE> > validTransitions = mdp
                                                                                   .getTransitionsWith(previousState, policy
                                                                                                       .getAction(previousState));
                utilityFunction = valueDetermination(validTransitions, 1);
            }

            if (mdp.isTerminalState(currentState))
            {
                // End of trial: clear the trace so the next trial starts fresh.
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
            }
            else
            {
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
            }
            return(previousAction);
        }
Exemple #11
0
 // Template method: given the latest percept, update the learner's internal
 // state and return the next action to take, or null once the current trial
 // has reached a terminal state.
 public abstract ACTION_TYPE decideAction(
     MDPPerception <STATE_TYPE> perception);
Exemple #12
0
 // Record the state and reward carried by the given percept as the agent's
 // current state and current reward.
 public void updateFromPerception(MDPPerception <STATE_TYPE> perception)
 {
     this.currentReward = perception.getReward();
     this.currentState  = perception.getState();
 }
Exemple #13
0
 // Template method: given the latest percept, update the learner's internal
 // state and return the next action to take, or null once the current trial
 // has reached a terminal state.
 public abstract TAction DecideAction(
     MDPPerception <TState> perception);
Exemple #14
0
 // Record the state and reward carried by the given percept as the agent's
 // current state and current reward.
 public void UpdateFromPerception(MDPPerception <TState> perception)
 {
     CurrentReward = perception.GetReward();
     CurrentState  = perception.GetState();
 }