public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    // If the perceived state is new, seed its utility with the observed reward.
    if (!(utilityFunction.hasUtilityFor(perception.getState())))
    {
        utilityFunction.setUtility(perception.getState(), perception.getReward());
        mdp.setReward(perception.getState(), perception.getReward());
    }
    if (!(previousState == null))
    {
        stateCount.incrementFor(previousState);
        utilityFunction = updateUtilityFunction(1.0);
    }
    if (mdp.isTerminalState(currentState))
    {
        previousState = default(STATE_TYPE);
        previousAction = default(ACTION_TYPE);
        previousReward = double.MinValue;
    }
    else
    {
        previousState = currentState;
        previousAction = policy.getAction(currentState);
        previousReward = currentReward;
    }
    return previousAction;
}
public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    currentState = perception.getState();
    currentReward = perception.getReward();
    if (startingTrial())
    {
        // First step of a trial: there is no previous step to learn from, so explore randomly.
        ACTION_TYPE chosenAction = selectRandomAction();
        updateLearnerState(chosenAction);
        return previousAction;
    }
    if (mdp.isTerminalState(currentState))
    {
        incrementStateActionCount(previousState, previousAction);
        updateQ(0.8);
        previousAction = default(ACTION_TYPE);
        previousState = default(STATE_TYPE);
        previousReward = double.MinValue;
        return previousAction;
    }
    else
    {
        incrementStateActionCount(previousState, previousAction);
        ACTION_TYPE chosenAction = updateQ(0.8);
        updateLearnerState(chosenAction);
        return previousAction;
    }
}
public MDPPerception<TState> Execute(TAction action, IRandomizer r)
{
    MDPPerception<TState> perception = this.MDP.Execute(this.CurrentState, action, r);
    this.UpdateFromPerception(perception);
    return perception;
}
public override TAction DecideAction(MDPPerception<TState> perception)
{
    CurrentState = perception.GetState();
    CurrentReward = perception.GetReward();
    if (this.StartingTrial())
    {
        // First step of a trial: there is no previous step to learn from, so explore randomly.
        TAction chosenAction = this.SelectRandomAction();
        this.UpdateLearnerState(chosenAction);
        return PreviousAction;
    }
    if (MDP.IsTerminalState(CurrentState))
    {
        this.IncrementStateActionCount(PreviousState, PreviousAction);
        this.UpdateQ(0.8);
        PreviousAction = null;
        PreviousState = null;
        // TODO: make sure this is an OK value for what used to be null in Java
        previousReward = double.NegativeInfinity;
        return PreviousAction;
    }
    else
    {
        this.IncrementStateActionCount(PreviousState, PreviousAction);
        TAction chosenAction = this.UpdateQ(0.8);
        this.UpdateLearnerState(chosenAction);
        return PreviousAction;
    }
}
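The UpdateQ(0.8) helper invoked above is not among these snippets. The following is a minimal sketch of the Q-learning backup it plausibly performs; the qTable dictionary, the gamma field, and the MaxQ/ArgMaxQ helpers are assumptions for illustration only, not the library's actual members.

// Sketch of a Q-learning backup, assuming a qTable of type
// Dictionary<Pair<TState, TAction>, double>, a gamma discount field, and
// hypothetical MaxQ/ArgMaxQ helpers over the action set.
private TAction UpdateQSketch(double alpha)
{
    var key = new Pair<TState, TAction>(PreviousState, PreviousAction);
    double oldQ = qTable.ContainsKey(key) ? qTable[key] : 0.0;
    // Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)),
    // where r is the reward observed in the previous state.
    qTable[key] = oldQ + alpha * (previousReward + gamma * MaxQ(CurrentState) - oldQ);
    return ArgMaxQ(CurrentState); // greedy action from the new state
}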
public override TAction DecideAction(MDPPerception<TState> perception)
{
    // If the perceived state is new, seed its utility with the observed reward.
    if (!(utilityFunction.HasUtilityFor(perception.GetState())))
    {
        utilityFunction.SetUtility(perception.GetState(), perception.GetReward());
        MDP.SetReward(perception.GetState(), perception.GetReward());
    }
    if (!(PreviousState == null))
    {
        stateCount.IncrementFor(PreviousState);
        utilityFunction = this.UpdateUtilityFunction(1.0);
    }
    if (MDP.IsTerminalState(CurrentState))
    {
        PreviousState = null;
        PreviousAction = null;
        // TODO: make sure that 0 is an appropriate value for what used to be null in Java
        previousReward = 0;
    }
    else
    {
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
        previousReward = CurrentReward;
    }
    return PreviousAction;
}
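UpdateUtilityFunction(1.0) above is likewise not shown. A hedged sketch of the passive temporal-difference update it plausibly applies follows; the Copy and GetUtility accessors and the Alpha learning-rate schedule are assumed names, not confirmed by these snippets.

// Sketch of a passive TD utility update, assuming UtilityFunction exposes
// Copy/GetUtility and that Alpha(stateCount, state) yields a decaying
// learning rate based on the visit count (both hypothetical here).
private UtilityFunction<TState> UpdateUtilityFunctionSketch(double gamma)
{
    UtilityFunction<TState> updated = utilityFunction.Copy();
    double uPrev = utilityFunction.GetUtility(PreviousState);
    double uCurr = utilityFunction.GetUtility(CurrentState);
    // U(s) <- U(s) + alpha(N(s)) * (R(s) + gamma * U(s') - U(s))
    double alpha = Alpha(stateCount, PreviousState);
    updated.SetUtility(PreviousState, uPrev + alpha * (previousReward + gamma * uCurr - uPrev));
    return updated;
}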
public MDPPerception<STATE_TYPE> execute(ACTION_TYPE action, Randomizer r)
{
    MDPPerception<STATE_TYPE> perception = mdp.execute(currentState, action, r);
    updateFromPerception(perception);
    return perception;
}
public override TAction DecideAction(MDPPerception<TState> perception)
{
    // If the perceived state is new, seed its utility with the observed reward.
    if (!(utilityFunction.HasUtilityFor(perception.GetState())))
    {
        utilityFunction.SetUtility(perception.GetState(), perception.GetReward());
        MDP.SetReward(perception.GetState(), perception.GetReward());
    }
    if (!(PreviousState == null))
    {
        // Count the (s, a) pair and the observed (s, a, s') transition.
        var stateAction = new Pair<TState, TAction>(PreviousState, PreviousAction);
        if (nsa.ContainsKey(stateAction))
        {
            nsa[stateAction] += 1;
        }
        else
        {
            nsa[stateAction] = 1.0;
        }
        var transitionKey = new MDPTransition<TState, TAction>(PreviousState, PreviousAction, CurrentState);
        if (nsasdash.ContainsKey(transitionKey))
        {
            nsasdash[transitionKey] += 1;
        }
        else
        {
            nsasdash[transitionKey] = 1.0;
        }
        // Re-estimate each transition probability as N(s, a, s') / N(s, a).
        foreach (MDPTransition<TState, TAction> transition in nsasdash.Keys)
        {
            if (nsasdash[transition] != 0.0)
            {
                double newValue = nsasdash[transition]
                    / nsa[new Pair<TState, TAction>(transition.GetInitialState(), transition.GetAction())];
                MDP.SetTransitionProbability(transition, newValue);
            }
        }
        IList<MDPTransition<TState, TAction>> validTransitions =
            MDP.GetTransitionsWith(PreviousState, policy.GetAction(PreviousState));
        utilityFunction = this.ValueDetermination(validTransitions, 1);
    }
    if (MDP.IsTerminalState(CurrentState))
    {
        PreviousState = null;
        PreviousAction = null;
    }
    else
    {
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
    }
    return PreviousAction;
}
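ValueDetermination(validTransitions, 1) above re-solves the utility of the previous state under the fixed policy. A sketch of the single-state update it plausibly applies is below; GetTransitionProbability, GetDestinationState, and Copy are assumed names (only SetTransitionProbability, GetInitialState, and GetAction appear in these snippets).

// Sketch of a value-determination step for the fixed policy pi:
// U(s) = R(s) + gamma * sum_{s'} P(s' | s, pi(s)) * U(s').
private UtilityFunction<TState> ValueDeterminationSketch(
    IList<MDPTransition<TState, TAction>> transitions, double gamma)
{
    double expectedUtility = 0.0;
    foreach (MDPTransition<TState, TAction> t in transitions)
    {
        expectedUtility += MDP.GetTransitionProbability(t)        // assumed accessor
            * utilityFunction.GetUtility(t.GetDestinationState()); // assumed accessor
    }
    UtilityFunction<TState> updated = utilityFunction.Copy(); // assumed copy helper
    updated.SetUtility(PreviousState, MDP.GetRewardFor(PreviousState) + gamma * expectedUtility);
    return updated;
}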
public void executeTrial(Randomizer r)
{
    currentState = mdp.getInitialState();
    currentReward = mdp.getRewardFor(mdp.getInitialState());
    previousState = default(STATE_TYPE);
    previousAction = default(ACTION_TYPE);
    MDPPerception<STATE_TYPE> perception = new MDPPerception<STATE_TYPE>(currentState, currentReward);
    ACTION_TYPE action = default(ACTION_TYPE);
    do
    {
        // decideAction returns the default action value (null for reference types)
        // once a terminal state is reached, which ends the trial.
        action = decideAction(perception);
        if (action != null)
        {
            perception = execute(action, r);
        }
    } while (action != null);
}
public void ExecuteTrial(IRandomizer r)
{
    this.CurrentState = this.MDP.GetInitialState();
    this.CurrentReward = this.MDP.GetRewardFor(this.MDP.GetInitialState());
    this.PreviousState = null;
    this.PreviousAction = null;
    MDPPerception<TState> perception = new MDPPerception<TState>(this.CurrentState, this.CurrentReward);
    TAction action = null;
    do
    {
        // DecideAction returns null once a terminal state is reached, which ends the trial.
        action = this.DecideAction(perception);
        if (action != null)
        {
            perception = this.Execute(action, r);
        }
    } while (action != null);
}
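A hedged usage sketch for the trial loop above; the MDPAgent base-class name and the concrete agent and randomizer arguments are assumptions, since only individual methods appear in these snippets.

// Usage sketch: run repeated learning trials. The agent argument stands in for
// any concrete subclass of the assumed MDPAgent base, and r for any IRandomizer.
public static void RunTrials<TState, TAction>(
    MDPAgent<TState, TAction> agent, IRandomizer r, int numberOfTrials)
    where TState : class
    where TAction : class
{
    for (int i = 0; i < numberOfTrials; i++)
    {
        // Each trial restarts at the MDP's initial state and loops
        // DecideAction/Execute until DecideAction returns null.
        agent.ExecuteTrial(r);
    }
}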
public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    // If the perceived state is new, seed its utility with the observed reward.
    if (!(utilityFunction.hasUtilityFor(perception.getState())))
    {
        utilityFunction.setUtility(perception.getState(), perception.getReward());
        mdp.setReward(perception.getState(), perception.getReward());
    }
    if (!(previousState == null))
    {
        // Count the (s, a) pair and the observed (s, a, s') transition.
        Pair<STATE_TYPE, ACTION_TYPE> prevState =
            new Pair<STATE_TYPE, ACTION_TYPE>(previousState, previousAction);
        if (!nsa.ContainsKey(prevState))
        {
            nsa.Add(prevState, 1.0);
        }
        else
        {
            nsa[prevState]++;
        }
        MDPTransition<STATE_TYPE, ACTION_TYPE> prevTransition =
            new MDPTransition<STATE_TYPE, ACTION_TYPE>(previousState, previousAction, currentState);
        if (!nsasdash.ContainsKey(prevTransition))
        {
            nsasdash.Add(prevTransition, 1.0);
        }
        else
        {
            nsasdash[prevTransition]++;
        }
        // Re-estimate each transition probability as N(s, a, s') / N(s, a).
        foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> transition in nsasdash.Keys)
        {
            if (nsasdash[transition] != 0.0)
            {
                double newValue = nsasdash[transition]
                    / nsa[new Pair<STATE_TYPE, ACTION_TYPE>(transition.getInitialState(), transition.getAction())];
                mdp.setTransitionProbability(transition, newValue);
            }
        }
        List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions =
            mdp.getTransitionsWith(previousState, policy.getAction(previousState));
        utilityFunction = valueDetermination(validTransitions, 1);
    }
    if (mdp.isTerminalState(currentState))
    {
        previousState = default(STATE_TYPE);
        previousAction = default(ACTION_TYPE);
    }
    else
    {
        previousState = currentState;
        previousAction = policy.getAction(currentState);
    }
    return previousAction;
}
public abstract ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception);
public void updateFromPerception(MDPPerception<STATE_TYPE> perception)
{
    currentState = perception.getState();
    currentReward = perception.getReward();
}
public abstract TAction DecideAction(MDPPerception<TState> perception);
public void UpdateFromPerception(MDPPerception<TState> perception)
{
    this.CurrentState = perception.GetState();
    this.CurrentReward = perception.GetReward();
}