/// <summary>
/// Folds the latest perception into the utility function and chooses the
/// action to report for the current state.
/// </summary>
/// <remarks>
/// This method reads <c>CurrentState</c> and <c>CurrentReward</c> but never
/// assigns them; presumably the caller refreshes them beforehand - TODO confirm.
/// </remarks>
/// <param name="perception">Perception whose state and reward seed the utility
/// function and the MDP reward when that state has no utility yet.</param>
/// <returns>
/// <c>policy.GetAction(CurrentState)</c> when <c>CurrentState</c> is not
/// terminal; otherwise the freshly cleared (null) <c>PreviousAction</c>.
/// </returns>
public override TAction DecideAction(MDPPerception<TState> perception)
{
    TState perceivedState = perception.GetState();

    // A state without a utility entry is new: seed both its utility and
    // its MDP reward from the perceived reward.
    if (!utilityFunction.HasUtilityFor(perceivedState))
    {
        var perceivedReward = perception.GetReward();
        utilityFunction.SetUtility(perceivedState, perceivedReward);
        MDP.SetReward(perceivedState, perceivedReward);
    }

    // Only update once there is a previous state to update from.
    if (PreviousState != null)
    {
        stateCount.IncrementFor(PreviousState);
        utilityFunction = UpdateUtilityFunction(1.0);
    }

    bool atTerminalState = MDP.IsTerminalState(CurrentState);
    if (atTerminalState)
    {
        // Trial is over: forget the previous step.
        PreviousState = null;
        PreviousAction = null;
        //TODO: make sure that 0 is appropriate value for what used to be null in java
        previousReward = 0;
    }
    else
    {
        // Remember this step so the next call can learn from it.
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
        previousReward = CurrentReward;
    }

    return PreviousAction;
}
/// <summary>
/// Updates the observed transition counts and the MDP's transition
/// probabilities from the latest step, re-evaluates the utility function,
/// and chooses the action to report for the current state.
/// </summary>
/// <remarks>
/// This method reads <c>CurrentState</c> but never assigns it; presumably the
/// caller refreshes it beforehand - TODO confirm.
/// </remarks>
/// <param name="perception">Perception whose state and reward seed the utility
/// function and the MDP reward when that state has no utility yet.</param>
/// <returns>
/// <c>policy.GetAction(CurrentState)</c> when <c>CurrentState</c> is not
/// terminal; otherwise the freshly cleared (null) <c>PreviousAction</c>.
/// </returns>
public override TAction DecideAction(MDPPerception<TState> perception)
{
    TState perceivedState = perception.GetState();

    // A state without a utility entry is new: seed both its utility and
    // its MDP reward from the perceived reward.
    if (!utilityFunction.HasUtilityFor(perceivedState))
    {
        utilityFunction.SetUtility(perceivedState, perception.GetReward());
        MDP.SetReward(perceivedState, perception.GetReward());
    }

    if (!(PreviousState == null))
    {
        // Build each key once and use TryGetValue so every counter update is
        // one read plus one write instead of ContainsKey + get + set.
        // A missing key leaves the out value at 0.0, so the first
        // observation is stored as 1.0, exactly as before.
        Pair<TState, TAction> stateAction =
            new Pair<TState, TAction>(PreviousState, PreviousAction);
        double stateActionCount;
        nsa.TryGetValue(stateAction, out stateActionCount);
        nsa[stateAction] = stateActionCount + 1;

        MDPTransition<TState, TAction> observedTransition =
            new MDPTransition<TState, TAction>(PreviousState, PreviousAction, CurrentState);
        double observedCount;
        nsasdash.TryGetValue(observedTransition, out observedCount);
        nsasdash[observedTransition] = observedCount + 1;

        // Re-estimate every recorded transition's probability as
        // count(s, a, s') / count(s, a).
        foreach (MDPTransition<TState, TAction> transition in nsasdash.Keys)
        {
            double transitionCount = nsasdash[transition];
            if (transitionCount != 0.0)
            {
                double newValue = transitionCount / nsa[new Pair<TState, TAction>(
                    transition.GetInitialState(), transition.GetAction())];
                MDP.SetTransitionProbability(transition, newValue);
            }
        }

        // Note: the transitions are selected with the policy's action for the
        // previous state, not with PreviousAction (kept as in the original).
        IList<MDPTransition<TState, TAction>> validTransitions =
            MDP.GetTransitionsWith(PreviousState, policy.GetAction(PreviousState));
        // The second argument (1) is passed through unchanged; its meaning is
        // defined by ValueDetermination, which is not visible here.
        utilityFunction = this.ValueDetermination(validTransitions, 1);
    }

    if (MDP.IsTerminalState(CurrentState))
    {
        // Trial is over: forget the previous step.
        PreviousState = null;
        PreviousAction = null;
    }
    else
    {
        // Remember this step so the next call can learn from it.
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
    }

    return PreviousAction;
}
/// <summary>
/// Stores the perceived state and reward, applies a Q update for the
/// previous state/action pair, and chooses the action to report.
/// </summary>
/// <param name="perception">Perception whose state and reward are copied into
/// <c>CurrentState</c> and <c>CurrentReward</c>.</param>
/// <returns>
/// <c>PreviousAction</c> as left by this call: whatever
/// <c>UpdateLearnerState</c> leaves there on a non-terminal step, or null once
/// <c>CurrentState</c> is terminal.
/// </returns>
public override TAction DecideAction(MDPPerception<TState> perception)
{
    CurrentState = perception.GetState();
    CurrentReward = perception.GetReward();

    // At the start of a trial there is nothing to learn from yet, so a
    // randomly selected action is handed to the learner state.
    if (StartingTrial())
    {
        TAction randomAction = SelectRandomAction();
        UpdateLearnerState(randomAction);
        return PreviousAction;
    }

    // Evaluate the terminal check first so the call order matches the
    // branch-by-branch form this was written in.
    bool reachedTerminal = MDP.IsTerminalState(CurrentState);

    // Both outcomes count the previous state/action pair and run the Q
    // update. The 0.8 argument is passed through unchanged; its meaning is
    // defined by UpdateQ, which is not visible here.
    IncrementStateActionCount(PreviousState, PreviousAction);
    TAction actionFromUpdate = UpdateQ(0.8);

    if (!reachedTerminal)
    {
        UpdateLearnerState(actionFromUpdate);
        return PreviousAction;
    }

    // Terminal state: the action from the Q update is discarded and the
    // previous-step bookkeeping is cleared.
    PreviousAction = null;
    PreviousState = null;
    //TODO: make sure that this is ok value for what used to be null in java
    previousReward = double.NegativeInfinity;
    return PreviousAction;
}
/// <summary>
/// Copies the state and reward carried by a perception into
/// <c>CurrentState</c> and <c>CurrentReward</c>.
/// </summary>
/// <param name="perception">Perception to read the state and reward from.</param>
public void UpdateFromPerception(MDPPerception<TState> perception)
{
    CurrentState = perception.GetState();
    CurrentReward = perception.GetReward();
}