/// <summary>
/// Registers the perceived state if it has no utility yet, refreshes the utility
/// estimate once a previous state exists, and then selects the action for
/// <c>CurrentState</c>.
/// </summary>
/// <param name="perception">Source of the perceived state and its reward.</param>
/// <returns>
/// <c>PreviousAction</c> after the bookkeeping below: the policy's action for
/// <c>CurrentState</c>, or <c>null</c> when <c>CurrentState</c> is terminal.
/// </returns>
public override TAction DecideAction(MDPPerception<TState> perception)
{
    var perceivedState = perception.GetState();

    // A state without a utility entry is new: seed its utility and the MDP's
    // reward for it with the perceived reward.
    if (!utilityFunction.HasUtilityFor(perceivedState))
    {
        var perceivedReward = perception.GetReward();
        utilityFunction.SetUtility(perceivedState, perceivedReward);
        MDP.SetReward(perceivedState, perceivedReward);
    }

    // Only update once there is a previous state to update from.
    if (PreviousState != null)
    {
        stateCount.IncrementFor(PreviousState);
        utilityFunction = UpdateUtilityFunction(1.0);
    }

    if (!MDP.IsTerminalState(CurrentState))
    {
        // Remember the current step so the next call can update from it.
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
        previousReward = CurrentReward;
    }
    else
    {
        // Terminal state: clear the remembered step.
        PreviousState = null;
        PreviousAction = null;
        // TODO: make sure that 0 is an appropriate value for what used to be null in Java.
        previousReward = 0;
    }

    return PreviousAction;
}
/// <summary>
/// Registers the perceived state if it has no utility yet, updates the observed
/// (state, action) and (state, action, next state) counts, pushes the resulting
/// frequency ratios into the MDP as transition probabilities, recomputes the utility
/// function, and then selects the action for <c>CurrentState</c>.
/// </summary>
/// <param name="perception">Source of the perceived state and its reward.</param>
/// <returns>
/// <c>PreviousAction</c> after the bookkeeping below: the policy's action for
/// <c>CurrentState</c>, or <c>null</c> when <c>CurrentState</c> is terminal.
/// </returns>
public override TAction DecideAction(MDPPerception<TState> perception)
{
    var perceivedState = perception.GetState();

    // A state without a utility entry is new: seed its utility and the MDP's
    // reward for it with the perceived reward.
    if (!utilityFunction.HasUtilityFor(perceivedState))
    {
        var perceivedReward = perception.GetReward();
        utilityFunction.SetUtility(perceivedState, perceivedReward);
        MDP.SetReward(perceivedState, perceivedReward);
    }

    if (PreviousState != null)
    {
        // Count one more occurrence of (previous state, previous action).
        var stateAction = new Pair<TState, TAction>(PreviousState, PreviousAction);
        nsa[stateAction] = nsa.ContainsKey(stateAction) ? nsa[stateAction] + 1 : 1.0;

        // Count one more occurrence of the transition that was just observed.
        var observed = new MDPTransition<TState, TAction>(PreviousState, PreviousAction, CurrentState);
        nsasdash[observed] = nsasdash.ContainsKey(observed) ? nsasdash[observed] + 1 : 1.0;

        // Transition probability = transition count / count of its (state, action) pair.
        foreach (MDPTransition<TState, TAction> transition in nsasdash.Keys)
        {
            double transitionCount = nsasdash[transition];
            if (transitionCount == 0.0)
            {
                continue;
            }

            var origin = new Pair<TState, TAction>(transition.GetInitialState(), transition.GetAction());
            double probability = transitionCount / nsa[origin];
            MDP.SetTransitionProbability(transition, probability);
        }

        IList<MDPTransition<TState, TAction>> validTransitions =
            MDP.GetTransitionsWith(PreviousState, policy.GetAction(PreviousState));
        utilityFunction = ValueDetermination(validTransitions, 1);
    }

    if (!MDP.IsTerminalState(CurrentState))
    {
        // Remember the current step so the next call can update from it.
        PreviousState = CurrentState;
        PreviousAction = policy.GetAction(CurrentState);
    }
    else
    {
        // Terminal state: clear the remembered step.
        PreviousState = null;
        PreviousAction = null;
    }

    return PreviousAction;
}
/// <summary>
/// Builds a copy of the current utility function in which the utility of
/// <c>PreviousState</c> is adjusted by
/// <c>weight * (previousReward + gamma * U(CurrentState) - U(PreviousState))</c>,
/// where <c>weight</c> is <c>stateCount.ProbabilityOf(PreviousState)</c>.
/// </summary>
/// <param name="gamma">Factor applied to the utility of <c>CurrentState</c>.</param>
/// <returns>The adjusted copy; the existing utility function object is not modified here.</returns>
private MDPUtilityFunction<TState> UpdateUtilityFunction(double gamma)
{
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    double previousUtility = utilityFunction.GetUtility(PreviousState);
    double discountedCurrent = gamma * utilityFunction.GetUtility(CurrentState);
    double utilityGap = discountedCurrent - previousUtility;

    // NOTE(review): the weight looks like a visit-frequency based step size — confirm
    // against stateCount's implementation.
    double adjustment = stateCount.ProbabilityOf(PreviousState) * (previousReward + utilityGap);

    updated.SetUtility(PreviousState, previousUtility + adjustment);
    return updated;
}
/// <summary>
/// Builds a copy of the current utility function in which the utility of the first
/// transition's initial state is replaced by
/// <c>reward + gamma * sum(probability(t) * U(destination(t)))</c> over the given
/// transitions.
/// </summary>
/// <param name="validTransitions">
/// Transitions to sum over; the initial state is taken from the first element.
/// </param>
/// <param name="gamma">Factor applied to the summed term.</param>
/// <returns>
/// The adjusted copy, or an unmodified copy when <paramref name="validTransitions"/>
/// is empty.
/// </returns>
private MDPUtilityFunction<TState> ValueDetermination(
    IList<MDPTransition<TState, TAction>> validTransitions, double gamma)
{
    MDPUtilityFunction<TState> updated = utilityFunction.Copy();

    // Nothing to sum over: hand back the plain copy.
    if (validTransitions.Count < 1)
    {
        return updated;
    }

    TState origin = validTransitions[0].GetInitialState();
    double reward = MDP.GetRewardFor(origin);

    // Accumulate in list order, starting from zero.
    double weightedUtility = 0.0;
    foreach (MDPTransition<TState, TAction> transition in validTransitions)
    {
        weightedUtility += MDP.GetTransitionProbability(transition)
            * utilityFunction.GetUtility(transition.GetDestinationState());
    }

    updated.SetUtility(origin, reward + (gamma * weightedUtility));
    return updated;
}