public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    if (!(utilityFunction.hasUtilityFor(perception.getState())))
    {
        // the perceived state is new: seed its utility and reward
        utilityFunction.setUtility(perception.getState(), perception.getReward());
        mdp.setReward(perception.getState(), perception.getReward());
    }
    if (!(previousState == null))
    {
        // learn from the transition previousState -> currentState
        stateCount.incrementFor(previousState);
        utilityFunction = updateUtilityFunction(1.0);
    }
    if (mdp.isTerminalState(currentState))
    {
        // end of trial: reset the learner's bookkeeping
        previousState = default(STATE_TYPE);
        previousAction = default(ACTION_TYPE);
        previousReward = double.MinValue;
    }
    else
    {
        previousState = currentState;
        previousAction = policy.getAction(currentState);
        previousReward = currentReward;
    }
    return previousAction;
}
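// A minimal sketch of the update that updateUtilityFunction(1.0) above is
// expected to perform, assuming the standard temporal-difference rule
// U(s) <- U(s) + alpha * (r + gamma * U(s') - U(s)) with a learning rate
// that decays as 1/N(s). TdBackup and its parameters are hypothetical
// names, not part of the agent's API.
static double TdBackup(double utility, double reward, double nextUtility,
                       double gamma, int visitCount)
{
    double alpha = 1.0 / visitCount;                  // decaying learning rate
    double tdError = reward + gamma * nextUtility - utility;
    return utility + alpha * tdError;                 // move toward the TD target
}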
public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    currentState = perception.getState();
    currentReward = perception.getReward();

    if (startingTrial())
    {
        // first step of a trial: no previous (state, action) pair to learn from
        ACTION_TYPE chosenAction = selectRandomAction();
        updateLearnerState(chosenAction);
        return previousAction;
    }
    if (mdp.isTerminalState(currentState))
    {
        // terminal state: do a final backup, then reset for the next trial
        incrementStateActionCount(previousState, previousAction);
        updateQ(0.8);
        previousAction = default(ACTION_TYPE);
        previousState = default(STATE_TYPE);
        previousReward = double.MinValue;
        return previousAction;
    }
    else
    {
        incrementStateActionCount(previousState, previousAction);
        ACTION_TYPE chosenAction = updateQ(0.8);
        updateLearnerState(chosenAction);
        return previousAction;
    }
}
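// Hypothetical sketch of the backup behind updateQ(0.8) above, assuming the
// standard Q-learning rule
// Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)),
// where gamma is the 0.8 passed in. QBackup and its parameters are
// illustrative names only.
static double QBackup(double q, double reward, double bestNextQ,
                      double gamma, double alpha)
{
    // at a terminal state bestNextQ is 0, so the target reduces to the reward
    return q + alpha * (reward + gamma * bestNextQ - q);
}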
public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
{
    if (!(utilityFunction.hasUtilityFor(perception.getState())))
    {
        // the perceived state is new: seed its utility and reward
        utilityFunction.setUtility(perception.getState(), perception.getReward());
        mdp.setReward(perception.getState(), perception.getReward());
    }
    if (!(previousState == null))
    {
        // count the (state, action) pair just taken
        Pair<STATE_TYPE, ACTION_TYPE> prevState =
            new Pair<STATE_TYPE, ACTION_TYPE>(previousState, previousAction);
        if (!nsa.ContainsKey(prevState))
        {
            nsa.Add(prevState, 1.0);
        }
        else
        {
            nsa[prevState]++;
        }

        // count the observed transition (s, a) -> s'
        MDPTransition<STATE_TYPE, ACTION_TYPE> prevTransition =
            new MDPTransition<STATE_TYPE, ACTION_TYPE>(previousState, previousAction, currentState);
        if (!nsasdash.ContainsKey(prevTransition))
        {
            nsasdash.Add(prevTransition, 1.0);
        }
        else
        {
            nsasdash[prevTransition]++;
        }

        // re-estimate the transition model: P(s'|s,a) = N(s,a,s') / N(s,a)
        foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> transition in nsasdash.Keys)
        {
            if (nsasdash[transition] != 0.0)
            {
                double newValue = nsasdash[transition]
                    / nsa[new Pair<STATE_TYPE, ACTION_TYPE>(
                        transition.getInitialState(), transition.getAction())];
                mdp.setTransitionProbability(transition, newValue);
            }
        }

        // solve for the utilities under the current policy and updated model
        List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions =
            mdp.getTransitionsWith(previousState, policy.getAction(previousState));
        utilityFunction = valueDetermination(validTransitions, 1);
    }
    if (mdp.isTerminalState(currentState))
    {
        previousState = default(STATE_TYPE);
        previousAction = default(ACTION_TYPE);
    }
    else
    {
        previousState = currentState;
        previousAction = policy.getAction(currentState);
    }
    return previousAction;
}
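// Self-contained illustration of the maximum-likelihood model estimate the
// foreach loop above computes, P(s'|s,a) = N(s,a,s') / N(s,a), using plain
// dictionaries and string states/actions as hypothetical stand-ins for
// Pair<,> and MDPTransition<,>.
using System;
using System.Collections.Generic;

class MaximumLikelihoodModelDemo
{
    static void Main()
    {
        var nsa = new Dictionary<(string s, string a), double>();
        var nsasdash = new Dictionary<(string s, string a, string s2), double>();

        // three trials of taking "right" in s1: twice we reach s2, once we stay
        foreach (var s2 in new[] { "s2", "s2", "s1" })
        {
            nsa[("s1", "right")] = nsa.GetValueOrDefault(("s1", "right")) + 1.0;
            nsasdash[("s1", "right", s2)] = nsasdash.GetValueOrDefault(("s1", "right", s2)) + 1.0;
        }

        foreach (var t in nsasdash.Keys)
        {
            double p = nsasdash[t] / nsa[(t.s, t.a)];
            Console.WriteLine($"P({t.s2} | {t.s}, {t.a}) = {p:F2}");  // 0.67 and 0.33
        }
    }
}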
public void updateFromPerception(MDPPerception<STATE_TYPE> perception)
{
    currentState = perception.getState();
    currentReward = perception.getReward();
}
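// A sketch of the call order this helper implies, assuming MDPPerception
// exposes a (state, reward) constructor; environment, its methods, and
// RunStep are hypothetical names used only for illustration. The perception
// is recorded before the agent acts, which is how currentState and
// currentReward come to be set when decideAction reads them.
void RunStep()
{
    MDPPerception<STATE_TYPE> perception = new MDPPerception<STATE_TYPE>(
        environment.currentState(), environment.currentReward());
    agent.updateFromPerception(perception);               // record state and reward
    ACTION_TYPE action = agent.decideAction(perception);  // then choose an action
    environment.execute(action);
}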