/// <summary>
/// Runs a single learning step that maintains both the Q-table and the
/// E-table through <c>SARSA_LAMBDA</c>: bump the E-value of the pending
/// (state, action) pair, act, read the reward and the resulting state,
/// pick the follow-up Q-value via the strategy, update both tables, and
/// finally shift the "last" bookkeeping fields forward.
/// </summary>
/// <param name="currentState">
/// State handed to <c>firstRound</c> to seed the bookkeeping fields; the
/// state reached after acting is determined inside this method.
/// </param>
/// <param name="strategie">
/// Strategy used to look up Q-/E-values and to choose the next Q-value.
/// </param>
private void sARSALAMBDA(State currentState, Strategie strategie)
{
    // Seeds lastState / lastQValue / action while lastQValue still carries Begin.
    firstRound(currentState, strategie);

    // Q(s,a)
    QValue pendingQ = strategie.getQValue(lastState, action, qValues);

    // e(s,a) <- e(s,a) + 1
    // This is the accumulating ("stacking") variant; a replacing trace
    // would call setValue(1) here instead.
    EValue trace = strategie.getEValue(lastState, action, eValues);
    trace.setValue(trace.getValue() + 1);

    // Execute a.
    ExecuteAction(pendingQ.getAction());

    // Observe r.
    reward = getReward();

    // Observe s'.
    State nextState = DetermineState();

    // Q(s',a') chosen according to the strategy.
    QValue nextQ = strategie.getQValueForStrategie(nextState, qValues);

    // Update the Q-table.
    qValues = SARSA_LAMBDA.updateQTable(
        lastState,
        lastQValue.getAction(),
        nextState,
        nextQ.getAction(),
        reward,
        qValues,
        eValues);

    // Update the E-table.
    // NOTE(review): presumably applies e(s,a) <- gamma * lambda * e(s,a);
    // confirm against SARSA_LAMBDA.updateETable.
    eValues = SARSA_LAMBDA.updateETable(
        lastState,
        lastQValue.getAction(),
        nextState,
        nextQ.getAction(),
        eValues);

    // s <- s', a <- a'
    lastState = nextState;
    lastQValue = nextQ;
    action = nextQ.getAction();
}
/// <summary>
/// Runs a single learning step that maintains the Q-table through
/// <c>SARSA.updateTable</c>: act on the pending (state, action) pair, read
/// the reward and the resulting state, pick the follow-up Q-value via the
/// strategy, update the table, and shift the "last" bookkeeping fields.
/// </summary>
/// <param name="currentState">
/// State handed to <c>firstRound</c> to seed the bookkeeping fields; the
/// state reached after acting is determined inside this method.
/// </param>
/// <param name="strategie">
/// Strategy used to look up Q-values and to choose the next Q-value.
/// </param>
private void sARSA(State currentState, Strategie strategie)
{
    // Seeds lastState / lastQValue / action while lastQValue still carries Begin.
    firstRound(currentState, strategie);

    // Q(s,a)
    QValue pendingQ = strategie.getQValue(lastState, action, qValues);

    // Execute a.
    ExecuteAction(pendingQ.getAction());

    // Observe r.
    reward = getReward();

    // Observe s'.
    State nextState = DetermineState();

    // Q(s',a') chosen according to the strategy.
    QValue nextQ = strategie.getQValueForStrategie(nextState, qValues);

    // Update the Q-table.
    qValues = SARSA.updateTable(
        lastState,
        lastQValue.getAction(),
        nextState,
        nextQ.getAction(),
        reward,
        qValues);

    // s <- s', a <- a'
    lastState = nextState;
    lastQValue = nextQ;
    action = nextQ.getAction();
}
/// <summary>
/// Seeds <c>lastState</c>, <c>lastQValue</c> and <c>action</c> when
/// <c>lastQValue</c> still carries the <c>Begin</c> action; otherwise
/// leaves all fields untouched.
/// </summary>
/// <param name="currentState">State to store as the initial <c>lastState</c>.</param>
/// <param name="strategie">Strategy used to choose the initial Q-value for that state.</param>
private void firstRound(State currentState, Strategie strategie)
{
    bool alreadyStarted = !lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin);
    if (alreadyStarted)
    {
        return;
    }

    lastState = currentState;
    lastQValue = strategie.getQValueForStrategie(lastState, qValues);
    action = lastQValue.getAction();
}
/// <summary>
/// Looks up the first E-value entry whose state and action both equal the
/// given ones.
/// </summary>
/// <param name="state">State to match against each entry's state.</param>
/// <param name="action">Action to match against each entry's action.</param>
/// <param name="eValues">List of entries to search, in list order.</param>
/// <returns>The first matching entry, or <c>null</c> when none matches.</returns>
public static EValue getEValueEntry(State state, Assets.Scripts.Enum.Action action, List<EValue> eValues)
{
    // List<T>.Find yields the first match, or the type's default (null here) if there is none.
    return eValues.Find(entry => entry.getState().Equals(state) && entry.getAction().Equals(action));
}
/// <summary>
/// Carries out the given action: <c>Flap</c> triggers
/// <c>birdMvmt.Flap()</c>, every other action does nothing.
/// </summary>
/// <param name="action">Action to carry out.</param>
/// <returns>The same action that was passed in.</returns>
Assets.Scripts.Enum.Action ExecuteAction(Assets.Scripts.Enum.Action action)
{
    // Anything other than Flap is a deliberate no-op.
    if (action != Assets.Scripts.Enum.Action.Flap)
    {
        return action;
    }

    birdMvmt.Flap();
    return action;
}