Exemplo n.º 1
0
    /// <summary>
    /// Runs a single SARSA step: executes the carried-over action, observes the
    /// reward and the successor state, selects the next Q-value through the
    /// strategy and writes the update back into the Q-table.
    /// </summary>
    /// <param name="currentState">State handed to firstRound for initialisation; the successor state is re-read via DetermineState().</param>
    /// <param name="strategie">Strategy used to look up and select Q-values.</param>
    private void sARSA(State currentState, Strategie strategie)
    {
        // Initialises lastState / lastQValue / action while the last action is still Begin.
        firstRound(currentState, strategie);

        // Q(s, a) for the state/action pair carried over from the previous step.
        QValue pending = strategie.getQValue(lastState, action, qValues);

        // Execute a, then observe r and s' (order matters: act first, observe afterwards).
        ExecuteAction(pending.getAction());
        reward = getReward();
        State nextState = DetermineState();

        // Q(s', a') chosen according to the strategy.
        QValue nextQValue = strategie.getQValueForStrategie(nextState, qValues);

        // Update the table.
        qValues = SARSA.updateTable(
            lastState,
            lastQValue.getAction(),
            nextState,
            nextQValue.getAction(),
            reward,
            qValues);

        // s <- s', a <- a'
        lastState  = nextState;
        lastQValue = nextQValue;
        action     = nextQValue.getAction();
    }
Exemplo n.º 2
0
    /// <summary>
    /// Runs a single Q-learning step: selects a Q-value for the given state via
    /// the strategy, executes its action and, unless the previous action is still
    /// the Begin marker, updates the Q-table from the previous and the new Q-value.
    /// </summary>
    /// <param name="currentState">State for which the next action is selected.</param>
    /// <param name="strategie">Strategy used to select the Q-value.</param>
    private void qLearning(State currentState, Strategie strategie)
    {
        var chosen = strategie.getQValueForStrategie(currentState, qValues);
        ExecuteAction(chosen.getAction());

        // No table update while the previous action is still the Begin marker.
        bool hasPreviousStep = !lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin);
        if (hasPreviousStep)
        {
            // The reward is only queried when an update actually takes place.
            qValues = QLearning.updateTable(lastQValue, chosen, qValues, getReward());
        }

        // Remember this step for the next call.
        lastState  = currentState;
        lastQValue = chosen;
    }
Exemplo n.º 3
0
    /// <summary>
    /// Runs a single SARSA(lambda) step: bumps the eligibility value of the
    /// carried-over state/action pair, executes the action, observes the reward
    /// and the successor state, selects the next Q-value through the strategy and
    /// then updates both the Q-table and the E-table.
    /// </summary>
    /// <param name="currentState">State handed to firstRound for initialisation; the successor state is re-read via DetermineState().</param>
    /// <param name="strategie">Strategy used to look up Q-values and E-values.</param>
    private void sARSALAMBDA(State currentState, Strategie strategie)
    {
        // Initialises lastState / lastQValue / action while the last action is still Begin.
        firstRound(currentState, strategie);

        // Q(s, a) for the state/action pair carried over from the previous step.
        QValue pending = strategie.getQValue(lastState, action, qValues);

        // e(s, a) <- e(s, a) + 1
        // This is the "stacking" variant; the "replacing" variant would instead
        // do trace.setValue(1).
        EValue trace = strategie.getEValue(lastState, action, eValues);
        trace.setValue(trace.getValue() + 1);

        // Execute a, then observe r and s' (order matters: act first, observe afterwards).
        ExecuteAction(pending.getAction());
        reward = getReward();
        State nextState = DetermineState();

        // Q(s', a') chosen according to the strategy.
        QValue nextQValue = strategie.getQValueForStrategie(nextState, qValues);

        // Update the Q-table.
        qValues = SARSA_LAMBDA.updateQTable(
            lastState,
            lastQValue.getAction(),
            nextState,
            nextQValue.getAction(),
            reward,
            qValues,
            eValues);

        // e(s, a) <- gamma * lambda * e(s, a)
        // Per entry, updateETable is meant to do:
        //   eValue.setValue(gamma * lambda * eValue.getValue());
        eValues = SARSA_LAMBDA.updateETable(
            lastState,
            lastQValue.getAction(),
            nextState,
            nextQValue.getAction(),
            eValues);

        // s <- s', a <- a'
        lastState  = nextState;
        lastQValue = nextQValue;
        action     = nextQValue.getAction();
    }
Exemplo n.º 4
0
 /// <summary>
 /// Initialises the carried-over step data on the first round, i.e. while the
 /// last recorded action is still the Begin marker. Does nothing otherwise.
 /// </summary>
 /// <param name="currentState">State to store as the last state.</param>
 /// <param name="strategie">Strategy used to select the initial Q-value.</param>
 private void firstRound(State currentState, Strategie strategie)
 {
     // Already past the first round: leave the stored state/action untouched.
     if (!lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin))
     {
         return;
     }

     // Store the state, select a Q-value for it and remember that Q-value's action.
     lastState  = currentState;
     lastQValue = strategie.getQValueForStrategie(lastState, qValues);
     action     = lastQValue.getAction();
 }