/// <summary>
/// Returns the <see cref="StateRewardPair"/> stored in the model for the given
/// state/action pair, lazily creating a default entry on first access.
/// The default entry uses the pair's own state and the configured initial value.
/// </summary>
/// <param name="saPair">The state/action pair used as lookup key.</param>
/// <returns>The cached or newly created StateRewardPair; never null.</returns>
public StateRewardPair getStateRewardPair(StateActionPair saPair)
{
    // TryGetValue replaces the previous ContainsKey + indexer-write + indexer-read
    // pattern, which performed three hash lookups where one (plus one insert on
    // the miss path) suffices.
    StateRewardPair pair;
    if (!_modelTable.TryGetValue(saPair, out pair))
    {
        // First access: seed with the pair's state and the initial value.
        // NOTE(review): the original comment indicates the *successor* state
        // should eventually be derived here via a StateExtractor helper that
        // retrieves the possible movements for any state or position,
        // independently of the mower — that generalization is still a TODO.
        pair = new StateRewardPair(saPair.state, _initVal);
        _modelTable[saPair] = pair;
    }
    return pair;
}
/// <summary>
/// Performs one learning step from the observed reward: updates the Q table for
/// the previously taken action, either via eligibility traces (Watkins-style
/// Q(lambda)) or plain one-step Q-learning, and optionally runs N virtual
/// planning updates against a learned model (refined model or simple Dyna-Q).
/// </summary>
/// <param name="reward">The reward observed after executing the last action.</param>
public override void Learn(float reward)
{
    // Push the current hyper-parameters to the first receiver (UI label) when enabled.
    if (_showParamLabel)
    {
        _receivers[0].InitialValues(Greediness, DiscountValue, LearnRate, InitialQValue, Run_with_etraces, Gamma, ModelPlanning, Refined, N);
    }
    // --- Old state: the (state, action) pair whose Q value is being updated. ---
    StateActionPair oldSAP = new StateActionPair(_lastState, _lastAction);
    float oldQvalue = _qTable.getQValue(oldSAP);
    // --- Current state: greedy action and its Q value form the bootstrap target. ---
    State currentState = base.CurrentState;
    MovementAction bestCurrentAction = _qTable.getBestActionForState(currentState);
    StateActionPair currentSAP = new StateActionPair(currentState, bestCurrentAction);
    float bestCurrentQValue = _qTable.getQValue(currentSAP);
    if (Run_with_etraces)
    {
        // Q(lambda): TD error delta = r + gamma*maxQ(s') - Q(s,a), then every
        // eligible entry is moved by LearnRate * delta * eligibility, and all
        // traces decay by DiscountValue * Gamma (here Gamma plays lambda's role).
        float delta = reward + DiscountValue * bestCurrentQValue - oldQvalue;
        _eTable.SetEligibilityValue(oldSAP, 1f);
        _qTable.AddScaledValues(LearnRate * delta, _eTable);
        _eTable.ScaleAllEligibilityValues(DiscountValue * Gamma);
    }
    else
    {
        // Standard one-step Q-learning: lerp from the old Q value toward the
        // target r + gamma*maxQ(s') with step size LearnRate.
        float newQValue = Mathf.Lerp(oldQvalue, reward + (DiscountValue * bestCurrentQValue), LearnRate);
        _qTable.setQValue(oldSAP, newQValue);
    }
    // --- Refined model planning: sample full virtual transitions from _mTable. ---
    if (ModelPlanning && Refined)
    {
        // Fold the observed state into the learned model first.
        _mTable.IncorporateObservedState(currentState);
        State virtualFromState, virtualToState;
        MovementAction virtualPerformedAction;
        float virtualReward;
        // Perform N virtual planning steps.
        for (int i = 0; i < N; i++)
        {
            // Generate a random virtual step; may fail while the model is sparse.
            bool virtualStepGenerated = _mTable.GenerateRandomModelStep(out virtualFromState, out virtualPerformedAction, out virtualToState, out virtualReward);
            if (virtualStepGenerated)
            {
                StateActionPair virtualFromSAP = new StateActionPair(virtualFromState, virtualPerformedAction);
                // Standard Q-learning update applied to the virtual transition.
                float fromStateQVal = _qTable.getQValue(virtualFromSAP);
                // Greedy action in the virtual successor state supplies the bootstrap value.
                MovementAction bestAction = _qTable.getBestActionForState(virtualToState);
                StateActionPair virtualToSAP = new StateActionPair(virtualToState, bestAction);
                float toStateQVal = _qTable.getQValue(virtualToSAP);
                float newQVal = Mathf.Lerp(fromStateQVal, virtualReward + (DiscountValue * toStateQVal), LearnRate);
                _qTable.setQValue(virtualFromSAP, newQVal);
            }
        }
    }
    // --- Simple Dyna-Q planning: replay remembered (s,a) -> (s',r) outcomes. ---
    if (ModelPlanning && !Refined)
    {
        // Record the just-observed outcome for the old state/action pair.
        _simpleMTable.setStateRewardPairAtStateActionPair(oldSAP, new StateRewardPair(currentState, reward));
        for (int i = 0; i < N; i++)
        {
            // Replay a uniformly chosen previously visited state/action pair.
            StateActionPair randSAP = _qTable.getRandomVisitedStateAndAction();
            StateRewardPair srp = _simpleMTable.getStateRewardPair(randSAP);
            // Standard Q-learning update using the remembered successor and reward.
            float qVal = _qTable.getQValue(randSAP);
            MovementAction bAct = _qTable.getBestActionForState(srp.State);
            StateActionPair cSAP = new StateActionPair(srp.State, bAct);
            float bQVal = _qTable.getQValue(cSAP);
            float newQVal = Mathf.Lerp(qVal, srp.Reward + (DiscountValue * bQVal), LearnRate);
            _qTable.setQValue(randSAP, newQVal);
        }
    }
}
/// <summary>
/// Records the outcome of taking an action: stores the given next-state/reward
/// pair under the supplied state/action key, replacing any existing entry.
/// </summary>
/// <param name="saPair">Key: the state/action pair that was executed.</param>
/// <param name="stateReward">Value: the resulting state together with the observed reward.</param>
public void setStateRewardPairAtStateActionPair(StateActionPair saPair, StateRewardPair stateReward)
{
    // The indexer inserts a new entry or overwrites an existing one in a single call.
    _modelTable[saPair] = stateReward;
}
/// <summary>
/// Initializes a new instance of the <see cref="ModelTableSimple+ModelTableSimpleKeyValuePair"/>
/// class, binding a state/action key to its state/reward value.
/// </summary>
/// <param name="stateActionPairKey">The state/action pair acting as the key.</param>
/// <param name="stateRewardPairValue">The state/reward pair acting as the value.</param>
public ModelTableSimpleKeyValuePair(StateActionPair stateActionPairKey, StateRewardPair stateRewardPairValue)
{
    // Plain property capture; the two assignments are independent of each other.
    StateRewardPairValue = stateRewardPairValue;
    StateActionPairKey = stateActionPairKey;
}