/// <summary>
/// Moves the stored reward of the named action toward <paramref name="inc"/>,
/// leaving it at <c>(1 - alpha) * Reward + alpha * inc</c>.
/// </summary>
/// <param name="actionName">Name handed to <c>FindAction</c> to locate the entry to update.</param>
/// <param name="alpha">Weight given to <paramref name="inc"/>; the previous reward keeps weight <c>1 - alpha</c>.</param>
/// <param name="inc">Target value blended into the stored reward.</param>
public void IncreaseReward_Q(string actionName, double alpha, double inc)
{
    // NOTE(review): what FindAction returns for an unknown name is not visible here;
    // the result is used without a check.
    ActionReward entry = FindAction(actionName);

    double retainedWeight = 1 - alpha;
    double contribution = alpha * inc;

    // Decay the old estimate first, then add the weighted target.
    entry.Reward *= retainedWeight;
    entry.Reward += contribution;
}
/// <summary>
/// Scans <c>ActionRewards</c> and picks the entry whose <c>Reward</c> is largest.
/// </summary>
/// <returns>
/// The highest-reward entry; when several entries share the top reward, the one
/// encountered first is kept. <c>null</c> if <c>ActionRewards</c> yields no entries.
/// </returns>
public ActionReward BestAction()
{
    ActionReward leader = null;

    foreach (var candidate in ActionRewards)
    {
        // A candidate takes over only when nothing has been picked yet
        // or its reward is strictly greater than the current pick's.
        bool takesLead = leader == null || candidate.Reward > leader.Reward;
        if (takesLead)
        {
            leader = candidate;
        }
    }

    return leader;
}
/// <summary>
/// Processes one step of the current episode: updates the stored value of
/// <paramref name="chosenAction"/> for the current state, then makes
/// <paramref name="newState"/> the current state.
/// </summary>
/// <param name="chosenAction">Name of the action that was taken from the current state.</param>
/// <param name="newState">Key of the state reached after the action; resolved through <c>QTable.GetState</c>.</param>
/// <param name="reward">Immediate reward observed for this step.</param>
/// <returns>
/// <c>true</c> when the iteration counter has passed <c>maxIterations</c>; in that case
/// <c>EndEpisode</c> is called and no update is applied. Otherwise <c>false</c>.
/// </returns>
public bool Learn(string chosenAction, string newState, double reward)
{
    current_iteration += 1;
    if (current_iteration > maxIterations)
    {
        EndEpisode();
        return true;
    }

    State prevState = QTable.GetState(currentState);
    ActionReward bestActionNextState = QTable.GetState(newState).BestAction();

    // BestAction yields null for a state that has no action entries. Such a state
    // contributes no future value, so use 0 instead of dereferencing null.
    double nextStateValue = bestActionNextState == null ? 0.0 : bestActionNextState.Reward;

    // Target for the update: immediate reward plus the discounted best value of the next state.
    prevState.IncreaseReward_Q(chosenAction, alpha, reward + discount * nextStateValue);

    // The learning rate is recomputed as time^(-0.1) after each applied update.
    time += 1;
    alpha = Math.Pow(time, -0.1);

    currentState = newState;
    return false;
}