public void Step(double reward, int nextState)
{
    // SARSA (on-policy) update: the action used in the target is the same action
    // the agent will actually take in the next state.
    var nextAction = ExplorationPolicy.SelectAction(_q[nextState]);
    var target = reward + DiscountFactor * _q[nextState][nextAction];
    var delta = target - _q[CurrentState][SelectedAction];
    _q[CurrentState][SelectedAction] += LearningRate * delta;

    CurrentState = nextState;
    // Take the action that was used in the target; re-sampling here would break
    // the on-policy property of SARSA.
    SelectedAction = nextAction;
}
public void Step(double reward, int nextState)
{
    // Record the (state, action) pair so the planning phase can replay it later.
    // Note the action must be added on every visit, not only the first time the
    // state is seen.
    if (!_visited.TryGetValue(CurrentState, out var actions))
    {
        actions = new HashSet<int>();
        _visited[CurrentState] = actions;
    }
    actions.Add(SelectedAction);

    UpdateQ(reward, nextState);
    Plan();

    CurrentState = nextState;
    SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
}
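The visited-set bookkeeping together with the Plan call suggests a Dyna-Q style agent, where planning replays transitions from a learned model. Plan itself is not shown in this snippet; as a rough sketch only, assuming hypothetical _model, _random and PlanningSteps members (filled in by UpdateQ with the last observed reward and next state for each visited pair), it could look like:

// Hypothetical planning sweep: replay remembered transitions and apply Q-learning
// updates to each of them. _model, _random and PlanningSteps are assumed fields.
private void Plan()
{
    var visitedStates = new List<int>(_visited.Keys);
    for (var n = 0; n < PlanningSteps; n++)
    {
        // Pick a previously visited state and one of the actions tried there.
        var s = visitedStates[_random.Next(visitedStates.Count)];
        var visitedActions = new List<int>(_visited[s]);
        var a = visitedActions[_random.Next(visitedActions.Count)];

        // The assumed model stores the last observed outcome of taking a in s.
        var (reward, nextState) = _model[(s, a)];

        var bestNext = _q[nextState][0];
        for (var i = 1; i < ActionCount; i++)
        {
            if (_q[nextState][i] > bestNext) { bestNext = _q[nextState][i]; }
        }

        _q[s][a] += LearningRate * (reward + DiscountFactor * bestNext - _q[s][a]);
    }
}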
public void Step(double reward, int nextState)
{
    // Q-learning (off-policy) update: the target uses the greedy value of the next
    // state, regardless of which action the exploration policy picks next.
    var bestNext = _q[nextState][0];
    for (var i = 1; i < ActionCount; i++)
    {
        if (_q[nextState][i] > bestNext)
        {
            bestNext = _q[nextState][i];
        }
    }

    var target = reward + DiscountFactor * bestNext;
    var delta = target - _q[CurrentState][SelectedAction];
    _q[CurrentState][SelectedAction] += LearningRate * delta;

    CurrentState = nextState;
    SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
}
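Each of the Step variants above delegates action selection to ExplorationPolicy.SelectAction, which is not part of this snippet. As a rough illustration only, an epsilon-greedy selector (a hypothetical sketch, not necessarily the library's own policy class) could be written like this:

// Hypothetical epsilon-greedy selector: explore with probability Epsilon,
// otherwise pick the action whose estimated value is highest.
public class EpsilonGreedyPolicy
{
    private readonly Random _random = new Random();

    public double Epsilon { get; set; } = 0.1;

    public int SelectAction(double[] actionValues)
    {
        if (_random.NextDouble() < Epsilon)
        {
            return _random.Next(actionValues.Length);
        }

        var best = 0;
        for (var i = 1; i < actionValues.Length; i++)
        {
            if (actionValues[i] > actionValues[best])
            {
                best = i;
            }
        }

        return best;
    }
}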
public void Begin(int state)
{
    // Initialise the episode: remember the starting state and pick the first action.
    CurrentState = state;
    SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
}
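Putting Begin and Step together, a training loop might look like the following. The environment object and its Reset/Act members are hypothetical and only illustrate the calling pattern:

// Hypothetical driver loop: 'env' is assumed to return the start state from Reset()
// and an outcome carrying Reward, NextState and Done from Act().
for (var episode = 0; episode < 1000; episode++)
{
    agent.Begin(env.Reset());

    while (true)
    {
        // Execute the action the agent currently has selected and feed back the result.
        var outcome = env.Act(agent.SelectedAction);
        agent.Step(outcome.Reward, outcome.NextState);

        if (outcome.Done)
        {
            break;
        }
    }
}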
/// <summary>
/// Gets the next recommended action among the available actions under the current exploration policy.
/// </summary>
/// <param name="state">The state.</param>
/// <param name="availableActions">The available actions.</param>
/// <returns>The chosen action.</returns>
public int GetAction(long state, List<int> availableActions)
{
    return ExplorationPolicy.ChooseAction(QualityValues[state], availableActions);
}
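For games where only some moves are legal in a given position, GetAction restricts the policy to those moves. The board object below is hypothetical and only shows the call:

// Hypothetical usage: the policy only considers the moves that are legal right now.
List<int> legalMoves = board.GetLegalMoves();
var action = agent.GetAction(board.StateHash, legalMoves);
board.Apply(action);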