Пример #1
0
        /// <summary>
        /// Applies one temporal-difference update to the value of the current
        /// state/action pair and then moves the agent on to <paramref name="nextState"/>.
        /// </summary>
        /// <param name="reward">Reward term used in the update target for the current state/action pair.</param>
        /// <param name="nextState">Index into <c>_q</c> of the state the agent moves to.</param>
        public void Step(double reward, int nextState)
        {
            // Ask the policy for the next action exactly once. The action whose value
            // is bootstrapped from below must be the one that is actually taken next;
            // sampling the policy a second time could yield a different action.
            var nextAction = ExplorationPolicy.SelectAction(_q[nextState]);

            var target = reward + DiscountFactor * _q[nextState][nextAction];
            var delta  = target - _q[CurrentState][SelectedAction];

            _q[CurrentState][SelectedAction] += LearningRate * delta;

            CurrentState   = nextState;
            SelectedAction = nextAction;
        }
Пример #2
0
        /// <summary>
        /// Records the current state/action pair as visited, runs the value update
        /// and planning steps, and then moves the agent on to <paramref name="nextState"/>.
        /// </summary>
        /// <param name="reward">Reward forwarded to <c>UpdateQ</c>.</param>
        /// <param name="nextState">State forwarded to <c>UpdateQ</c>; it becomes the new current state.</param>
        public void Step(double reward, int nextState)
        {
            // Get or create the action set for this state, then always record the
            // action. Adding only when the state is first seen would drop every
            // later action taken in an already-visited state.
            if (!_visited.TryGetValue(CurrentState, out var actions))
            {
                actions = new HashSet<int>();
                _visited[CurrentState] = actions;
            }

            actions.Add(SelectedAction);

            UpdateQ(reward, nextState);
            // NOTE(review): Plan() presumably replays pairs stored in _visited - confirm against its definition.
            Plan();

            CurrentState   = nextState;
            SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
        }
Пример #3
0
        /// <summary>
        /// Updates the value of the current state/action pair towards
        /// <c>reward + DiscountFactor * (largest value stored for nextState)</c>,
        /// then moves the agent on to <paramref name="nextState"/> and selects its next action.
        /// </summary>
        /// <param name="reward">Reward term used in the update target.</param>
        /// <param name="nextState">Index into <c>_q</c> of the state the agent moves to.</param>
        public void Step(double reward, int nextState)
        {
            // Scan the first ActionCount entries of the next state's row for the largest value.
            var nextValues = _q[nextState];
            var maxValue   = nextValues[0];

            for (var action = 1; action < ActionCount; action++)
            {
                var candidate = nextValues[action];

                if (candidate > maxValue)
                {
                    maxValue = candidate;
                }
            }

            // Temporal-difference error between the bootstrapped target and the current estimate.
            var tdTarget = reward + DiscountFactor * maxValue;
            var tdError  = tdTarget - _q[CurrentState][SelectedAction];

            _q[CurrentState][SelectedAction] += LearningRate * tdError;

            CurrentState   = nextState;
            SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
        }
Пример #4
0
 /// <summary>
 /// Makes <paramref name="state"/> the current state and has the exploration
 /// policy select an action from that state's entry in <c>_q</c>.
 /// </summary>
 /// <param name="state">State to start from; used to index <c>_q</c>.</param>
 public void Begin(int state)
 {
     CurrentState = state;

     var stateValues = _q[CurrentState];
     SelectedAction  = ExplorationPolicy.SelectAction(stateValues);
 }
Пример #5
0
 /// <summary>
 /// Asks the exploration policy to choose one of the supplied actions for the given state.
 /// </summary>
 /// <param name="state">Key used to look up the state's entry in <c>QualityValues</c>.</param>
 /// <param name="availableActions">Candidate actions handed to the exploration policy.</param>
 /// <returns>The action returned by <c>ExplorationPolicy.ChooseAction</c>.</returns>
 public int GetAction(long state, List<int> availableActions)
 {
     var stateValues = QualityValues[state];

     return ExplorationPolicy.ChooseAction(stateValues, availableActions);
 }