Example 1
        public void Step(double reward, int nextState)
        {
            // On-policy SARSA: sample the next action from the exploration
            // policy and use its Q-value in the TD target.
            var nextAction = ExplorationPolicy.SelectAction(_q[nextState]);

            var target = reward + DiscountFactor * _q[nextState][nextAction];
            var delta  = target - _q[CurrentState][SelectedAction];

            _q[CurrentState][SelectedAction] += LearningRate * delta;

            // Commit the same action that was evaluated in the target;
            // re-sampling here would break the on-policy correspondence.
            CurrentState   = nextState;
            SelectedAction = nextAction;
        }
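Example 1 is the on-policy SARSA update, Q(s,a) ← Q(s,a) + α[r + γ·Q(s′,a′) − Q(s,a)]: the action a′ sampled for the TD target is the same action the agent then commits to, which is why the method ends by reusing nextAction rather than sampling a second time.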
Example 2
        public void Step(double reward, int nextState)
        {
            // Record the (state, action) pair that was actually taken, so
            // that planning can replay simulated updates from it later.
            if (!_visited.TryGetValue(CurrentState, out var actions))
            {
                actions = new HashSet<int>();
                _visited[CurrentState] = actions;
            }
            actions.Add(SelectedAction);

            UpdateQ(reward, nextState); // direct update from the real transition
            Plan();                     // extra updates from recorded experience

            CurrentState   = nextState;
            SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
        }
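Example 2 follows a Dyna-Q-style pattern: the _visited map records every (state, action) pair the agent has actually tried, UpdateQ presumably applies the direct one-step update from the real transition, and Plan presumably replays simulated updates drawn from those recorded pairs. The bodies of UpdateQ and Plan are not shown in this listing, so that reading is an inference from the names.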
Example 3
        public void Step(double reward, int nextState)
        {
            // Off-policy Q-learning: the TD target uses the best Q-value
            // available in the next state, independent of the action the
            // exploration policy will actually choose.
            var bestNext = _q[nextState][0];

            for (var i = 1; i < ActionCount; i++)
            {
                if (_q[nextState][i] > bestNext)
                {
                    bestNext = _q[nextState][i];
                }
            }

            var target = reward + DiscountFactor * bestNext;
            var delta  = target - _q[CurrentState][SelectedAction];

            _q[CurrentState][SelectedAction] += LearningRate * delta;

            CurrentState   = nextState;
            SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
        }
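Example 3 is the off-policy Q-learning update: where Example 1 evaluates the action the policy will actually take next, this target uses the greedy value maxₐ Q(nextState, a), so the loop scans all ActionCount entries for the maximum. If _q[nextState] is a plain double[], the same scan can be written with LINQ; a minimal alternative, assuming System.Linq is in scope:

        // Equivalent to the explicit loop above, assuming _q[nextState]
        // exposes exactly the ActionCount action values.
        var bestNext = _q[nextState].Max();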
Example 4
        public void Begin(int state)
        {
            // Start a new episode: place the agent in the initial state and
            // pick the first action before any reward has been observed.
            CurrentState   = state;
            SelectedAction = ExplorationPolicy.SelectAction(_q[CurrentState]);
        }
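Taken together, Begin and Step imply an episode loop driven from outside the agent: Begin seeds the first state-action pair, and each Step call feeds back the observed reward and successor state. A minimal sketch of such a loop, where agent is any of the implementations above, episodeCount is the training budget, and env with its Reset, Act, and IsTerminal members is a hypothetical stand-in for whatever environment the agent is paired with:

        for (var episode = 0; episode < episodeCount; episode++)
        {
            // Reset() returns the initial state; the agent picks its first action.
            agent.Begin(env.Reset());

            while (!env.IsTerminal)
            {
                // Execute the agent's current choice, observe the outcome,
                // and let the agent learn from it before choosing again.
                var (reward, nextState) = env.Act(agent.SelectedAction);
                agent.Step(reward, nextState);
            }
        }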