Пример #1
0
        /// <summary>
        /// Selects the action available in <paramref name="state"/> with the largest
        /// expected one-step return, i.e. the probability-weighted sum of
        /// (reward + value of the next state) over the action's possible next states.
        /// </summary>
        /// <param name="problem">Supplies the available actions and the possible next states.</param>
        /// <param name="state">State for which an action is chosen.</param>
        /// <param name="valueTable">Source of the value of each next state.</param>
        /// <param name="rewarder">Source of the reward for each (state, next state, action) triple.</param>
        /// <returns>
        /// The first action whose expected return is strictly greater than every earlier
        /// candidate's; <c>default(TAction)</c> when no action's expected return exceeds
        /// <c>double.MinValue</c> (which includes the case of no available actions).
        /// </returns>
        private static TAction FindBestAction(
            IProblem<TState, TAction> problem,
            TState state,
            ValueTable<TState, TAction> valueTable,
            IRewarder<TState, TAction> rewarder)
        {
            var bestAction = default(TAction);
            var bestReturn = double.MinValue;

            foreach (var candidate in problem.AvailableActions(state))
            {
                var expectedReturn = ExpectedReturn(candidate);

                // Strict comparison: on a tie the earlier candidate is kept.
                if (expectedReturn > bestReturn)
                {
                    bestAction = candidate;
                    bestReturn = expectedReturn;
                }
            }

            return bestAction;

            // Probability-weighted sum of (reward + next-state value) for one action.
            double ExpectedReturn(TAction action)
            {
                var total = 0.0;

                foreach (var (successor, probability) in problem.PossibleStates(state, action))
                {
                    var successorValue = valueTable.Value(successor);
                    var stepReward     = rewarder.Reward(state, successor, action);

                    total += probability * (stepReward + successorValue);
                }

                return total;
            }
        }
Пример #2
0
        /// <summary>
        /// Returns the first action available in <paramref name="state"/> whose
        /// probability, as reported by <c>PAction</c>, is approximately one.
        /// </summary>
        /// <param name="state">State for which an action is requested.</param>
        /// <returns>
        /// The first action from <c>_problem.AvailableActions(state)</c> whose probability
        /// is at least <c>1 - 1e-9</c>, or <c>default(TAction)</c> when no action qualifies.
        /// </returns>
        public TAction Action(TState state)
        {
            // double.Epsilon is the smallest positive subnormal double (~4.9e-324), so
            // "1 - double.Epsilon" rounds to exactly 1.0 and provides no tolerance at all.
            // An explicit tolerance lets a probability that is one up to rounding error match.
            const double tolerance = 1e-9;
            const double approxOne = 1.0 - tolerance;

            return _problem
                   .AvailableActions(state)
                   .FirstOrDefault(action => PAction(state, action) >= approxOne);
        }
Пример #3
0
        /// <summary>
        /// Probability of taking <paramref name="action"/> in <paramref name="state"/>:
        /// one divided by the number of actions available in that state.
        /// </summary>
        /// <param name="state">State whose available actions are counted.</param>
        /// <param name="action">Not consulted; the result depends only on <paramref name="state"/>.</param>
        /// <returns>
        /// <c>1.0 / count</c> where <c>count</c> is the number of available actions,
        /// or <c>0.0</c> when there are none.
        /// </returns>
        public double PAction(TState state, TAction action)
        {
            var actionCount = _problem.AvailableActions(state).Count();

            // Guard against division by zero when the state offers no actions.
            return actionCount == 0
                ? 0.0
                : 1.0 / actionCount;
        }
Пример #4
0
        /// <summary>
        /// Computes a new value for <paramref name="state"/> as the sum, over every
        /// available action and every possible next state, of
        /// action probability * transition probability * (reward + value of the next state).
        /// </summary>
        /// <param name="state">State whose value is being computed.</param>
        /// <param name="policy">Supplies the probability of each action in <paramref name="state"/>.</param>
        /// <param name="rewarder">Supplies the reward for each (state, next state, action) triple.</param>
        /// <returns>The accumulated value; <c>0.0</c> when there are no actions or no transitions.</returns>
        private double CalculateValue(
            TState state,
            IPolicy<TState, TAction> policy,
            IRewarder<TState, TAction> rewarder)
        {
            var newValue = 0.0;

            foreach (var action in _problem.AvailableActions(state))
            {
                // The action probability does not depend on the next state, so query
                // the policy once per action instead of once per transition.
                var pAction = policy.PAction(state, action);

                foreach (var (nextState, pNextState) in _problem.PossibleStates(state, action))
                {
                    var reward = rewarder.Reward(state, nextState, action);

                    // Same multiplication order as before hoisting, so the result is unchanged.
                    newValue += pAction * pNextState * (reward + Value(nextState));
                }
            }

            return newValue;
        }