Пример #1
0
        /// <summary>
        /// Computes a one-step backed-up value for <paramref name="state"/>: for each
        /// action the world reports as available, the weight returned by
        /// <c>policy.PAction</c> is multiplied by (reward + current value of the
        /// resulting state), and the products are summed. No discount factor is applied.
        /// </summary>
        /// <param name="state">State whose value is being recomputed.</param>
        /// <param name="policy">Provides the per-action weight via <c>PAction(state, action)</c>.</param>
        /// <param name="gridWorldRewarder">Provides the reward via <c>Reward(state, action)</c>.</param>
        /// <returns>The accumulated sum; 0.0 if the world yields no available actions.</returns>
        private double CalculateValue(
            GridWorldState state,
            IGridWorldPolicy policy,
            IGridWorldRewarder gridWorldRewarder)
        {
            var total = 0.0;

            foreach (var candidate in _world.AvailableActions(state))
            {
                // Call order (next state, reward, policy weight, value lookup) is kept
                // as-is in case any collaborator is order-sensitive.
                var successor       = _world.NextState(state, candidate);
                var immediateReward = gridWorldRewarder.Reward(state, candidate);
                var weight          = policy.PAction(state, candidate);
                var backedUp        = immediateReward + Value(successor);

                total += weight * backedUp;
            }

            return total;
        }
Пример #2
0
        /// <summary>
        /// Picks the available action in <paramref name="state"/> with the largest
        /// (reward + value of the resulting state).
        /// </summary>
        /// <param name="world">Supplies the available actions and the state transition.</param>
        /// <param name="state">State from which an action is chosen.</param>
        /// <param name="gridWorldValueTable">Supplies the value of each candidate next state.</param>
        /// <param name="gridWorldRewarder">Supplies the reward via <c>Reward(state, action)</c>.</param>
        /// <returns>
        /// The first action (in enumeration order) whose score is strictly greater than
        /// every earlier score. Falls back to <c>GridWorldAction.Down</c> when no action
        /// scores strictly above <c>double.MinValue</c> (e.g. no actions are available).
        /// </returns>
        private static GridWorldAction FindBestAction(
            GridWorld world,
            GridWorldState state,
            GridWorldValueTable gridWorldValueTable,
            IGridWorldRewarder gridWorldRewarder)
        {
            var bestAction = GridWorldAction.Down;
            var bestScore  = double.MinValue;

            foreach (var candidate in world.AvailableActions(state))
            {
                var successor      = world.NextState(state, candidate);
                var successorValue = gridWorldValueTable.Value(successor);
                var immediate      = gridWorldRewarder.Reward(state, candidate);
                var score          = immediate + successorValue;

                // Strict comparison: ties keep the earlier action, and a NaN score
                // never replaces the current best.
                if (!(score > bestScore))
                {
                    continue;
                }

                bestScore  = score;
                bestAction = candidate;
            }

            return bestAction;
        }
Пример #3
0
 /// <summary>
 /// Returns the weight this policy assigns to <paramref name="action"/> in
 /// <paramref name="state"/>: 1 when it equals the action stored for that state
 /// in <c>_actions</c>, otherwise 0.
 /// </summary>
 /// <param name="state">State used to look up the stored action.</param>
 /// <param name="action">Action to compare against the stored one.</param>
 /// <returns>1.0 on a match, 0.0 otherwise.</returns>
 public double PAction(GridWorldState state, GridWorldAction action)
 {
     var chosen = _actions[state];

     if (action == chosen)
     {
         return 1.0;
     }

     return 0.0;
 }
Пример #4
0
 /// <summary>
 /// Looks up the stored value for <paramref name="state"/>.
 /// </summary>
 /// <param name="state">State whose <c>Position1D</c> is used as the index into <c>_values</c>.</param>
 /// <returns>The entry of <c>_values</c> at <c>state.Position1D</c>.</returns>
 public double Value(GridWorldState state)
 {
     var index = state.Position1D;

     return _values[index];
 }