예제 #1
0
 /// <summary>
 /// Chooses, from the actions the world offers in <paramref name="state"/>, one that
 /// the policy scores highest via <c>PAction</c>.
 /// </summary>
 /// <param name="state">State for which an action is being selected.</param>
 /// <returns>The first element of the top-scoring set produced by <c>MaxBy</c>.</returns>
 private GamblersWorldAction MaxAction(GamblersWorldState state)
 {
     var candidates = _world.AvailableActions(state);

     // NOTE(review): MaxBy is used here as a sequence-returning operator (its result
     // is passed to First), which looks like the MoreLINQ flavour - confirm.
     var topScoring = candidates.MaxBy(candidate => _policy.PAction(state, candidate));

     return topScoring.First();
 }
예제 #2
0
        /// <summary>
        /// Plays a fixed batch of games from each starting dollar amount and prints, per
        /// amount, how many games finished in a state that <c>_world.IsWin</c> accepts.
        /// </summary>
        /// <remarks>
        /// Games are only played for starting amounts 1 and above; the entry for 0 is
        /// still printed and therefore always reports 0 wins.
        /// </remarks>
        public void Play()
        {
            const int gamesPerStart = 100;
            var       stateCount    = _world.AllStates().Count();
            var       numWins       = new int[stateCount];

            for (var dollars = 1; dollars < stateCount; dollars++)
            {
                var start = new GamblersWorldState(dollars);
                var wins  = 0;

                for (var game = 0; game < gamesPerStart; game++)
                {
                    if (_world.IsWin(PlaySingleGame(start)))
                    {
                        wins++;
                    }
                }

                // Store the tally once the whole batch for this amount has been played.
                numWins[dollars] = wins;
            }

            Console.WriteLine("num wins:");
            for (var startingDollars = 0; startingDollars < stateCount; startingDollars++)
            {
                Console.WriteLine($"${startingDollars}: {numWins[startingDollars]} wins");
            }
        }
예제 #3
0
        /// <summary>
        /// Scans the actions available in <paramref name="state"/> and returns the one with
        /// the largest probability-weighted sum of (reward + next-state value).
        /// </summary>
        /// <param name="world">Supplies the available actions and their possible next states.</param>
        /// <param name="state">State being evaluated.</param>
        /// <param name="valueTable">Source of the value assigned to each next state.</param>
        /// <param name="rewarder">Source of the reward for each (state, next state, action) triple.</param>
        /// <returns>
        /// The earliest action whose score strictly exceeds every score before it, or a
        /// <c>GamblersWorldAction(0)</c> placeholder when no score exceeds <c>double.MinValue</c>.
        /// </returns>
        private static GamblersWorldAction FindBestAction(
            GamblersWorld world,
            GamblersWorldState state,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var bestScore  = double.MinValue;
            var bestAction = new GamblersWorldAction(0);

            foreach (var candidate in world.AvailableActions(state))
            {
                var score = ExpectedReturn(candidate);

                // Strict comparison: on a tie the earlier action is kept.
                if (score > bestScore)
                {
                    bestScore  = score;
                    bestAction = candidate;
                }
            }

            return bestAction;

            // Sums probability * (reward + next-state value) over every possible next state.
            double ExpectedReturn(GamblersWorldAction action)
            {
                var total = 0.0;

                foreach (var (successor, probability) in world.PossibleStates(state, action))
                {
                    var successorValue = valueTable.Value(successor);
                    var payoff         = rewarder.Reward(state, successor, action);

                    total += probability * (payoff + successorValue);
                }

                return total;
            }
        }
예제 #4
0
        /// <summary>
        /// Runs one game from <paramref name="initialState"/>, repeatedly applying the
        /// action chosen by <c>MaxAction</c> until the world reports a terminal state.
        /// </summary>
        /// <param name="initialState">State the game starts in.</param>
        /// <returns>The terminal state the game ended in.</returns>
        private GamblersWorldState PlaySingleGame(GamblersWorldState initialState)
        {
            var current = initialState;

            while (true)
            {
                if (_world.IsTerminal(current))
                {
                    return current;
                }

                current = _world.NextState(current, MaxAction(current));
            }
        }
예제 #5
0
        /// <summary>
        /// Computes a value for <paramref name="state"/> by summing, over every available
        /// action and every possible next state, the product of the policy's action
        /// probability, the next-state probability, and (reward + current value of the next state).
        /// </summary>
        /// <param name="state">State whose value is being computed.</param>
        /// <param name="policy">Supplies the probability of taking each action in the state.</param>
        /// <param name="rewarder">Supplies the reward for each (state, next state, action) triple.</param>
        /// <returns>The accumulated sum; 0.0 when there is nothing to iterate over.</returns>
        private double CalculateValue(
            GamblersWorldState state, IGamblersPolicy policy, IGamblersWorldRewarder rewarder)
        {
            var total = 0.0;

            foreach (var action in _world.AvailableActions(state))
            {
                foreach (var (successor, transitionProbability) in _world.PossibleStates(state, action))
                {
                    var payoff = rewarder.Reward(state, successor, action);

                    // Combined weight of choosing this action and then landing in this successor.
                    var weight = policy.PAction(state, action) * transitionProbability;

                    total += weight * (payoff + Value(successor));
                }
            }

            return total;
        }
예제 #6
0
 /// <summary>
 /// Looks up the stored value for <paramref name="state"/>, keyed by its
 /// <c>DollarsInHand</c>.
 /// </summary>
 /// <param name="state">State whose value is requested.</param>
 /// <returns>The entry of <c>_values</c> at the state's dollar amount.</returns>
 public double Value(GamblersWorldState state) => _values[state.DollarsInHand];
예제 #7
0
 /// <summary>
 /// Reports whether <paramref name="action"/> is the one recorded for
 /// <paramref name="state"/>, expressed as a probability.
 /// </summary>
 /// <param name="state">State whose recorded entry in <c>_actions</c> is consulted.</param>
 /// <param name="action">Action whose <c>Stake</c> is compared against that entry.</param>
 /// <returns>1 when the stake matches the recorded entry; otherwise 0.</returns>
 public double PAction(GamblersWorldState state, GamblersWorldAction action)
 {
     if (action.Stake == _actions[state.DollarsInHand])
     {
         return 1;
     }

     return 0;
 }