/// <summary>
/// Selects, among the actions available in <paramref name="state"/>, one to which
/// the policy assigns the largest <c>PAction</c> value.
/// </summary>
/// <param name="state">State whose available actions are ranked.</param>
/// <returns>The first element of the maximal set produced by <c>MaxBy</c>.</returns>
private GamblersWorldAction MaxAction(GamblersWorldState state)
{
    var candidates = _world.AvailableActions(state);
    var topRanked = candidates.MaxBy(candidate => _policy.PAction(state, candidate));

    // The MaxBy result is consumed as a sequence; only its first element is used.
    return topRanked.First();
}
/// <summary>
/// Plays a fixed number of games (100) from every starting amount from 1 up to
/// (number of states - 1), counts how many of them end in a win according to
/// <c>_world.IsWin</c>, and writes the per-starting-amount totals to the console.
/// </summary>
/// <remarks>
/// Starting amount 0 is never played, so its printed win count is always 0.
/// </remarks>
public void Play()
{
    const int gamesPerStart = 100;

    var stateCount = _world.AllStates().Count();
    var winsByStart = new int[stateCount];

    for (var dollars = 1; dollars < stateCount; dollars++)
    {
        var start = new GamblersWorldState(dollars);
        var wins = 0;

        for (var game = 0; game < gamesPerStart; game++)
        {
            var finalState = PlaySingleGame(start);
            if (_world.IsWin(finalState))
            {
                wins++;
            }
        }

        winsByStart[dollars] = wins;
    }

    Console.WriteLine("num wins:");

    // Report every slot, including the unplayed index 0.
    var index = 0;
    foreach (var wins in winsByStart)
    {
        Console.WriteLine($"${index}: {wins} wins");
        index++;
    }
}
/// <summary>
/// Returns the available action in <paramref name="state"/> with the highest
/// expected value, where an action's value is the sum over its possible next states of
/// <c>probability * (reward + valueTable.Value(nextState))</c>.
/// </summary>
/// <param name="world">Supplies the available actions and the possible next states with their probabilities.</param>
/// <param name="state">State for which the best action is sought.</param>
/// <param name="valueTable">Current value estimates for next states.</param>
/// <param name="rewarder">Supplies the reward for a (state, next state, action) transition.</param>
/// <returns>
/// The first action whose value is strictly greater than all earlier ones; an action
/// constructed with 0 when no action exceeds <c>double.MinValue</c> (for example when
/// there are no available actions).
/// </returns>
private static GamblersWorldAction FindBestAction(
    GamblersWorld world,
    GamblersWorldState state,
    GamblersValueTable valueTable,
    IGamblersWorldRewarder rewarder)
{
    // Expected value of taking one candidate action from `state`.
    double ExpectedReturn(GamblersWorldAction candidate)
    {
        var total = 0.0;
        foreach (var (successor, probability) in world.PossibleStates(state, candidate))
        {
            var successorValue = valueTable.Value(successor);
            var payoff = rewarder.Reward(state, successor, candidate);
            total += probability * (payoff + successorValue);
        }

        return total;
    }

    var best = new GamblersWorldAction(0);
    var bestValue = double.MinValue;

    foreach (var candidate in world.AvailableActions(state))
    {
        var candidateValue = ExpectedReturn(candidate);

        // Strict comparison: on ties the earlier action is kept.
        if (candidateValue > bestValue)
        {
            best = candidate;
            bestValue = candidateValue;
        }
    }

    return best;
}
/// <summary>
/// Runs one game from <paramref name="initialState"/>, repeatedly applying the action
/// chosen by <c>MaxAction</c> until <c>_world.IsTerminal</c> reports a terminal state.
/// </summary>
/// <param name="initialState">State the game starts in; returned unchanged if already terminal.</param>
/// <returns>The terminal state in which the game ended.</returns>
private GamblersWorldState PlaySingleGame(GamblersWorldState initialState)
{
    var current = initialState;

    while (true)
    {
        if (_world.IsTerminal(current))
        {
            return current;
        }

        var chosen = MaxAction(current);
        current = _world.NextState(current, chosen);
    }
}
/// <summary>
/// Computes the expected value of <paramref name="state"/> under
/// <paramref name="policy"/>: the sum, over every available action and every possible
/// next state, of
/// <c>policy.PAction(state, action) * pNextState * (reward + Value(nextState))</c>.
/// No discount factor is applied.
/// </summary>
/// <param name="state">State whose value is computed.</param>
/// <param name="policy">Supplies the probability of taking each action in <paramref name="state"/>.</param>
/// <param name="rewarder">Supplies the reward for a (state, next state, action) transition.</param>
/// <returns>The accumulated expected value; 0.0 when the state has no available actions.</returns>
private double CalculateValue(
    GamblersWorldState state,
    IGamblersPolicy policy,
    IGamblersWorldRewarder rewarder)
{
    var newValue = 0.0;

    foreach (var action in _world.AvailableActions(state))
    {
        // The action probability does not depend on the next state, so look it up
        // once per action instead of once per (action, next state) pair.
        var pAction = policy.PAction(state, action);

        foreach (var (nextState, pNextState) in _world.PossibleStates(state, action))
        {
            var reward = rewarder.Reward(state, nextState, action);

            // Same operand order as before the hoist, so floating-point results match.
            newValue += pAction * pNextState * (reward + Value(nextState));
        }
    }

    return newValue;
}
/// <summary>
/// Looks up the stored value for <paramref name="state"/>.
/// </summary>
/// <param name="state">State whose <c>DollarsInHand</c> is used as the index into <c>_values</c>.</param>
/// <returns>The entry of <c>_values</c> at index <c>state.DollarsInHand</c>.</returns>
public double Value(GamblersWorldState state) => _values[state.DollarsInHand];
/// <summary>
/// Probability of taking <paramref name="action"/> in <paramref name="state"/>:
/// 1 when the action's stake equals the entry of <c>_actions</c> at index
/// <c>state.DollarsInHand</c>, otherwise 0.
/// </summary>
/// <param name="state">State whose <c>DollarsInHand</c> indexes <c>_actions</c>.</param>
/// <param name="action">Action whose <c>Stake</c> is compared with the stored entry.</param>
/// <returns>1.0 on a match, 0.0 otherwise.</returns>
public double PAction(GamblersWorldState state, GamblersWorldAction action)
{
    var offeredStake = action.Stake;
    var storedStake = _actions[state.DollarsInHand];

    if (offeredStake == storedStake)
    {
        return 1.0;
    }

    return 0.0;
}