/// <summary>
/// Picks the action with the highest expected one-step value from <paramref name="state"/>.
/// </summary>
/// <remarks>
/// For each action returned by <c>world.AvailableActions(state)</c>, the score is the sum over
/// <c>world.PossibleStates(state, action)</c> of
/// <c>probability * (reward + value of the next state)</c>; no discount factor is applied.
/// The comparison is strict, so on ties the first action enumerated wins. If no action scores
/// above <see cref="double.MinValue"/> (for example when there are no available actions),
/// the fallback <c>new GamblersWorldAction(0)</c> is returned.
/// </remarks>
/// <param name="world">Supplies the available actions and the possible next states.</param>
/// <param name="state">The state from which an action is chosen.</param>
/// <param name="valueTable">Looked up for the value of each possible next state.</param>
/// <param name="rewarder">Supplies the reward for each (state, next state, action) transition.</param>
/// <returns>The highest-scoring action, or the fallback action described above.</returns>
private static GamblersWorldAction FindBestAction(
    GamblersWorld world,
    GamblersWorldState state,
    GamblersValueTable valueTable,
    IGamblersWorldRewarder rewarder)
{
    // Fallback result, used when nothing beats the initial sentinel score.
    var bestAction = new GamblersWorldAction(0);
    var bestScore = double.MinValue;

    foreach (var candidate in world.AvailableActions(state))
    {
        // Accumulate the expected value of taking this candidate action.
        var expectedValue = 0.0;

        foreach (var (successor, probability) in world.PossibleStates(state, candidate))
        {
            var successorValue = valueTable.Value(successor);
            var transitionReward = rewarder.Reward(state, successor, candidate);

            expectedValue += probability * (transitionReward + successorValue);
        }

        // Strictly greater: the earliest of several equally good actions is kept.
        if (expectedValue > bestScore)
        {
            bestScore = expectedValue;
            bestAction = candidate;
        }
    }

    return bestAction;
}
/// <summary>
/// Builds a greedy policy from a value table.
/// </summary>
/// <remarks>
/// A new <see cref="GreedyGamblersPolicy"/> is constructed for <paramref name="world"/>. Then,
/// for every state in <c>world.AllStates()</c>, the stake of the action chosen by
/// <c>FindBestAction</c> is stored in the policy's <c>_actions</c> entry for that state's
/// <c>DollarsInHand</c>.
/// </remarks>
/// <param name="world">The world whose states are enumerated and which is passed to the policy constructor.</param>
/// <param name="valueTable">State values used when scoring actions.</param>
/// <param name="rewarder">Reward source used when scoring actions.</param>
/// <returns>The populated policy.</returns>
public static GreedyGamblersPolicy Create(
    GamblersWorld world,
    GamblersValueTable valueTable,
    IGamblersWorldRewarder rewarder)
{
    var policy = new GreedyGamblersPolicy(world);

    foreach (var currentState in world.AllStates())
    {
        // Record only the stake of the winning action, keyed by the money held in this state.
        var chosen = FindBestAction(world, currentState, valueTable, rewarder);
        policy._actions[currentState.DollarsInHand] = chosen.Stake;
    }

    return policy;
}