/// <summary>
/// Selects the action with the highest one-step look-ahead value from <paramref name="state"/>.
/// </summary>
/// <remarks>
/// Each action returned by <c>world.AvailableActions(state)</c> is scored as the
/// probability-weighted sum of (reward + value of the next state) over every outcome
/// reported by <c>world.PossibleStates</c>; no discount factor is applied.
/// A later action replaces the current best only when its score is strictly greater,
/// so ties keep the first action enumerated. If no action scores above
/// <c>double.MinValue</c> (e.g. there are no available actions), the result is
/// <c>new GamblersWorldAction(0)</c>.
/// </remarks>
/// <param name="world">Supplies the available actions and the possible next states.</param>
/// <param name="state">The state to choose an action for.</param>
/// <param name="valueTable">Current value estimate for each state.</param>
/// <param name="rewarder">Supplies the reward for a (state, next state, action) transition.</param>
/// <returns>The highest-scoring action.</returns>
private static GamblersWorldAction FindBestAction(
    GamblersWorld world,
    GamblersWorldState state,
    GamblersValueTable valueTable,
    IGamblersWorldRewarder rewarder)
{
    var bestScore = double.MinValue;
    var bestAction = new GamblersWorldAction(0);

    foreach (var candidate in world.AvailableActions(state))
    {
        // Expected return of taking `candidate` from `state`.
        var score = 0.0;

        foreach (var (successor, probability) in world.PossibleStates(state, candidate))
        {
            var successorValue = valueTable.Value(successor);
            var immediateReward = rewarder.Reward(state, successor, candidate);
            score += probability * (immediateReward + successorValue);
        }

        if (score > bestScore)
        {
            bestScore = score;
            bestAction = candidate;
        }
    }

    return bestAction;
}
/// <summary>
/// Writes one "state: action" line to the console for every state in
/// <paramref name="world"/>, where the action is the one <paramref name="policy"/>
/// returns for that state.
/// </summary>
/// <param name="world">Source of the states to list, via <c>AllStates()</c>.</param>
/// <param name="policy">Deterministic policy queried once per state.</param>
private static void PrintPolicyActions(
    GamblersWorld world,
    IDeterministicPolicy<GamblersWorldState, GamblersWorldAction> policy)
{
    foreach (var current in world.AllStates())
    {
        var line = $"{current}: {policy.Action(current)}";
        Console.WriteLine(line);
    }
}
/// <summary>
/// Writes one "state: value" line to the console for every state in
/// <paramref name="world"/>, reading each value from <paramref name="values"/>.
/// </summary>
/// <param name="world">Source of the states to list, via <c>AllStates()</c>.</param>
/// <param name="values">Value table queried once per state.</param>
private static void PrintAllValues(
    GamblersWorld world,
    ValueTable<GamblersWorldState, GamblersWorldAction> values)
{
    foreach (var current in world.AllStates())
    {
        var line = $"{current}: {values.Value(current)}";
        Console.WriteLine(line);
    }
}
/// <summary>
/// Builds a policy that is greedy with respect to <paramref name="valueTable"/>.
/// </summary>
/// <remarks>
/// For every state in <c>world.AllStates()</c> the best action is found with
/// <c>FindBestAction</c>, and its <c>Stake</c> is stored in the policy's action array
/// at the index given by the state's <c>DollarsInHand</c>.
/// </remarks>
/// <param name="world">The world whose states are enumerated.</param>
/// <param name="valueTable">State values the greedy choice is based on.</param>
/// <param name="rewarder">Reward source used when scoring actions.</param>
/// <returns>The populated greedy policy.</returns>
public static GreedyGamblersPolicy Create(
    GamblersWorld world,
    GamblersValueTable valueTable,
    IGamblersWorldRewarder rewarder)
{
    var policy = new GreedyGamblersPolicy(world);

    foreach (var current in world.AllStates())
    {
        var best = FindBestAction(world, current, valueTable, rewarder);
        policy._actions[current.DollarsInHand] = best.Stake;
    }

    return policy;
}
/// <summary>
/// Runs <c>DpPolicyOptimiser.FindOptimalPolicy</c> on a gambler's world
/// (probability of heads 0.4, target of 100 dollars), passing
/// <c>evaluationSweepsPerPolicyUpdate = 1</c>, then prints the resulting state values
/// followed by the policy's action for each state.
/// </summary>
private static void UseDpToFindOptimalPolicy()
{
    const double probabilityOfHeads = 0.4;
    const int dollarsToWin = 100;
    const int evaluationSweepsPerPolicyUpdate = 1;

    var world = new GamblersWorld(probabilityOfHeads, dollarsToWin);
    var rewarder = new GamblersWorldRewarder(world);

    Console.WriteLine("Policy optimiser:");
    var (optimalPolicy, optimalValues) = DpPolicyOptimiser.FindOptimalPolicy(
        world,
        rewarder,
        evaluationSweepsPerPolicyUpdate);

    Console.WriteLine("Optimal policy values:");
    PrintAllValues(world, optimalValues);

    Console.WriteLine("Optimal policy stakes:");
    PrintPolicyActions(world, optimalPolicy);
}
/// <summary>
/// Plays the gambler's game (probability of heads 0.2, target of 100 dollars) with the
/// always-stake-max policy. Runs with other policies are present but disabled below.
/// </summary>
private static void PlayGamesWithPolicies()
{
    const double probabilityOfHeads = 0.2;
    const int dollarsToWin = 100;

    var world = new GamblersWorld(probabilityOfHeads, dollarsToWin);

    // Disabled alternative: uniform random policy.
    // Console.WriteLine("random policy:");
    // PlayGamesWithPolicy(world, new UniformRandomGamblersPolicy());

    // Disabled alternative: always stake one dollar.
    // NOTE(review): the label below still reads "random policy:" although the policy is
    // AlwaysStake1DollarPolicy; looks like a copy/paste leftover, fix if re-enabled.
    // Console.WriteLine("random policy:");
    // PlayGamesWithPolicy(world, new AlwaysStake1DollarPolicy());

    Console.WriteLine("always stake max policy:");
    var stakeMaxPolicy = new AlwaysStakeMaxPolicy(world);
    PlayGamesWithPolicy(world, stakeMaxPolicy);

    // Disabled alternative: optimal policy.
    // var (optimalPolicy, _) = FindOptimalPolicy(world, new GamblersWorldRewarder(world));
    // Console.WriteLine("optimal policy:");
    // PlayGamesWithPolicy(world, optimalPolicy);
}
/// <summary>
/// Finds the optimal policy for a gambler's world (probability of heads 0.4, target of
/// 100 dollars), evaluates that policy in a fresh value table, and prints the policy's
/// action for each state followed by the evaluated state values.
/// </summary>
private static void EvaluatePolicyValues()
{
    const double probabilityOfHeads = 0.4;
    const int dollarsToWin = 100;

    var world = new GamblersWorld(probabilityOfHeads, dollarsToWin);
    var rewarder = new GamblersWorldRewarder(world);

    Console.WriteLine("Optimal policy:");
    // Only the policy is needed here; the optimiser's own value table is discarded
    // because the values are re-derived by the explicit evaluation below.
    var (policy, _) = DpPolicyOptimiser.FindOptimalPolicy(world, rewarder);

    var evalValues = new ValueTable<GamblersWorldState, GamblersWorldAction>(world);
    evalValues.Evaluate(policy, rewarder);

    PrintPolicyActions(world, policy);

    // Print the table produced by Evaluate above, so the output reflects the policy
    // evaluation this method performs rather than the optimiser's by-product values.
    PrintAllValues(world, evalValues);

    // Disabled alternative: evaluate the always-stake-max policy instead.
    // Console.WriteLine("Always stake max policy:");
    // var policy = new AlwaysStakeMaxPolicy(world);
    // PrintPolicyActionsAndValues(world, policy, rewarder);
}
/// <summary>
/// Creates a <c>GamblersProblemPlayer</c> for <paramref name="world"/> and
/// <paramref name="policy"/> and calls its <c>Play</c> method.
/// </summary>
/// <param name="world">The world the player plays in.</param>
/// <param name="policy">The policy the player is constructed with.</param>
private static void PlayGamesWithPolicy(GamblersWorld world, IGamblersPolicy policy)
    => new GamblersProblemPlayer(world, policy).Play();
/// <summary>
/// Creates a value table with one slot per state in <paramref name="world"/>.
/// </summary>
/// <remarks>
/// Every slot starts at 0.0 (the array default) except the slot at index
/// <c>DollarsToWin</c>, which is seeded with 1.0.
/// NOTE(review): this relies on <c>AllStates()</c> yielding more than
/// <c>DollarsToWin</c> states; otherwise the seed assignment is out of range. Confirm
/// against <c>GamblersWorld</c>.
/// </remarks>
/// <param name="world">The world this table holds values for.</param>
public GamblersValueTable(GamblersWorld world)
{
    _world = world;

    var stateCount = _world.AllStates().Count();
    _values = new double[stateCount];

    var winningIndex = _world.DollarsToWin;
    _values[winningIndex] = 1.0;
}
/// <summary>
/// Creates a player that holds on to the given world and policy.
/// </summary>
/// <param name="world">Stored in <c>_world</c>.</param>
/// <param name="policy">Stored in <c>_policy</c>.</param>
public GamblersProblemPlayer(GamblersWorld world, IGamblersPolicy policy)
{
    (_world, _policy) = (world, policy);
}
/// <summary>
/// Creates a rewarder bound to <paramref name="world"/>.
/// </summary>
/// <param name="world">Stored in <c>_world</c>.</param>
public GamblersWorldRewarder(GamblersWorld world) => _world = world;
/// <summary>
/// Creates the policy for <paramref name="world"/>.
/// </summary>
/// <param name="world">Stored in <c>_world</c>.</param>
public AlwaysStakeMaxPolicy(GamblersWorld world) => _world = world;
/// <summary>
/// Allocates the action array with one entry per state in <paramref name="world"/>;
/// every entry starts at 0 (the array default).
/// </summary>
/// <param name="world">The world whose state count sizes the array.</param>
private GreedyGamblersPolicy(GamblersWorld world)
{
    var stateCount = world.AllStates().Count();
    _actions = new int[stateCount];
}