C# (CSharp) dp.Examples.GamblersProblem GamblersWorld示例

编程语言: C# (CSharp)

命名空间/包名称: dp.Examples.GamblersProblem

类/类型: GamblersWorld

hotexamples.com的示例: 13

C# (CSharp) dp.Examples.GamblersProblem GamblersWorld - 已找到13个示例。这些是从开源项目中提取的最受好评的dp.Examples.GamblersProblem.GamblersWorld现实C# (CSharp)示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示 / 隐藏

AllStates(7)

AvailableActions(3)

IsTerminal(2)

PossibleStates(2)

IsWin(1)

NextState(1)

示例#1

显示文件

        /// <summary>
        /// Scans every action available in <paramref name="state"/> and returns the one whose
        /// probability-weighted sum of (reward + successor value) is largest.
        /// </summary>
        /// <param name="world">Supplies the available actions and the possible successor states.</param>
        /// <param name="state">The state the action is chosen for.</param>
        /// <param name="valueTable">Current value estimates, queried for each successor state.</param>
        /// <param name="rewarder">Supplies the reward for each (state, successor, action) triple.</param>
        /// <returns>
        /// The first action with the strictly greatest expected value, or
        /// <c>new GamblersWorldAction(0)</c> when no action scores above <see cref="double.MinValue"/>.
        /// </returns>
        private static GamblersWorldAction FindBestAction(
            GamblersWorld world,
            GamblersWorldState state,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var bestAction = new GamblersWorldAction(0);
            var bestValue  = double.MinValue;

            foreach (var candidate in world.AvailableActions(state))
            {
                var expectedValue = 0.0;

                foreach (var (successor, probability) in world.PossibleStates(state, candidate))
                {
                    var successorValue = valueTable.Value(successor);
                    var payoff         = rewarder.Reward(state, successor, candidate) + successorValue;

                    expectedValue += probability * payoff;
                }

                // Written as a negated '>' so that ties and NaN values never replace the current best.
                if (!(expectedValue > bestValue))
                {
                    continue;
                }

                bestValue  = expectedValue;
                bestAction = candidate;
            }

            return bestAction;
        }

示例#2

显示文件

 /// <summary>
 /// Writes one console line per state of <paramref name="world"/>, in the form
 /// "state: action", where the action is the one <paramref name="policy"/> returns for that state.
 /// </summary>
 /// <param name="world">World whose states are enumerated via <c>AllStates()</c>.</param>
 /// <param name="policy">Policy queried for the action of each state.</param>
 private static void PrintPolicyActions(
     GamblersWorld world,
     IDeterministicPolicy<GamblersWorldState, GamblersWorldAction> policy)
 {
     var states = world.AllStates();

     foreach (var state in states)
     {
         var line = $"{state}: {policy.Action(state)}";
         Console.WriteLine(line);
     }
 }

示例#3

显示文件

 /// <summary>
 /// Writes one console line per state of <paramref name="world"/>, in the form
 /// "state: value", where the value is looked up in <paramref name="values"/>.
 /// </summary>
 /// <param name="world">World whose states are enumerated via <c>AllStates()</c>.</param>
 /// <param name="values">Value table queried for each state.</param>
 private static void PrintAllValues(
     GamblersWorld world,
     ValueTable<GamblersWorldState, GamblersWorldAction> values)
 {
     var states = world.AllStates();

     foreach (var state in states)
     {
         var line = $"{state}: {values.Value(state)}";
         Console.WriteLine(line);
     }
 }

示例#4

显示文件

        /// <summary>
        /// Builds a <see cref="GreedyGamblersPolicy"/> by asking <c>FindBestAction</c> for the best
        /// action in every state of <paramref name="world"/> and recording that action's stake.
        /// </summary>
        /// <param name="world">World whose states are enumerated via <c>AllStates()</c>.</param>
        /// <param name="valueTable">Value estimates forwarded to <c>FindBestAction</c>.</param>
        /// <param name="rewarder">Rewarder forwarded to <c>FindBestAction</c>.</param>
        /// <returns>The populated policy; stakes are stored at index <c>state.DollarsInHand</c>.</returns>
        public static GreedyGamblersPolicy Create(
            GamblersWorld world,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var policy = new GreedyGamblersPolicy(world);

            foreach (var currentState in world.AllStates())
            {
                var chosen = FindBestAction(world, currentState, valueTable, rewarder);

                policy._actions[currentState.DollarsInHand] = chosen.Stake;
            }

            return policy;
        }

示例#5

显示文件

        /// <summary>
        /// Demo entry: builds a gambler's world (heads probability 0.4, target 100 dollars), runs
        /// <c>DpPolicyOptimiser.FindOptimalPolicy</c> with one evaluation sweep per policy update,
        /// and prints the resulting values followed by the resulting policy actions.
        /// </summary>
        private static void UseDpToFindOptimalPolicy()
        {
            const int    sweepsPerPolicyUpdate = 1;
            const int    targetDollars         = 100;
            const double headsProbability      = 0.4;

            var world    = new GamblersWorld(headsProbability, targetDollars);
            var rewarder = new GamblersWorldRewarder(world);

            Console.WriteLine("Policy optimiser:");

            var (optimalPolicy, optimalValues) =
                DpPolicyOptimiser.FindOptimalPolicy(world, rewarder, sweepsPerPolicyUpdate);

            Console.WriteLine("Optimal policy values:");
            PrintAllValues(world, optimalValues);

            Console.WriteLine("Optimal policy stakes:");
            PrintPolicyActions(world, optimalPolicy);
        }

示例#6

显示文件

        /// <summary>
        /// Demo entry: builds a gambler's world (heads probability 0.2, target 100 dollars) and
        /// plays it with <see cref="AlwaysStakeMaxPolicy"/>. Other policies are kept below as
        /// commented-out alternatives.
        /// </summary>
        private static void PlayGamesWithPolicies()
        {
            const double headsProbability = 0.2;
            const int    targetDollars    = 100;

            var world = new GamblersWorld(headsProbability, targetDollars);

            // Disabled alternative: uniform random policy.
            // Console.WriteLine("random policy:");
            // PlayGamesWithPolicy(world, new UniformRandomGamblersPolicy());

            // Disabled alternative: always stake one dollar.
            // Console.WriteLine("always stake 1 dollar policy:");
            // PlayGamesWithPolicy(world, new AlwaysStake1DollarPolicy());

            var stakeMaxPolicy = new AlwaysStakeMaxPolicy(world);

            Console.WriteLine("always stake max policy:");
            PlayGamesWithPolicy(world, stakeMaxPolicy);

            // Disabled alternative: policy found by the optimiser.
            // var (optimalPolicy, _) = FindOptimalPolicy(world, new GamblersWorldRewarder(world));
            // Console.WriteLine("optimal policy:");
            // PlayGamesWithPolicy(world, optimalPolicy);
        }

示例#7

显示文件

        /// <summary>
        /// Demo entry: finds a policy with <c>DpPolicyOptimiser.FindOptimalPolicy</c>, evaluates that
        /// policy into a fresh <c>ValueTable</c>, and prints the policy's actions followed by the
        /// freshly evaluated values.
        /// </summary>
        /// <remarks>
        /// The evaluated table is what gets printed. Previously the table returned by the optimiser
        /// was printed instead, which left the result of <c>Evaluate</c> unused.
        /// </remarks>
        private static void EvaluatePolicyValues()
        {
            const double probabilityOfHeads = 0.4;
            const int    dollarsToWin       = 100;

            var world    = new GamblersWorld(probabilityOfHeads, dollarsToWin);
            var rewarder = new GamblersWorldRewarder(world);

            Console.WriteLine("Optimal policy:");

            // Only the policy is needed here; the optimiser's own value table is discarded
            // because the values are re-derived by evaluating the policy below.
            var (policy, _) = DpPolicyOptimiser.FindOptimalPolicy(world, rewarder);

            var evalValues = new ValueTable<GamblersWorldState, GamblersWorldAction>(world);
            evalValues.Evaluate(policy, rewarder);

            PrintPolicyActions(world, policy);
            PrintAllValues(world, evalValues);

            // Disabled alternative: evaluate the always-stake-max policy instead.
            // Console.WriteLine("Always stake max policy:");
            // var policy = new AlwaysStakeMaxPolicy(world);
            // PrintPolicyActionsAndValues(world, policy, rewarder);
        }

示例#8

显示文件

        /// <summary>
        /// Creates a <see cref="GamblersProblemPlayer"/> for the given world and policy and calls
        /// its <c>Play()</c> method.
        /// </summary>
        /// <param name="world">World handed to the player.</param>
        /// <param name="policy">Policy handed to the player.</param>
        private static void PlayGamesWithPolicy(GamblersWorld world, IGamblersPolicy policy)
            => new GamblersProblemPlayer(world, policy).Play();

示例#9

显示文件

 /// <summary>
 /// Creates a value table with one entry per state of <paramref name="world"/>. Every entry
 /// starts at 0.0 except the one at index <c>world.DollarsToWin</c>, which is set to 1.0.
 /// </summary>
 /// <param name="world">World that determines the table size and the index seeded with 1.0.</param>
 public GamblersValueTable(GamblersWorld world)
 {
     _world = world;

     var stateCount = world.AllStates().Count();

     _values = new double[stateCount];
     _values[world.DollarsToWin] = 1.0;
 }

示例#10

显示文件

 /// <summary>
 /// Creates a player that stores the given world and policy for later use.
 /// </summary>
 /// <param name="world">World stored in <c>_world</c>.</param>
 /// <param name="policy">Policy stored in <c>_policy</c>.</param>
 public GamblersProblemPlayer(GamblersWorld world, IGamblersPolicy policy)
 {
     _policy = policy;
     _world  = world;
 }

示例#11

显示文件

 /// <summary>
 /// Creates a rewarder that stores the given world for later use.
 /// </summary>
 /// <param name="world">World stored in <c>_world</c>.</param>
 public GamblersWorldRewarder(GamblersWorld world) => _world = world;

示例#12

显示文件

 /// <summary>
 /// Creates a policy that stores the given world for later use.
 /// </summary>
 /// <param name="world">World stored in <c>_world</c>.</param>
 public AlwaysStakeMaxPolicy(GamblersWorld world) => _world = world;

示例#13

显示文件

 /// <summary>
 /// Allocates the action array with one zero-initialised entry per state of
 /// <paramref name="world"/>.
 /// </summary>
 /// <param name="world">World whose state count determines the array length.</param>
 private GreedyGamblersPolicy(GamblersWorld world)
 {
     var stateCount = world.AllStates().Count();

     _actions = new int[stateCount];
 }