예제 #1
0
        /// <summary>
        /// Builds a <c>GamblersWorld</c> and its rewarder, asks
        /// <c>DpPolicyOptimiser.FindOptimalPolicy</c> for a policy and value table,
        /// and writes both to the console.
        /// </summary>
        private static void UseDpToFindOptimalPolicy()
        {
            // Fixed settings for this demo run.
            const double headsProbability = 0.4;
            const int winningTotal = 100;
            const int sweepsPerUpdate = 1;

            var gamblersWorld = new GamblersWorld(headsProbability, winningTotal);
            var rewards = new GamblersWorldRewarder(gamblersWorld);

            // The heading is written before the optimiser runs, so anything the
            // optimiser itself prints appears underneath it.
            Console.WriteLine("Policy optimiser:");

            var (optimalPolicy, optimalValues) =
                DpPolicyOptimiser.FindOptimalPolicy(gamblersWorld, rewards, sweepsPerUpdate);

            Console.WriteLine("Optimal policy values:");
            PrintAllValues(gamblersWorld, optimalValues);

            Console.WriteLine("Optimal policy stakes:");
            PrintPolicyActions(gamblersWorld, optimalPolicy);
        }
예제 #2
0
        /// <summary>
        /// Obtains a policy from <c>DpPolicyOptimiser.FindOptimalPolicy</c>, evaluates
        /// that policy into a fresh <c>ValueTable</c>, and prints the policy's actions
        /// together with the freshly evaluated values.
        /// </summary>
        private static void EvaluatePolicyValues()
        {
            const double probabilityOfHeads = 0.4;
            const int    dollarsToWin       = 100;

            var world    = new GamblersWorld(probabilityOfHeads, dollarsToWin);
            var rewarder = new GamblersWorldRewarder(world);

            Console.WriteLine("Optimal policy:");

            // Only the policy is needed here; the optimiser's own value table is
            // discarded because the values are recomputed independently below.
            var (policy, _) = DpPolicyOptimiser.FindOptimalPolicy(world, rewarder);

            var evalValues = new ValueTable<GamblersWorldState, GamblersWorldAction>(world);
            evalValues.Evaluate(policy, rewarder);

            PrintPolicyActions(world, policy);

            // Print the evaluated table. Previously the optimiser's values were
            // printed instead, leaving the evaluation result unused.
            PrintAllValues(world, evalValues);

            // Alternative experiment, kept for reference (disabled):
            // Console.WriteLine("Always stake max policy:");
            // var policy = new AlwaysStakeMaxPolicy(world);
            // PrintPolicyActionsAndValues(world, policy, rewarder);
        }