Пример #1
0
        /// <summary>
        /// Evaluates one policy/rewarder pair with both the grid-world-specific value table
        /// and the generic <c>ValueTable</c>, then asserts that <c>AllValuesAreEqual</c>
        /// reports the two tables as matching.
        /// </summary>
        public static void Evaluates_to_same_values_as_gridworld_value_table()
        {
            // Arrange: a single world, rewarder and policy shared by both tables.
            var world = new GridWorld();
            var gridRewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();
            var uniformPolicy = new UniformRandomGridWorldPolicy();

            GridWorldValueTable specialisedTable = new GridWorldValueTable(world);
            ValueTable<GridWorldState, GridWorldAction> genericTable =
                new ValueTable<GridWorldState, GridWorldAction>(world);

            // Act: run policy evaluation on each table with identical inputs.
            specialisedTable.Evaluate(uniformPolicy, gridRewarder);
            genericTable.Evaluate(uniformPolicy, gridRewarder);

            // Assert: the comparison is passed as a delegate, exactly as before.
            Assert.That(
                () => AllValuesAreEqual(world, genericTable, specialisedTable));
        }
Пример #2
0
        /// <summary>
        /// Evaluates one policy/rewarder pair with both the gambler-specific value table
        /// and the generic <c>ValueTable</c>, then asserts that <c>AllValuesAreEqual</c>
        /// reports the two tables as matching.
        /// </summary>
        public void Evaluates_to_same_values_as_gamblers_value_table()
        {
            // Arrange: world parameters passed to the GamblersWorld constructor.
            const double headsProbability = 0.4;
            const int targetDollars = 100;

            var world = new GamblersWorld(headsProbability, targetDollars);
            var gamblersRewarder = new GamblersWorldRewarder(world);
            var uniformPolicy = new UniformRandomGamblersPolicy();

            GamblersValueTable specialisedTable = new GamblersValueTable(world);
            ValueTable<GamblersWorldState, GamblersWorldAction> genericTable =
                new ValueTable<GamblersWorldState, GamblersWorldAction>(world);

            // Act: run policy evaluation on each table with identical inputs.
            specialisedTable.Evaluate(uniformPolicy, gamblersRewarder);
            genericTable.Evaluate(uniformPolicy, gamblersRewarder);

            // Assert: the comparison is passed as a delegate, exactly as before.
            Assert.That(
                () => AllValuesAreEqual(world, genericTable, specialisedTable));
        }
Пример #3
0
        /// <summary>
        /// Builds a gambler's world, asks <c>DpPolicyOptimiser.FindOptimalPolicy</c> for a
        /// policy and its values, evaluates that policy on a fresh generic value table, and
        /// prints the policy actions followed by the values returned by the optimiser.
        /// </summary>
        private static void EvaluatePolicyValues()
        {
            const double headsProbability = 0.4;
            const int targetDollars = 100;

            GamblersWorld gamblersWorld = new GamblersWorld(headsProbability, targetDollars);
            GamblersWorldRewarder gamblersRewarder = new GamblersWorldRewarder(gamblersWorld);

            // The heading is written before the optimiser runs, as in the original ordering.
            Console.WriteLine("Optimal policy:");
            var (optimalPolicy, optimalValues) =
                DpPolicyOptimiser.FindOptimalPolicy(gamblersWorld, gamblersRewarder);

            // NOTE(review): this table is evaluated but not read again in this method;
            // PrintAllValues below receives the optimiser's values instead. Confirm whether
            // printing the evaluated table was intended.
            var evaluatedTable =
                new ValueTable<GamblersWorldState, GamblersWorldAction>(gamblersWorld);
            evaluatedTable.Evaluate(optimalPolicy, gamblersRewarder);

            PrintPolicyActions(gamblersWorld, optimalPolicy);
            PrintAllValues(gamblersWorld, optimalValues);

            // Disabled alternative experiment, kept from the original:
            // Console.WriteLine("Always stake max policy:");
            // var policy = new AlwaysStakeMaxPolicy(world);
            // PrintPolicyActionsAndValues(world, policy, rewarder);
        }