示例#1
0
        /// <summary>
        /// Evaluates a uniform random policy on a grid world and prints the
        /// resulting values, then twice builds a greedy policy from the current
        /// values, re-evaluates and prints them. The last greedy policy is
        /// printed at the end.
        /// </summary>
        public static void Run()
        {
            var grid          = new GridWorld();
            var uniformPolicy = new UniformRandomGridWorldPolicy();
            var rewards       = new NegativeAtNonTerminalStatesGridWorldRewarder();
            var table         = new GridWorldValueTable(grid);

            // Starting point: values under the uniform random policy.
            table.Evaluate(uniformPolicy, rewards);
            table.Print();

            // Manually iterate a couple of times - the optimal policy is greedy
            // with respect to the initial random policy values.
            const int greedyPasses = 2;

            var greedy = GreedyGridWorldPolicy.Create(grid, table, rewards);

            for (var pass = 1; pass <= greedyPasses; pass++)
            {
                table.Evaluate(greedy, rewards);
                table.Print();

                // Only rebuild the greedy policy when another pass will use it,
                // so the policy printed below is the one evaluated last.
                if (pass < greedyPasses)
                {
                    greedy = GreedyGridWorldPolicy.Create(grid, table, rewards);
                }
            }

            greedy.Print();
        }
示例#2
0
        /// <summary>
        /// Evaluates a uniform random policy on a freshly created grid world
        /// and prints the resulting value table.
        /// </summary>
        public static void Run()
        {
            var grid          = new GridWorld();
            var uniformPolicy = new UniformRandomGridWorldPolicy();
            var rewards       = new NegativeAtNonTerminalStatesGridWorldRewarder();

            // The value table is built over the grid world created above.
            var table = new GridWorldValueTable(grid);
            table.Evaluate(uniformPolicy, rewards);

            table.Print();
        }
示例#3
0
        /// <summary>
        /// Evaluates the same policy and rewarder with both the grid-world
        /// specific value table and the generic value table, then asserts via
        /// <c>AllValuesAreEqual</c> that the two tables agree.
        /// </summary>
        public static void Evaluates_to_same_values_as_gridworld_value_table()
        {
            // Arrange
            var world         = new GridWorld();
            var rewards       = new NegativeAtNonTerminalStatesGridWorldRewarder();
            var uniformPolicy = new UniformRandomGridWorldPolicy();

            var specificTable = new GridWorldValueTable(world);
            var genericTable  = new ValueTable<GridWorldState, GridWorldAction>(world);

            // Act: run the same evaluation against both table implementations.
            specificTable.Evaluate(uniformPolicy, rewards);
            genericTable.Evaluate(uniformPolicy, rewards);

            // Assert
            Assert.That(() => AllValuesAreEqual(world, genericTable, specificTable));
        }