/// <summary>
/// Evaluates a uniform random policy on a fresh grid world, then performs two
/// manual rounds of "build a greedy policy from the current value table and
/// re-evaluate it". The value table is printed after every evaluation and the
/// last greedy policy is printed at the end.
/// </summary>
public static void Run()
{
    var gridWorld = new GridWorld();
    var uniformPolicy = new UniformRandomGridWorldPolicy();
    var stepRewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var valueTable = new GridWorldValueTable(gridWorld);

    // Baseline: value table under the uniform random policy.
    valueTable.Evaluate(uniformPolicy, stepRewarder);
    valueTable.Print();

    // The rounds are written out by hand rather than looped: the optimal
    // policy is already greedy with respect to the random policy's values,
    // so a couple of iterations is enough.

    // Round 1: greedy policy from the random-policy values, then re-evaluate.
    var improvedPolicy = GreedyGridWorldPolicy.Create(gridWorld, valueTable, stepRewarder);
    valueTable.Evaluate(improvedPolicy, stepRewarder);
    valueTable.Print();

    // Round 2: greedy policy from the updated values, then re-evaluate.
    improvedPolicy = GreedyGridWorldPolicy.Create(gridWorld, valueTable, stepRewarder);
    valueTable.Evaluate(improvedPolicy, stepRewarder);
    valueTable.Print();

    // Show the policy produced by the last round.
    improvedPolicy.Print();
}
/// <summary>
/// Evaluates a uniform random policy on a fresh grid world and prints the
/// resulting value table.
/// </summary>
public static void Run()
{
    var gridWorld = new GridWorld();
    var uniformPolicy = new UniformRandomGridWorldPolicy();
    var stepRewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();

    // The value table is bound to the world it is constructed with.
    var valueTable = new GridWorldValueTable(gridWorld);

    valueTable.Evaluate(uniformPolicy, stepRewarder);
    valueTable.Print();
}