/// <summary>
/// Evaluates a uniform random policy on a grid world, then twice builds a
/// <c>GreedyGridWorldPolicy</c> from the current value table and re-evaluates
/// it. The value table is printed after every evaluation and the last greedy
/// policy is printed at the end.
/// </summary>
public static void Run()
{
    var grid = new GridWorld();
    var uniformPolicy = new UniformRandomGridWorldPolicy();
    var rewards = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var stateValues = new GridWorldValueTable(grid);

    // Manually iterate a couple of times - the optimal policy is greedy
    // with respect to the values of the initial random policy.
    stateValues.Evaluate(uniformPolicy, rewards);
    stateValues.Print();

    const int improvementRounds = 2;
    var greedy = GreedyGridWorldPolicy.Create(grid, stateValues, rewards);
    for (var round = 1; ; round++)
    {
        stateValues.Evaluate(greedy, rewards);
        stateValues.Print();

        if (round == improvementRounds)
        {
            break;
        }

        // Rebuild the greedy policy from the values that were just evaluated.
        greedy = GreedyGridWorldPolicy.Create(grid, stateValues, rewards);
    }

    greedy.Print();
}
/// <summary>
/// Evaluates a uniform random policy on a freshly created grid world and
/// prints the resulting value table.
/// </summary>
public static void Run()
{
    var grid = new GridWorld();
    var uniformPolicy = new UniformRandomGridWorldPolicy();
    var rewards = new NegativeAtNonTerminalStatesGridWorldRewarder();

    var stateValues = new GridWorldValueTable(grid);
    stateValues.Evaluate(uniformPolicy, rewards);
    stateValues.Print();
}
// Checks that the generic ValueTable<GridWorldState, GridWorldAction> and the
// grid-world-specific GridWorldValueTable agree (as judged by
// AllValuesAreEqual) after both evaluate the same uniform random policy with
// the same rewarder on the same world.
public static void Evaluates_to_same_values_as_gridworld_value_table()
{
    // Arrange: one world, one rewarder and one policy shared by both tables.
    var world = new GridWorld();
    var rewards = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var uniformPolicy = new UniformRandomGridWorldPolicy();
    var specialisedTable = new GridWorldValueTable(world);
    var genericTable = new ValueTable<GridWorldState, GridWorldAction>(world);

    // Act: evaluate the same policy with each implementation.
    specialisedTable.Evaluate(uniformPolicy, rewards);
    genericTable.Evaluate(uniformPolicy, rewards);

    // Assert: the two tables must agree.
    Assert.That(() => AllValuesAreEqual(world, genericTable, specialisedTable));
}