/// <summary>
/// Demo run: evaluates a uniform random policy on a grid world, then performs
/// two rounds of "build a greedy policy from the current values, evaluate it",
/// printing the value table after every evaluation and the last greedy policy
/// at the end.
/// </summary>
public static void Run()
{
    var grid = new GridWorld();
    var initialPolicy = new UniformRandomGridWorldPolicy();
    var rewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var valueTable = new GridWorldValueTable(grid);

    // Baseline: values under the initial random policy.
    valueTable.Evaluate(initialPolicy, rewarder);
    valueTable.Print();

    // Manually iterate a couple of times. Per the original author's note, the
    // optimal policy is greedy with respect to the initial random policy's values.
    const int improvementRounds = 2;
    var greedyPolicy = GreedyGridWorldPolicy.Create(grid, valueTable, rewarder);
    for (var round = 1; round <= improvementRounds; round++)
    {
        // The first round uses the policy created above; later rounds rebuild it
        // from the values produced by the previous round's evaluation.
        if (round > 1)
        {
            greedyPolicy = GreedyGridWorldPolicy.Create(grid, valueTable, rewarder);
        }

        valueTable.Evaluate(greedyPolicy, rewarder);
        valueTable.Print();
    }

    greedyPolicy.Print();
}
/// <summary>
/// Demo run: evaluates a uniform random policy on a grid world and prints the
/// resulting value table.
/// </summary>
public static void Run()
{
    var grid = new GridWorld();
    var randomPolicy = new UniformRandomGridWorldPolicy();
    var rewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();

    var valueTable = new GridWorldValueTable(grid);
    valueTable.Evaluate(randomPolicy, rewarder);
    valueTable.Print();
}
/// <summary>
/// Evaluates the same uniform random policy with both the grid-world specific
/// value table and the generic value table, then asserts that
/// <c>AllValuesAreEqual</c> reports true for the two tables.
/// </summary>
public static void Evaluates_to_same_values_as_gridworld_value_table()
{
    // Arrange
    var world = new GridWorld();
    var rewards = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var randomPolicy = new UniformRandomGridWorldPolicy();
    var specialisedTable = new GridWorldValueTable(world);
    var genericTable = new ValueTable<GridWorldState, GridWorldAction>(world);

    // Act
    specialisedTable.Evaluate(randomPolicy, rewards);
    genericTable.Evaluate(randomPolicy, rewards);

    // Assert
    Assert.That(() => AllValuesAreEqual(world, genericTable, specialisedTable));
}
/// <summary>
/// Compares, for every state of <paramref name="problem"/>, the value held by
/// the generic value table with the value held by the grid-world value table.
/// </summary>
/// <param name="problem">Problem whose <c>AllStates()</c> are enumerated.</param>
/// <param name="genericValues">Generic value table to compare.</param>
/// <param name="gridValues">Grid-world specific value table to compare.</param>
/// <returns>
/// <c>true</c> when every pair of values differs by at most 0.01;
/// <c>false</c> as soon as at least one pair differs by more than that.
/// </returns>
/// <remarks>
/// Previously this method returned <c>true</c> unconditionally (the real
/// comparison was commented out), so the calling test could never fail.
/// Every state is still printed so a failing run shows the complete picture.
/// </remarks>
private static bool AllValuesAreEqual(
    IProblem<GridWorldState, GridWorldAction> problem,
    ValueTable<GridWorldState, GridWorldAction> genericValues,
    GridWorldValueTable gridValues)
{
    // Same tolerance as the assertion that used to be commented out here.
    const double tolerance = 0.01;
    var allEqual = true;

    foreach (var state in problem.AllStates())
    {
        var genericValue = genericValues.Value(state);
        var gridValue = gridValues.Value(state);
        Console.WriteLine($"generic: {genericValue}, grid {gridValue}");

        // Negated "<=" so that a NaN difference counts as a mismatch
        // instead of silently passing.
        if (!(Math.Abs(genericValue - gridValue) <= tolerance))
        {
            Console.WriteLine(
                $"values not equal for state {state}. " +
                $"generic: {genericValue}, grid: {gridValue}");
            allEqual = false;
        }
    }

    return allEqual;
}