// Verifies that the generic ValueTable produces exactly the same state values
// as the grid-world-specific GridWorldValueTable when both evaluate the same
// uniform random policy under the same rewarder.
public static void Evaluates_to_same_values_as_gridworld_value_table()
{
    // Arrange: one world, one rewarder, one policy shared by both tables.
    var world = new GridWorld();
    var rewarder = new NegativeAtNonTerminalStatesGridWorldRewarder();
    var policy = new UniformRandomGridWorldPolicy();

    var specificTable = new GridWorldValueTable(world);
    var genericTable = new ValueTable<GridWorldState, GridWorldAction>(world);

    // Act: run policy evaluation through both implementations.
    specificTable.Evaluate(policy, rewarder);
    genericTable.Evaluate(policy, rewarder);

    // Assert: every state's value agrees between the two tables.
    Assert.That(() => AllValuesAreEqual(world, genericTable, specificTable));
}
// Verifies that the generic ValueTable produces exactly the same state values
// as the gamblers-problem-specific GamblersValueTable for the same policy
// and rewarder.
public void Evaluates_to_same_values_as_gamblers_value_table()
{
    const double headsProbability = 0.4;
    const int targetDollars = 100;

    // Arrange: one world, one rewarder, one policy shared by both tables.
    var world = new GamblersWorld(headsProbability, targetDollars);
    var rewarder = new GamblersWorldRewarder(world);
    var policy = new UniformRandomGamblersPolicy();

    var specificTable = new GamblersValueTable(world);
    var genericTable = new ValueTable<GamblersWorldState, GamblersWorldAction>(world);

    // Act: run policy evaluation through both implementations.
    specificTable.Evaluate(policy, rewarder);
    genericTable.Evaluate(policy, rewarder);

    // Assert: every state's value agrees between the two tables.
    Assert.That(() => AllValuesAreEqual(world, genericTable, specificTable));
}
// Finds the optimal policy for the gambler's problem (p(heads) = 0.4, goal of
// $100) via DP policy optimisation, then prints the policy's actions and the
// optimiser's state values to the console.
private static void EvaluatePolicyValues()
{
    const double probabilityOfHeads = 0.4;
    const int dollarsToWin = 100;

    var world = new GamblersWorld(probabilityOfHeads, dollarsToWin);
    var rewarder = new GamblersWorldRewarder(world);

    Console.WriteLine("Optimal policy:");
    var (policy, values) = DpPolicyOptimiser.FindOptimalPolicy(world, rewarder);

    // NOTE(review): this re-evaluation is computed but its result is never
    // printed or asserted — only `values` from the optimiser is displayed
    // below. Looks like leftover scaffolding; confirm intent (e.g. whether
    // PrintAllValues should receive evalValues instead).
    var evalValues = new ValueTable<GamblersWorldState, GamblersWorldAction>(world);
    evalValues.Evaluate(policy, rewarder);

    PrintPolicyActions(world, policy);
    PrintAllValues(world, values);
}