/// <summary>
/// Builds a <see cref="GreedyPolicy{TState, TAction}"/> for <paramref name="problem"/> by
/// recording, for every state yielded by <c>problem.AllStates()</c>, the action that
/// <c>FindBestAction</c> returns for that state.
/// </summary>
/// <param name="problem">Problem whose states are enumerated; also passed to the policy constructor.</param>
/// <param name="valueTable">Value table forwarded to <c>FindBestAction</c>.</param>
/// <param name="rewarder">Rewarder forwarded to <c>FindBestAction</c>.</param>
/// <returns>The newly constructed policy with one recorded action per enumerated state.</returns>
public static GreedyPolicy<TState, TAction> Create(
    IProblem<TState, TAction> problem,
    ValueTable<TState, TAction> valueTable,
    IRewarder<TState, TAction> rewarder)
{
    var policy = new GreedyPolicy<TState, TAction>(problem);

    foreach (var currentState in problem.AllStates())
    {
        // Selection is delegated entirely to FindBestAction; presumably it picks the
        // highest-valued action under valueTable/rewarder -- confirm against its definition.
        var chosenAction = FindBestAction(problem, currentState, valueTable, rewarder);
        policy._actions[currentState] = chosenAction;
    }

    return policy;
}
FindOptimalPolicy<TState, TAction>(
    IProblem<TState, TAction> problem,
    IRewarder<TState, TAction> rewarder,
    int evaluationSweepsPerPolicyUpdate = 1)
    where TState : struct
    where TAction : struct
{
    // Alternates between evaluating the current greedy policy and deriving a new
    // greedy policy from the updated values. Stops as soon as two consecutive greedy
    // policies report the same actions, or after maxIterations rounds.
    // Returns the last greedy policy together with the value table it was derived from.
    const int maxIterations = 100;

    var values = new ValueTable<TState, TAction>(problem);

    // Seed the value table by evaluating a uniform random policy, then take the
    // first greedy policy with respect to those values.
    IPolicy<TState, TAction> initialPolicy = new UniformRandomPolicy<TState, TAction>(problem);
    values.Evaluate(initialPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
    var greedyPolicy = GreedyPolicy<TState, TAction>.Create(problem, values, rewarder);

    var i = 0;
    while (i < maxIterations)
    {
        // Evaluation step: run the configured number of sweeps for the current policy.
        values.Evaluate(greedyPolicy, rewarder, evaluationSweepsPerPolicyUpdate);

        // Improvement step: derive a greedy policy from the refreshed values.
        var candidatePolicy = GreedyPolicy<TState, TAction>.Create(problem, values, rewarder);

        var isStable = candidatePolicy.HasSameActionsAs(greedyPolicy);
        if (isStable)
        {
            Console.WriteLine($"Found optimal policy at iteration {i}");
            break;
        }

        greedyPolicy = candidatePolicy;

        // Only reached on the final round when the policy was still changing.
        if (i == maxIterations - 1)
        {
            Console.WriteLine($"Policy iteration did not converge by iteration {i}");
        }

        i++;
    }

    return (greedyPolicy, values);
}