Example #1
0
        /// <summary>
        /// Builds a <see cref="GreedyPolicy{TState, TAction}"/> for <paramref name="problem"/> by
        /// storing, for every state the problem enumerates, the action returned by
        /// <c>FindBestAction</c>.
        /// </summary>
        /// <param name="problem">Problem whose <c>AllStates()</c> are enumerated; also passed to the policy constructor.</param>
        /// <param name="valueTable">Value table forwarded to <c>FindBestAction</c> for each state.</param>
        /// <param name="rewarder">Rewarder forwarded to <c>FindBestAction</c> for each state.</param>
        /// <returns>The newly created policy with an action stored for each enumerated state.</returns>
        public static GreedyPolicy<TState, TAction> Create(
            IProblem<TState, TAction> problem,
            ValueTable<TState, TAction> valueTable,
            IRewarder<TState, TAction> rewarder)
        {
            var policy = new GreedyPolicy<TState, TAction>(problem);

            // Fill the policy's action map one state at a time.
            foreach (var currentState in problem.AllStates())
            {
                policy._actions[currentState] =
                    FindBestAction(problem, currentState, valueTable, rewarder);
            }

            return policy;
        }
Example #2
0
        FindOptimalPolicy<TState, TAction>(
            IProblem<TState, TAction> problem,
            IRewarder<TState, TAction> rewarder,
            int evaluationSweepsPerPolicyUpdate = 1)
            where TState : struct
            where TAction : struct
        {
            // Alternates between evaluating the current policy into a value table and
            // deriving a new greedy policy from that table. Stops as soon as the newly
            // derived policy has the same actions as the current one, or after
            // maxIterations rounds. Progress is reported on the console.
            //
            // problem:  passed to the value table, the initial policy and every greedy policy.
            // rewarder: passed to every evaluation and every greedy-policy construction.
            // evaluationSweepsPerPolicyUpdate: forwarded unchanged to each Evaluate call.
            //
            // Returns the last adopted policy together with the value table.
            const int maxIterations = 100;
            const int lastIteration = maxIterations - 1;

            var valueTable = new ValueTable<TState, TAction>(problem);

            // Kept typed as the interface so Evaluate is called with an IPolicy argument.
            IPolicy<TState, TAction> startingPolicy = new UniformRandomPolicy<TState, TAction>(problem);

            // Seed the value table from the uniform random policy, then take the first greedy step.
            valueTable.Evaluate(startingPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
            var currentPolicy = GreedyPolicy<TState, TAction>.Create(problem, valueTable, rewarder);

            for (var i = 0; i <= lastIteration; i++)
            {
                valueTable.Evaluate(currentPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
                var candidatePolicy = GreedyPolicy<TState, TAction>.Create(problem, valueTable, rewarder);

                if (candidatePolicy.HasSameActionsAs(currentPolicy))
                {
                    // The greedy step no longer changes any action: stop iterating.
                    Console.WriteLine($"Found optimal policy at iteration {i}");
                    break;
                }
                else
                {
                    currentPolicy = candidatePolicy;

                    // Out of iterations: the policy adopted just above is returned even though
                    // the value table was last evaluated under the previous policy.
                    if (i == lastIteration)
                    {
                        Console.WriteLine($"Policy iteration did not converge by iteration {i}");
                    }
                }
            }

            return (currentPolicy, valueTable);
        }