コード例 #1
0
        private static GamblersWorldAction FindBestAction(
            GamblersWorld world,
            GamblersWorldState state,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var maxActionValue = double.MinValue;
            var maxAction      = new GamblersWorldAction(0);

            foreach (var action in world.AvailableActions(state))
            {
                var actionValue = 0.0;

                foreach (var(nextState, pNextState) in world.PossibleStates(state, action))
                {
                    var nextStateValue = valueTable.Value(nextState);
                    var reward         = rewarder.Reward(state, nextState, action);

                    actionValue += pNextState * (reward + nextStateValue);
                }

                if (actionValue > maxActionValue)
                {
                    maxActionValue = actionValue;
                    maxAction      = action;
                }
            }

            return(maxAction);
        }
コード例 #2
0
        public static GreedyGamblersPolicy Create(
            GamblersWorld world,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var greedyPolicy = new GreedyGamblersPolicy(world);

            foreach (var state in world.AllStates())
            {
                var bestAction = FindBestAction(world, state, valueTable, rewarder);
                greedyPolicy._actions[state.DollarsInHand] = bestAction.Stake;
            }

            return(greedyPolicy);
        }