示例#1
0
        /// <summary>
        /// Runs a random playout and scores the state it ends in.
        /// The playout operates on the model returned by
        /// <c>initialPlayoutState.GenerateChildWorldModel()</c>, and repeatedly
        /// picks one executable action uniformly at random until the state is
        /// terminal, the depth limit is reached, or no action is executable.
        /// </summary>
        /// <param name="initialPlayoutState">World model the playout starts from.</param>
        /// <returns>
        /// A <see cref="Reward"/> built from the last state reached and the player
        /// reported by that state's <c>GetNextPlayer()</c>.
        /// </returns>
        /// <remarks>Resets and then advances <c>CurrentDepth</c> as a side effect.</remarks>
        protected virtual Reward Playout(IWorldModel initialPlayoutState)
        {
            IWorldModel currentState = initialPlayoutState.GenerateChildWorldModel();

            CurrentDepth = 0;
            while (!currentState.IsTerminal() && CurrentDepth < MaxPlayoutDepthAllowed)
            {
                GOB.Action[] actions = currentState.GetExecutableActions();

                // A non-terminal state may still expose no executable action.
                // Indexing an empty array would throw, so end the playout here
                // and score the state reached so far.
                if (actions == null || actions.Length == 0)
                {
                    break;
                }

                int randomAction = RandomGenerator.Next(actions.Length);

                // MaxPlayoutSimulations is forwarded to StochasticPlayout; presumably it
                // is the number of simulations used to deal with stochastic actions
                // (per the original comment) - confirm against StochasticPlayout.
                currentState = StochasticPlayout(actions[randomAction], currentState, MaxPlayoutSimulations);
                currentState.CalculateNextPlayer();
                CurrentDepth++;
            }

            return new Reward(currentState, currentState.GetNextPlayer());
        }
示例#2
0
        /// <summary>
        /// Runs a heuristic-biased playout and scores the state it ends in.
        /// Each step scores every executable action with <c>GetHValue</c>
        /// (truncated to <c>int</c>), sorts the actions by ascending score, draws a
        /// random threshold below the highest score and executes the first action
        /// whose score exceeds that threshold.
        /// </summary>
        /// <param name="initialPlayoutState">World model the playout starts from.</param>
        /// <returns>
        /// A <see cref="Reward"/> built from the last state reached and the player
        /// reported by that state's <c>GetNextPlayer()</c>.
        /// </returns>
        /// <remarks>Resets and then advances <c>CurrentDepth</c> as a side effect.</remarks>
        protected override Reward Playout(IWorldModel initialPlayoutState)
        {
            IWorldModel state = initialPlayoutState.GenerateChildWorldModel();

            CurrentDepth = 0;
            while (!state.IsTerminal() && CurrentDepth < MaxPlayoutDepthAllowed)
            {
                List<KeyValuePair<int, GOB.Action>> scoredActions = new List<KeyValuePair<int, GOB.Action>>();
                foreach (GOB.Action action in state.GetExecutableActions())
                {
                    scoredActions.Add(new KeyValuePair<int, GOB.Action>((int)action.GetHValue(state), action));
                }

                // Nothing to execute: stop before sorting or indexing the list.
                if (scoredActions.Count == 0)
                {
                    break;
                }

                scoredActions.Sort(
                    delegate(KeyValuePair<int, GOB.Action> p1, KeyValuePair<int, GOB.Action> p2)
                {
                    return p1.Key.CompareTo(p2.Key);
                });

                int        maxKey = scoredActions[scoredActions.Count - 1].Key;
                GOB.Action chosenAction;

                if (maxKey > 0)
                {
                    int randomValue = this.RandomGenerator.Next(maxKey);

                    // randomValue < maxKey, so the last entry always qualifies; it is
                    // the default when no earlier entry exceeds the threshold.
                    chosenAction = scoredActions[scoredActions.Count - 1].Value;
                    foreach (KeyValuePair<int, GOB.Action> pair in scoredActions)
                    {
                        if (pair.Key > randomValue)
                        {
                            chosenAction = pair.Value;
                            break;
                        }
                    }
                }
                else
                {
                    // Every score is <= 0. A negative bound would make Next() throw and a
                    // zero bound would select nothing, leaving the loop to spin on an
                    // unchanged state. Fall back to a uniform random choice instead.
                    chosenAction = scoredActions[this.RandomGenerator.Next(scoredActions.Count)].Value;
                }

                state = StochasticPlayout(chosenAction, state, MaxPlayoutSimulations);

                // NOTE(review): StochasticPlayout may already apply the action's effects to
                // the state it returns - confirm this call is not a double application.
                chosenAction.ApplyActionEffects(state);

                state.CalculateNextPlayer();
                CurrentDepth++;
            }

            return new Reward(state, state.GetNextPlayer());
        }