/// <summary>
/// Runs a uniformly random playout starting from a child copy of
/// <paramref name="initialPlayoutState"/> and returns the resulting reward.
/// </summary>
/// <remarks>
/// The loop stops when the state is terminal, when <c>MaxPlayoutDepthAllowed</c>
/// steps have been taken, or when the current state offers no executable actions.
/// <c>CurrentDepth</c> is reset to 0 on entry and incremented once per executed step.
/// </remarks>
/// <param name="initialPlayoutState">State to start from; only a generated child model is advanced.</param>
/// <returns>
/// A <c>Reward</c> built from the final playout state and the player returned by
/// its <c>GetNextPlayer()</c>.
/// </returns>
protected virtual Reward Playout(IWorldModel initialPlayoutState)
{
    IWorldModel currentState = initialPlayoutState.GenerateChildWorldModel();
    CurrentDepth = 0;

    // Original note: perform n playouts for each state to deal with the stochastic
    // nature of actions (MaxPlayoutSimulations is forwarded to StochasticPlayout).
    while (!currentState.IsTerminal() && CurrentDepth < MaxPlayoutDepthAllowed)
    {
        GOB.Action[] actions = currentState.GetExecutableActions();

        // A non-terminal state may still expose no executable action. Without this
        // guard Next(0) yields 0 and actions[0] throws IndexOutOfRangeException.
        if (actions.Length == 0)
        {
            break;
        }

        int randomAction = RandomGenerator.Next(actions.Length);
        currentState = StochasticPlayout(actions[randomAction], currentState, MaxPlayoutSimulations);
        currentState.CalculateNextPlayer();
        CurrentDepth++;
    }

    return new Reward(currentState, currentState.GetNextPlayer());
}
/// <summary>
/// Runs a heuristic-biased playout starting from a child copy of
/// <paramref name="initialPlayoutState"/> and returns the resulting reward.
/// </summary>
/// <remarks>
/// At each step every executable action is keyed by its H value truncated to
/// <c>int</c>, the list is sorted by ascending key, and a threshold is drawn in
/// <c>[0, maxKey)</c>. The first action whose key is strictly greater than the
/// threshold is executed. If no key is strictly positive, no action is executed for
/// that step, but the next player is still recalculated and the depth still advances.
/// The loop stops when the state is terminal, when <c>MaxPlayoutDepthAllowed</c>
/// steps have been taken, or when no action is executable.
/// </remarks>
/// <param name="initialPlayoutState">State to start from; only a generated child model is advanced.</param>
/// <returns>
/// A <c>Reward</c> built from the final playout state and the player returned by
/// its <c>GetNextPlayer()</c>.
/// </returns>
protected override Reward Playout(IWorldModel initialPlayoutState)
{
    IWorldModel state = initialPlayoutState.GenerateChildWorldModel();
    CurrentDepth = 0;

    while (!state.IsTerminal() && CurrentDepth < MaxPlayoutDepthAllowed)
    {
        List<KeyValuePair<int, GOB.Action>> weightedActions = new List<KeyValuePair<int, GOB.Action>>();
        foreach (GOB.Action action in state.GetExecutableActions())
        {
            // The heuristic value is truncated to an int to serve as the selection key.
            weightedActions.Add(new KeyValuePair<int, GOB.Action>((int)action.GetHValue(state), action));
        }

        if (weightedActions.Count == 0)
        {
            break;
        }

        // Ascending by key, so the last entry holds the largest key.
        weightedActions.Sort((left, right) => left.Key.CompareTo(right.Key));

        // Random.Next(maxValue) throws ArgumentOutOfRangeException for a negative
        // bound, which happens when every heuristic value is below zero. Clamp to 0
        // so that case behaves like the all-zero case instead of crashing.
        int maxKey = weightedActions[weightedActions.Count - 1].Key;
        int upperBound = maxKey > 0 ? maxKey : 0;
        int randomValue = this.RandomGenerator.Next(upperBound);

        foreach (KeyValuePair<int, GOB.Action> pair in weightedActions)
        {
            if (pair.Key > randomValue)
            {
                state = StochasticPlayout(pair.Value, state, MaxPlayoutSimulations);
                // NOTE(review): the base Playout relies on StochasticPlayout alone to
                // advance the state; applying the effects again here may double-apply
                // the action - confirm against StochasticPlayout's implementation.
                pair.Value.ApplyActionEffects(state);
                break;
            }
        }

        state.CalculateNextPlayer();
        CurrentDepth++;
    }

    return new Reward(state, state.GetNextPlayer());
}