コード例 #1
0
        /// <summary>
        /// Runs a playout from a child copy of <paramref name="initialPlayoutState"/>:
        /// repeatedly picks an action via <c>getNextBiasRandomAction</c>, applies it and
        /// advances the player until the state reports it is terminal.
        /// </summary>
        /// <remarks>
        /// Side effects: <c>ActionHistory</c> is cleared and refilled with one
        /// (next player, action) entry per simulated step, and
        /// <c>MaxPlayoutDepthReached</c> is raised when this playout goes deeper
        /// than any previous one.
        /// </remarks>
        /// <param name="initialPlayoutState">State the playout starts from; only a generated child model is mutated here.</param>
        /// <returns>A reward carrying the final state's next player and score.</returns>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *  choose a from Actions(s) (selection delegated to getNextBiasRandomAction)
             *  s <- Result(s,a)
             * return reward for state s
             */

            // Direct cast: a child model of an unexpected type fails right here with an
            // InvalidCastException instead of a NullReferenceException further down.
            FutureStateWorldModel currentState = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

            ActionHistory.Clear();
            int currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                GOB.Action randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // A fresh pair per step: re-using one mutable pair instance would make every
                // history entry alias the same object and end up holding only the last step.
                ActionHistory.Add(new Pair <int, Action>(currentState.GetNextPlayer(), randomAction));
                currentDepth++;
            }

            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }

            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }