Code example #1
0
        /// <summary>
        /// MCTS playout (simulation) phase with biased action selection:
        /// rolls the cloned state forward with bias-selected actions until a
        /// terminal state is reached, recording each (next-player, action)
        /// step in ActionHistory, and returns the terminal state's reward.
        /// </summary>
        /// <param name="initialPlayoutState">State to start the rollout from (cloned, not mutated).</param>
        /// <returns>Reward holding the terminal state's score and next player id.</returns>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *  choose a from Actions(s) at (biased) random
             *  s <- Result(s,a)
             * return reward for state s
             */
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            ActionHistory.Clear();
            int currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // BUG FIX: the original allocated ONE Pair before the loop and
                // re-added that same instance every iteration, so every entry in
                // ActionHistory aliased a single object holding only the LAST
                // iteration's player/action. Allocate a fresh Pair per step.
                ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));
                currentDepth++;
            }
            // Track the deepest single rollout seen so far (diagnostic statistic).
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
Code example #2
0
File: MCTS.cs  Project: pmsmm/Terceiro-Projeto-de-IAJ
        /// <summary>
        /// MCTS playout (simulation) phase: applies uniformly random executable
        /// actions until a terminal state is reached, then returns that state's
        /// score as the reward.
        /// </summary>
        /// <param name="initialPlayoutState">State to start the rollout from (copied, not mutated).</param>
        /// <returns>Reward with the terminal state's score; PlayerID is fixed at 0.</returns>
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);

            while (!newState.IsTerminal())
            {
                GOB.Action[] possibleActions = newState.GetExecutableActions();

                int        actionIndex  = this.RandomGenerator.Next(0, possibleActions.Length);
                GOB.Action chosenAction = possibleActions[actionIndex];
                chosenAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
            }

            // FIX: the original recomputed the reward on every loop iteration and,
            // when the initial state was already terminal, returned a default
            // (zero-valued) reward without scoring it. Score once, after the
            // rollout finishes.
            return(new Reward()
            {
                // NOTE(review): PlayerID hard-coded to 0 as in the original;
                // sibling implementations use GetNextPlayer() — confirm intent.
                PlayerID = 0, Value = newState.GetScore()
            });
        }
Code example #3
0
        /// <summary>
        /// MCTS playout (simulation) phase with biased action selection:
        /// rolls a child copy of the state forward with ChooseBias until a
        /// terminal state is reached and returns that state's score.
        /// </summary>
        /// <param name="initialPlayoutState">State to start the rollout from (child copy, not mutated).</param>
        /// <returns>Reward holding the terminal state's score (PlayerID left at default).</returns>
        private Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();
            int currentDepth = 0;

            while (!state.IsTerminal())
            {
                // Swap in ChooseRandom(state) for a plain (uniform) MCTS playout.
                //ChooseRandom(state).ApplyActionEffects(state);
                ChooseBias(state).ApplyActionEffects(state);
                state.CalculateNextPlayer();
                currentDepth++;
            }

            // BUG FIX: the original did MaxPlayoutDepthReached++ once per step of
            // EVERY playout, so the field accumulated the total depth over all
            // playouts instead of tracking the maximum depth of a single one
            // (which is what its name and the sibling implementations do).
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }

            Reward reward = new Reward();

            reward.Value = state.GetScore();
            // NOTE(review): reward.PlayerID is left at its default here, unlike
            // the sibling implementations which set it — confirm against callers.
            return(reward);
        }
Code example #4
0
        /// <summary>
        /// MCTS playout (simulation) phase: rolls a child copy of the state
        /// forward with uniformly random actions until a terminal state is
        /// reached, updates the deepest-rollout statistic, and returns the
        /// terminal state's reward.
        /// </summary>
        /// <param name="initialPlayoutState">State to start the rollout from (child copy, not mutated).</param>
        /// <returns>Reward with the terminal state's score and next player id.</returns>
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            var rolloutState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;
            var depth        = 0;

            while (!rolloutState.IsTerminal())
            {
                // Pick a random action, apply it, and advance the turn.
                GOB.Action action = rolloutState.getNextRandomAction(this.RandomGenerator);
                action.ApplyActionEffects(rolloutState);
                rolloutState.CalculateNextPlayer();
                depth++;
            }

            // Keep track of the deepest single rollout seen so far.
            if (this.MaxPlayoutDepthReached < depth)
            {
                this.MaxPlayoutDepthReached = depth;
            }

            return new Reward()
            {
                PlayerID = rolloutState.GetNextPlayer(),
                Value    = rolloutState.GetScore()
            };
        }