Example #1
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *     choose a from Actions(s) uniformly at random
             *     s <- Result(s, a)
             * return reward for state s
             */
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            ActionHistory.Clear();
            int currentDepth       = 0;
            Pair <int, Action> par; // (player, action) pair recorded for each playout step

            while (!currentState.IsTerminal())
            {
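                // sample the next action using the domain's bias-weighted random selection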
                randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // create a new pair each iteration so every history entry keeps its own values
                par = new Pair <int, Action>(currentState.GetNextPlayer(), randomAction);
                ActionHistory.Add(par);
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
Example #2
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            //WorldModel newState = initialPlayoutState.GenerateChildWorldModel();
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT)) // a DEPTH_LIMIT of 0 or less means no depth cap
            {
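                // score every executable action with its heuristic, then let chooseAction pick one (assumed to be biased toward better scores)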
                Action[]     possibleActions = newState.GetExecutableActions();
                List <float> heuristics      = new List <float>();
                for (int i = 0; i < possibleActions.Length; i++)
                {
                    heuristics.Add(possibleActions[i].GetHValue(newState));
                }

                int    index      = chooseAction(possibleActions, heuristics);
                Action bestAction = possibleActions[index];
                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.PlayerID = newState.GetNextPlayer();
                reward.Value    = heuristics[index];
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return(reward);
        }
Example #3
        private Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel state = new FutureStateWorldModel(initialPlayoutState.GenerateChildWorldModel());

            while (!state.IsTerminal())
            {
                GOB.Action[] actions = state.GetExecutableActions();
                // apply a random executable action and advance the turn
                actions[RandomGenerator.Next() % actions.Length].ApplyActionEffects(state);
                state.CalculateNextPlayer();
                this.MaxPlayoutDepthReached++;
            }
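            // note: this playout returns a default Reward; neither the score nor the player is filled in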
            return(new Reward());
        }
Example #4
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || !(numberOfIterations >= DEPTH_LIMIT)))
            {
                GOB.Action[]  possibleActions = newState.GetExecutableActions();
                List <double> results         = new List <double>();
                float         chosenScore     = 0f;
                int           i;
                for (i = 0; i < possibleActions.Length; i++)
                {
                    //results.Add(Heuristic(newState, possibleActions[i]));
                    results.Add(possibleActions[i].GetUtility());
                }

                GOB.Action    bestAction         = null;
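                // softmax (Boltzmann) selection: exponentiate the utilities and normalise them into a distribution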
                List <double> exponentialResults = results.Select(Math.Exp).ToList();
                double        sumExponentials    = exponentialResults.Sum();
                List <double> softmax            = exponentialResults.Select(j => j / sumExponentials).ToList();

                double prob           = this.RandomGenerator.NextDouble();
                double probabilitySum = 0;
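                // walk the cumulative distribution and pick the first action whose cumulative probability exceeds the random draw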
                for (i = 0; i < possibleActions.Length; i++)
                {
                    probabilitySum += softmax[i];
                    if (probabilitySum >= prob)
                    {
                        bestAction  = possibleActions[i];
                        chosenScore = (float)softmax[i];
                        break;
                    }
                }

                // guard against floating-point rounding leaving bestAction unset
                if (bestAction == null)
                {
                    bestAction  = possibleActions[possibleActions.Length - 1];
                    chosenScore = (float)softmax[possibleActions.Length - 1];
                }

                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = chosenScore;
                reward.PlayerID = 0;
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return(reward);
        }
Example #5
        virtual protected Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward = new Reward();

            while (!newState.IsTerminal())
            {
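                // vanilla playout: apply uniformly random actions until a terminal state is reached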
                GOB.Action[] possibleActions = newState.GetExecutableActions();

                int        actionIndex  = this.RandomGenerator.Next(0, possibleActions.Length);
                GOB.Action chosenAction = possibleActions[actionIndex];
                chosenAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = newState.GetScore();
                reward.PlayerID = 0;
            }
            return(reward);
        }
Example #6
        private Reward Playout(WorldModel initialPlayoutState)
        {
            //TODO: implement
            FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                //choose between plain MCTS and biased MCTS
                //ChooseRandom(state).ApplyActionEffects(state);
                ChooseBias(state).ApplyActionEffects(state);
                state.CalculateNextPlayer();
                this.MaxPlayoutDepthReached++;
            }

            Reward reward = new Reward();

            reward.Value = state.GetScore();
            return(reward);
        }
Example #7
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            int        currentDepth = 0;

            while (!currentState.IsTerminal())
            {
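                // default policy: pick and apply a uniformly random action, then advance the turn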
                randomAction = currentState.getNextRandomAction(this.RandomGenerator);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }