Code Example #1
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *   choose a from Actions(s) uniformly at random
             *   s <- Result(s, a)
             * return reward for state s
             */
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            ActionHistory.Clear();
            int currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                // Pick the next action with a biased (not uniform) random policy.
                randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // A fresh pair is allocated each step; reusing a single instance
                // would leave every ActionHistory entry aliasing the same object.
                ActionHistory.Add(new Pair <int, Action>(currentState.GetNextPlayer(), randomAction));
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return new Reward()
            {
                PlayerID = currentState.GetNextPlayer(),
                Value    = currentState.GetScore()
            };
        }
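
Example #1 records each step of the rollout in ActionHistory as a (player, action) pair. The Pair helper it relies on is not shown in the excerpt; the minimal sketch below is an assumption about its shape, and its being a mutable reference type is what makes the per-iteration allocation above necessary.

    // Hypothetical minimal Pair: a mutable reference type holding two values.
    // Because it is a class, sharing one instance across ActionHistory entries
    // would alias them all -- hence the per-iteration allocation above.
    public class Pair <TLeft, TRight>
    {
        public TLeft  Left;
        public TRight Right;

        public Pair(TLeft left, TRight right)
        {
            Left  = left;
            Right = right;
        }
    }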
Code Example #2
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            // Clone the initial state so the simulation never mutates the original.
            //WorldModel newState = initialPlayoutState.GenerateChildWorldModel();
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            // Simulate until a terminal state is reached or the depth cutoff is exceeded.
            while (!newState.IsTerminal() && numberOfIterations <= DEPTH_LIMIT)
            {
                // Score every currently executable action with its heuristic value.
                Action[]     possibleActions = newState.GetExecutableActions();
                List <float> heuristics      = new List <float>();
                for (int i = 0; i < possibleActions.Length; i++)
                {
                    heuristics.Add(possibleActions[i].GetHValue(newState));
                }

                // Pick an action index, biased by the heuristic values.
                int    index      = chooseAction(possibleActions, heuristics);
                Action bestAction = possibleActions[index];
                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();

                // Overwritten every step, so the returned reward reflects the
                // heuristic of the last action applied before the loop exits.
                reward.PlayerID = newState.GetNextPlayer();
                reward.Value    = heuristics[index];

                // A DEPTH_LIMIT of 0 disables the cutoff: the counter never
                // advances, and the playout runs until a terminal state.
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return reward;
        }
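
Example #2 delegates the biased choice to a chooseAction method that is not shown. One plausible implementation is roulette-wheel selection over the heuristic values; the sketch below assumes non-negative heuristics where higher means better, and that this.RandomGenerator is a System.Random (both are assumptions, not confirmed by the source).

        // Hypothetical sketch of chooseAction (not shown in the original source):
        // roulette-wheel selection where index i is picked with probability
        // proportional to heuristics[i]. Assumes higher heuristic = better action
        // and that this.RandomGenerator is a System.Random.
        private int chooseAction(Action[] possibleActions, List <float> heuristics)
        {
            float total = 0f;
            for (int i = 0; i < heuristics.Count; i++)
            {
                total += Math.Max(0f, heuristics[i]);   // ignore negative weights
            }
            if (total <= 0f)
            {
                // No usable weights: fall back to a uniform random pick.
                return this.RandomGenerator.Next(possibleActions.Length);
            }

            float threshold  = (float)this.RandomGenerator.NextDouble() * total;
            float cumulative = 0f;
            for (int i = 0; i < heuristics.Count; i++)
            {
                cumulative += Math.Max(0f, heuristics[i]);
                if (cumulative > threshold)
                {
                    return i;
                }
            }
            return heuristics.Count - 1;   // guard against floating-point drift
        }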
Code Example #3
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            int        currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                // Uniform random policy, with no heuristic bias (cf. Example #1).
                randomAction = currentState.getNextRandomAction(this.RandomGenerator);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return new Reward()
            {
                PlayerID = currentState.GetNextPlayer(),
                Value    = currentState.GetScore()
            };
        }
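
All three methods implement the playout (simulation) phase of Monte Carlo Tree Search. For context, the sketch below shows where Playout sits in one full MCTS iteration; MCTSNode, Selection, Expansion, and Backpropagate are assumed names for the other phases and do not appear in the excerpts.

        // Hypothetical outline of one MCTS iteration, to show where Playout fits.
        // MCTSNode, Selection, Expansion, and Backpropagate are assumed names for
        // the usual tree-search phases; only Playout appears in the excerpts above.
        protected void RunIteration(MCTSNode root)
        {
            MCTSNode selected = Selection(root);         // descend the tree (e.g. by UCT)
            MCTSNode expanded = Expansion(selected);     // add an unexplored child
            Reward   reward   = Playout(expanded.State); // simulate from the new node
            Backpropagate(expanded, reward);             // update stats along the path
        }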