Esempio n. 1
0
        /// <summary>
        /// Runs a heuristically-biased playout from the given state: at every step the
        /// executable action with the lowest heuristic value is applied, until the
        /// state is terminal or the depth limit is exceeded.
        /// </summary>
        /// <param name="initialPlayoutState">State the playout starts from; a child copy is simulated, the argument itself is not mutated here.</param>
        /// <returns>A Reward holding the final simulated state's score and the initial state's next player.</returns>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();

            int depthCount = 0;

            while (!worldModel.IsTerminal() && depthCount <= MaxPlayoutDepth)
            {
                Action[] actions = worldModel.GetExecutableActions();

                // Pick the action with the minimum heuristic value. The current best
                // value is cached so each action's GetHValue runs once per step — the
                // original re-evaluated the incumbent on every comparison.
                // (Assumes GetHValue is deterministic for a fixed state — TODO confirm.)
                Action biasedAction = actions.First();
                float  bestHValue   = biasedAction.GetHValue(worldModel);
                foreach (Action action in actions)
                {
                    float hValue = action.GetHValue(worldModel);
                    if (hValue < bestHValue)
                    {
                        bestHValue   = hValue;
                        biasedAction = action;
                    }
                }

                biasedAction.ApplyActionEffects(worldModel);
                depthCount++;
            }

            // Track the deepest playout seen so far (diagnostic statistic on the base class).
            if (depthCount > MaxPlayoutDepthReached)
            {
                base.MaxPlayoutDepthReached = depthCount;
            }

            return(new Reward()
            {
                Value = GetWorldModelScore(worldModel),
                PlayerID = initialPlayoutState.GetNextPlayer(),
            });
        }
Esempio n. 2
0
        /// <summary>
        /// Monte-Carlo playout: for each executable action of the initial state, runs
        /// one random rollout (applying random actions until a terminal state) on a
        /// fresh copy of the state, then averages the resulting state scores.
        /// NOTE(review): rollouts have no depth cap, so this only stops when
        /// IsTerminal becomes true — confirm every reachable state terminates.
        /// NOTE(review): random actions are drawn from the initial state's executable
        /// actions, never re-queried mid-rollout — confirm they stay applicable.
        /// </summary>
        /// <param name="initialPlayoutState">State the rollouts start from; each rollout uses its own child copy.</param>
        /// <returns>A Reward with the mean rollout score and the initial state's next player.</returns>
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            Action[] actions = initialPlayoutState.GetExecutableActions();
            float accumulatedScore = 0;

            for (int i = 0; i < actions.Length; i++)
            {
                // Each rollout simulates on an independent copy of the initial state.
                WorldModel rolloutState = initialPlayoutState.GenerateChildWorldModel();
                int depth = 0;
                while (!rolloutState.IsTerminal())
                {
                    actions[RandomGenerator.Next(actions.Length)].ApplyActionEffects(rolloutState);
                    depth++;
                }

                // Keep the deepest rollout seen so far as a diagnostic statistic.
                if (depth > MaxPlayoutDepthReached)
                {
                    MaxPlayoutDepthReached = depth;
                }

                accumulatedScore += rolloutState.GetScore();
            }

            return(new Reward()
            {
                Value = accumulatedScore / actions.Length,
                PlayerID = initialPlayoutState.GetNextPlayer(),
            });
        }
Esempio n. 3
0
        /// <summary>
        /// Biased playout: repeatedly applies the action selected by chooseAction from
        /// per-action heuristic values until a terminal state or the depth limit is hit.
        /// The returned reward carries the heuristic of the last action applied and the
        /// player to move in the state that action produced.
        /// </summary>
        /// <remarks>
        /// When DEPTH_LIMIT is zero or negative the iteration counter never advances,
        /// so the loop only stops on a terminal state. If the initial state is already
        /// terminal, a default-initialized Reward is returned.
        /// </remarks>
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel currentState =
                new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward     = new Reward();
            int    iterations = 0;

            while (!currentState.IsTerminal() && iterations <= DEPTH_LIMIT)
            {
                Action[] possibleActions = currentState.GetExecutableActions();

                // Score every executable action with its heuristic value.
                List <float> heuristics = new List <float>();
                foreach (Action possibleAction in possibleActions)
                {
                    heuristics.Add(possibleAction.GetHValue(currentState));
                }

                // Apply the chosen action and advance the turn.
                int chosenIndex = chooseAction(possibleActions, heuristics);
                possibleActions[chosenIndex].ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                reward.PlayerID = currentState.GetNextPlayer();
                reward.Value    = heuristics[chosenIndex];

                // A non-positive DEPTH_LIMIT disables the depth cap entirely.
                if (DEPTH_LIMIT > 0)
                {
                    iterations++;
                }
            }
            return(reward);
        }
Esempio n. 4
0
        /// <summary>
        /// Expands <paramref name="parent"/> with the child node reached by applying
        /// <paramref name="action"/> to a copy of the parent's state; the child is
        /// added to the parent's children and returned.
        /// NOTE(review): the next player is read without calling CalculateNextPlayer
        /// first — confirm ApplyActionEffects already advances the turn here.
        /// </summary>
        protected MCTSNode Expand(MCTSNode parent, Action action)
        {
            WorldModel childState = parent.State.GenerateChildWorldModel();
            action.ApplyActionEffects(childState);

            MCTSNode childNode = new MCTSNode(childState)
            {
                Action   = action,
                Parent   = parent,
                PlayerID = childState.GetNextPlayer(),
            };
            parent.ChildNodes.Add(childNode);

            return(childNode);
        }
Esempio n. 5
0
        /// <summary>
        /// Returns the index of the action with the highest heuristic value.
        /// Ties keep the earliest index; an empty action array yields index 0.
        /// </summary>
        /// <param name="possibleActions">Candidate actions, parallel to <paramref name="heuristics"/>.</param>
        /// <param name="heuristics">Heuristic value of each candidate action.</param>
        /// <returns>Index of the best-scoring action.</returns>
        protected int chooseAction(Action[] possibleActions, List <float> heuristics)
        {
            // Start from negative infinity so the argmax is also correct when every
            // heuristic is <= -1: the original sentinel of -1.0f silently returned
            // index 0 in that case. (The unused bestAction local was removed.)
            float maxHeuristic = float.NegativeInfinity;
            int   best         = 0;

            for (int i = 0; i < possibleActions.Length; i++)
            {
                if (heuristics[i] > maxHeuristic)
                {
                    maxHeuristic = heuristics[i];
                    best         = i;
                }
            }
            return(best);
        }
Esempio n. 6
0
        /// <summary>
        /// Creates the child of <paramref name="parent"/> reached by executing
        /// <paramref name="action"/>: copies the parent's state, applies the action's
        /// effects, advances the turn, and registers a fresh node (Q = N = 0) under
        /// the parent before returning it.
        /// </summary>
        protected MCTSNode Expand(MCTSNode parent, Action action)
        {
            WorldModel childModel = parent.State.GenerateChildWorldModel();
            action.ApplyActionEffects(childModel);
            childModel.CalculateNextPlayer();

            MCTSNode childNode = new MCTSNode(childModel)
            {
                Action   = action,
                Parent   = parent,
                PlayerID = childModel.GetNextPlayer(),
                Q        = 0,
                N        = 0
            };
            parent.ChildNodes.Add(childNode);

            return(childNode);
        }
Esempio n. 7
0
        // Only worth running multiple playouts in case action is Sword Attack
        protected virtual WorldModel StochasticPlayout(Action action, WorldModel currState)
        {
            if (action.Name.Equals("SwordAttack") && this.MaxPlayouts > 0)
            {
                WorldModel[] tests = new WorldModel[this.MaxPlayouts];
                for (int i = 0; i < this.MaxPlayouts; i++)
                {
                    tests[i] = currState.GenerateChildWorldModel();
                    action.ApplyActionEffects(tests[i]);
                }

                currState = AverageState(tests, (SwordAttack)action);
            }
            else
            {
                currState = currState.GenerateChildWorldModel();
                action.ApplyActionEffects(currState);
            }

            return(currState);
        }
Esempio n. 8
0
 // Expand step of MCTS — not yet implemented.
 // NOTE(review): other Expand variants in this file copy the parent's state, apply
 // the action's effects, and link a new child node under the parent — presumably
 // the intended behavior here as well; confirm before implementing.
 protected MCTSNode Expand(MCTSNode parent, Action action)
 {
     //TODO: implement
     throw new NotImplementedException();
 }