Example 1
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();

            int depthReached = 0;

            while (!childWorldModel.IsTerminal())
            {
                // Refresh the executable actions on every iteration; fetching
                // them once before the loop would keep replaying stale actions.
                GOB.Action[] actions = childWorldModel.GetExecutableActions();
                if (actions.Length == 0)
                {
                    // No executable action in a non-terminal state: stop the
                    // playout rather than spin forever.
                    break;
                }

                // Pick a uniformly random action and advance the simulated state.
                int        index = this.RandomGenerator.Next(actions.Length);
                GOB.Action a     = actions[index];
                a.ApplyActionEffects(childWorldModel);
                childWorldModel.CalculateNextPlayer();
                depthReached++;
            }

            if (depthReached > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = depthReached;
            }

            Reward reward = new Reward
            {
                PlayerID = childWorldModel.GetNextPlayer(),
                Value    = childWorldModel.GetScore()
            };

            return reward;
        }
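For context, the Reward produced by these playouts is consumed by a backpropagation step that none of the examples include. A minimal sketch of that step, assuming the Parent, PlayerID, N and Q members that appear on MCTSNode in the examples below; the method name Backpropagate and the two-player sign flip are assumptions, not code from these projects:

        protected virtual void Backpropagate(MCTSNode node, Reward reward)
        {
            // Walk back up to the root, updating visit counts and value.
            while (node != null)
            {
                node.N += 1;
                // Credit the reward from the point of view of the player who
                // owns this node (assumed two-player, zero-sum convention).
                if (node.PlayerID == reward.PlayerID)
                {
                    node.Q += reward.Value;
                }
                else
                {
                    node.Q -= reward.Value;
                }
                node = node.Parent;
            }
        }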
Example 2
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            // Clone the parent's state before applying the action; applying it
            // to parent.State directly would corrupt the parent node.
            WorldModel childState = parent.State.GenerateChildWorldModel();
            action.ApplyActionEffects(childState);

            MCTSNode childNode = new MCTSNode(childState);
            childNode.Parent = parent;
            childNode.Action = action;
            parent.ChildNodes.Add(childNode);
            return childNode;
        }
Example 3
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            WorldModel state = parent.State.GenerateChildWorldModel();
            MCTSNode   child = new MCTSNode(state);

            child.Parent = parent;
            action.ApplyActionEffects(state);
            child.State.CalculateNextPlayer();
            child.Action = action;
            parent.ChildNodes.Add(child);

return child;
        }
Example 4
        protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            WorldModel currentState = parent.State.GenerateChildWorldModel();

            action.ApplyActionEffects(currentState);
            MCTSNode newChild = new MCTSNode(currentState)
            {
                Parent = parent,
                Action = action
            };

            parent.ChildNodes.Add(newChild);

return newChild;
        }
Example 5
        protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)parent.State);

            action.ApplyActionEffects(newState);
            newState.CalculateNextPlayer();
            MCTSNode newNode = new MCTSNode(newState);

            newNode.Parent = parent;
            newNode.Q      = 0;
            newNode.N      = 0;
            newNode.Action = action;
            parent.ChildNodes.Add(newNode);
return newNode;
        }
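Examples 5, 7, 10 and 13 initialize the per-node statistics N (visit count) and Q (accumulated value) that drive the selection phase. For reference, a minimal sketch of the standard UCT rule built on those two fields; the name BestUCTChild, the exploration constant, and the assumption that Q is a float accumulator are not from these projects:

        protected MCTSNode BestUCTChild(MCTSNode node)
        {
            const float C = 1.41f; // exploration constant, roughly sqrt(2) (assumed)
            MCTSNode best    = null;
            float    bestUCT = float.MinValue;

            foreach (MCTSNode child in node.ChildNodes)
            {
                // Visit unvisited children first so every action is tried once.
                if (child.N == 0)
                {
                    return child;
                }

                float exploitation = child.Q / child.N;
                float exploration  = C * (float)Math.Sqrt(Math.Log(node.N) / child.N);
                if (exploitation + exploration > bestUCT)
                {
                    bestUCT = exploitation + exploration;
                    best    = child;
                }
            }
            return best;
        }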
Example 6
        protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            FutureStateWorldModel newModel = parent.State.GenerateChildWorldModel() as FutureStateWorldModel;

            action.ApplyActionEffects(newModel);
            newModel.CalculateNextPlayer();

            MCTSNode childNode = new MCTSNode(newModel);

            childNode.Action = action;
            childNode.Parent = parent;

            parent.ChildNodes.Add(childNode);

return childNode;
        }
Example 7
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            // Expand from the parent's state, not from the root's current
            // state: for nodes deeper in the tree the two differ.
            WorldModel worldmodel = parent.State.GenerateChildWorldModel();

            action.ApplyActionEffects(worldmodel);
            worldmodel.CalculateNextPlayer();
            MCTSNode n = new MCTSNode(worldmodel)
            {
                Action = action,
                Parent = parent,
                N      = 0,
                Q      = 0
            };

            parent.ChildNodes.Add(n);
            return n;
        }
Example 8
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            // A DEPTH_LIMIT <= 0 disables the depth cut-off.
            while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
            {
                GOB.Action[] possibleActions = newState.GetExecutableActions();
                if (possibleActions.Length == 0)
                {
                    break; // nothing executable: end the playout
                }

                // Score each executable action (Select/Sum/ToList need System.Linq).
                List<double> results = new List<double>();
                for (int i = 0; i < possibleActions.Length; i++)
                {
                    //results.Add(Heuristic(newState, possibleActions[i]));
                    results.Add(possibleActions[i].GetUtility());
                }

                // Turn the scores into a softmax distribution and sample from it.
                List<double> exponentialResults = results.Select(Math.Exp).ToList();
                double       sumExponentials    = exponentialResults.Sum();
                List<double> softmax            = exponentialResults.Select(j => j / sumExponentials).ToList();

                // Fall back to the last action in case rounding keeps the
                // cumulative sum just below the sampled probability.
                GOB.Action bestAction  = possibleActions[possibleActions.Length - 1];
                float      chosenScore = (float)softmax[softmax.Count - 1];

                double prob           = this.RandomGenerator.NextDouble();
                double probabilitySum = 0;
                for (int i = 0; i < possibleActions.Length; i++)
                {
                    probabilitySum += softmax[i];
                    if (probabilitySum >= prob)
                    {
                        bestAction  = possibleActions[i];
                        chosenScore = (float)softmax[i];
                        break;
                    }
                }

                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = chosenScore;
                reward.PlayerID = 0;
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return reward;
        }
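The inline softmax sampling above can be factored into a reusable helper. A minimal sketch under the same assumptions as Example 8 (a RandomGenerator of type System.Random, utilities read through GetUtility(), System.Linq available); the helper name SoftmaxSample is not from the original project:

        private GOB.Action SoftmaxSample(GOB.Action[] actions)
        {
            // Exponentiate the utilities; sampling against the unnormalized
            // cumulative sum is equivalent to sampling from the softmax.
            double[] exps = actions.Select(a => Math.Exp(a.GetUtility())).ToArray();
            double   roll = this.RandomGenerator.NextDouble() * exps.Sum();

            double cumulative = 0;
            for (int i = 0; i < actions.Length; i++)
            {
                cumulative += exps[i];
                if (cumulative >= roll)
                {
                    return actions[i];
                }
            }
            return actions[actions.Length - 1]; // guard against rounding
        }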
Example 9
        protected override MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            WorldModel currentState = parent.State.GenerateChildWorldModel();

            action.ApplyActionEffects(currentState);
            currentState.CalculateNextPlayer();

            MCTSNode newChild = new MCTSNode(currentState)
            {
                Parent = parent,
                Action = action
            };

            parent.ChildNodes.Add(newChild);

return newChild;
        }
Example 10
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            WorldModel state  = parent.State.GenerateChildWorldModel();
            MCTSNode   expand = new MCTSNode(state);

            expand.Parent = parent;
            action.ApplyActionEffects(state);
            expand.Action = action;
            state.CalculateNextPlayer();
            expand.PlayerID = state.GetNextPlayer();
            parent.ChildNodes.Add(expand);

            expand.N = 0;
            expand.Q = 0;

return expand;
        }
Example 11
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            var childWorldModel = parent.State.GenerateChildWorldModel();

            action.ApplyActionEffects(childWorldModel);
            childWorldModel.CalculateNextPlayer();
            var childNode = new MCTSNode(childWorldModel)
            {
                Action   = action,
                PlayerID = parent.PlayerID,
                Parent   = parent
            };

            parent.ChildNodes.Add(childNode);

return childNode;
        }
Example 12
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward = new Reward();

            while (!newState.IsTerminal())
            {
                GOB.Action[] possibleActions = newState.GetExecutableActions();

                int        actionIndex  = this.RandomGenerator.Next(0, possibleActions.Length);
                GOB.Action chosenAction = possibleActions[actionIndex];
                chosenAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = newState.GetScore();
                reward.PlayerID = 0;
            }
return reward;
        }
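All of these Expand and Playout variants plug into the same outer MCTS loop: select a leaf, expand it, play out, and backpropagate the reward. A minimal sketch of that driver, assuming a MaxIterations field, a Selection step that descends the tree and calls Expand, and a Backpropagate like the one sketched after Example 1 (the names Run, Selection and MaxIterations are assumptions):

        public GOB.Action Run(MCTSNode initialNode)
        {
            for (int i = 0; i < this.MaxIterations; i++)
            {
                // Selection + expansion: descend the tree and add one new node.
                MCTSNode leaf = Selection(initialNode);
                // Simulation: estimate the value of the new node's state.
                Reward reward = Playout(leaf.State);
                // Backpropagation: update N and Q along the path to the root.
                Backpropagate(leaf, reward);
            }

            // Recommend the action of the most-visited root child.
            MCTSNode best = null;
            foreach (MCTSNode child in initialNode.ChildNodes)
            {
                if (best == null || child.N > best.N)
                {
                    best = child;
                }
            }
            return best != null ? best.Action : null;
        }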
Example 13
        private MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            // Expand from the parent's state (not the root's current state).
            WorldModel worldmodel = parent.State.GenerateChildWorldModel();

            action.ApplyActionEffects(worldmodel);
            worldmodel.CalculateNextPlayer();
            MCTSNode n = new MCTSNode(worldmodel)
            {
                Action = action,
                Parent = parent,
                N      = 0,
                Q      = 0
            };

            parent.ChildNodes.Add(n);
return n;
        }
Example 14
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel currentState = initialPlayoutState;

            while (!currentState.IsTerminal())
            {
                GOB.Action[] actions = currentState.GetExecutableActions();
                if (actions.Length == 0)
                {
                    // 'continue' would loop forever on an unchanged state; end
                    // the playout instead when no action is executable.
                    break;
                }
                int        index  = this.RandomGenerator.Next(0, actions.Length);
                GOB.Action action = actions[index];
                currentState = currentState.GenerateChildWorldModel();
                action.ApplyActionEffects(currentState);
                this.CurrentDepth++;
            }
            Reward reward = new Reward();

            reward.Value = currentState.GetScore();
return reward;
        }
Example 15
        public override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel state = initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                GOB.Action[] actions = state.GetExecutableActions();
                if (actions.Length == 0)
                {
                    break; // no executable actions: stop instead of dereferencing a null action
                }

                // Greedy choice: try each action on a copy of the state and
                // keep the one with the best combined goal heuristic.
                GOB.Action action = null;
                float      best   = float.MinValue;
                foreach (var a in actions)
                {
                    WorldModel w = state.GenerateChildWorldModel();
                    a.ApplyActionEffects(w);
                    var heuristic = w.GetGoalValue("BeQuick") + (1 / w.GetGoalValue("GainXP")) + w.GetGoalValue("Survive") + w.GetGoalValue("GetRich");

                    if (heuristic > best)
                    {
                        best   = heuristic;
                        action = a;
                    }
                }

                action.ApplyActionEffects(state);
            }

            Reward r = new Reward();

            r.Value    = state.GetScore();
            r.PlayerID = state.GetNextPlayer();

            return r;
        }
Example 16
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel        model = initialPlayoutState.GenerateChildWorldModel();
            List<GOB.Action> actions;
            List<GOB.Action> executableActions = new List<GOB.Action>();

            GOB.Action nextAction = null;
            Reward     reward     = new Reward();
            double     heuristicValue;
            double     accumulatedHeuristicValue;
            double     bestValue, minValue;
            SortedDictionary<double, GOB.Action> heuristicList = new SortedDictionary<double, GOB.Action>();

            actions = model.GetActions();

            while (!model.IsTerminal())
            {
                heuristicList.Clear();
                executableActions.Clear();
                heuristicValue            = 0;
                accumulatedHeuristicValue = 0;

                bestValue = -1;
                minValue  = float.MaxValue;

                if (actions.Count == 0)
                {
                    break;
                }

                foreach (GOB.Action action in actions)
                {
                    if (action.CanExecute(model))
                    {
                        accumulatedHeuristicValue += Math.Pow(Math.E, action.H(model));
                        executableActions.Add(action);
                    }
                }

                foreach (GOB.Action action in executableActions)
                {
                    heuristicValue = Math.Pow(Math.E, action.H(model)) / accumulatedHeuristicValue;

                    if (!heuristicList.ContainsKey(heuristicValue))
                    {
                        heuristicList.Add(heuristicValue, action);
                    }

                    if (heuristicValue > bestValue)
                    {
                        bestValue = heuristicValue;
                    }
                    if (heuristicValue < minValue)
                    {
                        minValue = heuristicValue;
                    }
                }

                double randomNumber = GetRandomNumber(minValue, bestValue);

                foreach (KeyValuePair<double, GOB.Action> actionHeuristic in heuristicList)
                {
                    if (actionHeuristic.Key >= randomNumber)
                    {
                        nextAction = actionHeuristic.Value;
                        break;
                    }
                }

                if (nextAction == null)
                {
                    break;
                }

                nextAction.ApplyActionEffects(model);
                model.CalculateNextPlayer();
            }

            reward.PlayerID = model.GetNextPlayer();
            reward.Value    = model.GetScore();
return reward;
        }
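Example 16 calls a GetRandomNumber(minValue, bestValue) helper that the listing does not show. A plausible minimal implementation, assuming the RandomGenerator field used by the other examples (the body is a guess at the missing helper, not the original code):

        private double GetRandomNumber(double minimum, double maximum)
        {
            // Uniform sample in [minimum, maximum), rescaled from NextDouble()'s [0, 1).
            return minimum + this.RandomGenerator.NextDouble() * (maximum - minimum);
        }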