Example #1
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            Action[] actions = initialPlayoutState.GetExecutableActions();
            float    score   = 0;

            foreach (Action action in actions)
            {
                WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();
                int        depthCount = 0;
                while (!worldModel.IsTerminal())
                {
                    Action randomAction = actions[RandomGenerator.Next(actions.Length)];
                    randomAction.ApplyActionEffects(worldModel);
                    depthCount++;
                }
                if (depthCount > MaxPlayoutDepthReached)
                {
                    MaxPlayoutDepthReached = depthCount;
                }

                score += worldModel.GetScore();
            }

            return(new Reward()
            {
                Value = score / actions.Length,
                PlayerID = initialPlayoutState.GetNextPlayer(),
            });
        }
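Note: Example #1 runs one random rollout per initially executable action and averages the scores, and it keeps reusing the initial action list while the simulated state evolves. Most of the other examples on this page instead simulate a single trajectory and re-query the executable actions at every step. A minimal sketch of that more common shape, assuming the same WorldModel / Reward / RandomGenerator members used throughout these examples:

        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            // Work on a copy so the node's stored state is not mutated.
            WorldModel state = initialPlayoutState.GenerateChildWorldModel();
            int        depth = 0;

            while (!state.IsTerminal())
            {
                // Refresh the action list each step; earlier actions may no longer be executable.
                Action[] actions = state.GetExecutableActions();
                if (actions.Length == 0)
                {
                    break;
                }
                Action randomAction = actions[RandomGenerator.Next(actions.Length)];
                randomAction.ApplyActionEffects(state);
                state.CalculateNextPlayer();
                depth++;
            }

            if (depth > MaxPlayoutDepthReached)
            {
                MaxPlayoutDepthReached = depth;
            }

            return(new Reward()
            {
                Value = state.GetScore(),
                PlayerID = state.GetNextPlayer(),
            });
        }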
Example #2
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel worldModel = initialPlayoutState.GenerateChildWorldModel();

            int depthCount = 0;

            while (!worldModel.IsTerminal() && depthCount <= MaxPlayoutDepth)
            {
                Action[] actions      = worldModel.GetExecutableActions();
                Action   biasedAction = actions.First();
                foreach (Action action in actions)
                {
                    if (action.GetHValue(worldModel) < biasedAction.GetHValue(worldModel))
                    {
                        biasedAction = action;
                    }
                }
                biasedAction.ApplyActionEffects(worldModel);
                depthCount++;
            }
            if (depthCount > MaxPlayoutDepthReached)
            {
                base.MaxPlayoutDepthReached = depthCount;
            }

            return(new Reward()
            {
                Value = GetWorldModelScore(worldModel),
                PlayerID = initialPlayoutState.GetNextPlayer(),
            });
        }
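The inner loop of Example #2 is simply an argmin over GetHValue. Under the same assumptions (System.Linq is already in scope, since the example calls actions.First()), the greedy pick can be written in one line:

                // Greedy pick: the executable action with the lowest heuristic value.
                Action biasedAction = actions.OrderBy(a => a.GetHValue(worldModel)).First();

A fully greedy playout like this is deterministic; Examples #10, #14 and #21 soften the bias by sampling from a heuristic-weighted distribution instead.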
Example #3
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();

            GOB.Action[] actions      = childWorldModel.GetExecutableActions();
            int          DepthReached = 0;

            while (!childWorldModel.IsTerminal())
            {
                if (actions.Length > 0)
                {
                    int        index = this.RandomGenerator.Next(actions.Length);
                    GOB.Action a     = actions[index];
                    //GOB.Action a = actions[6];
                    a.ApplyActionEffects(childWorldModel);
                    childWorldModel.CalculateNextPlayer();
                }
                DepthReached++;
            }

            if (DepthReached > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = DepthReached;
            }

            Reward reward = new Reward
            {
                PlayerID = childWorldModel.GetNextPlayer(),
                Value    = childWorldModel.GetScore()
            };

            return(reward);
        }
Example #4
        public virtual Reward Playout(WorldModel initialPlayoutState)
        {
            GOB.Action   action;
            GOB.Action[] actions;
            int          random;

            WorldModel state = initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                //should choose randomly
                actions = state.GetExecutableActions();

                if (actions.Length == 0)
                {
                    // No executable actions left: stop the playout and score the current state.
                    // (A continue here would spin forever, since nothing changes the state.)
                    break;
                }

                random = RandomGenerator.Next(0, actions.Length);
                action = actions [random];
                action.ApplyActionEffects(state);
            }

            Reward r = new Reward();

            r.Value    = state.GetScore();
            r.PlayerID = state.GetNextPlayer();

            return(r);
        }
Example #5
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            GOB.Action action;
            WorldModel model = initialPlayoutState.GenerateChildWorldModel();

            GOB.Action[] actions;
            Reward       reward = new Reward();

            while (!model.IsTerminal())
            {
                actions = model.GetExecutableActions();

                if (actions.Length == 0)
                {
                    break;
                }

                action = actions[RandomGenerator.Next(0, actions.Length)];
                action.ApplyActionEffects(model);
                model.CalculateNextPlayer();
            }

            reward.PlayerID = model.GetNextPlayer();
            reward.Value    = model.GetScore();
            return(reward);
        }
Example #6
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *  chose a from Actions(s) uniformly at random
             *  s <- Result(s,a)
             *  return reward for state s
             */
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            ActionHistory.Clear();
            int currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // Allocate a fresh pair per step; reusing a single Pair instance would make
                // every ActionHistory entry alias the same object.
                ActionHistory.Add(new Pair <int, Action>(currentState.GetNextPlayer(), randomAction));
                currentDepth++;
            }
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
        protected override float RunPlayout(WorldModel initialPlayoutState)
        {
            GOB.Action nextAction;
            WorldModel currentState        = initialPlayoutState;
            var        currentPlayoutDepth = 0;

            while (!currentState.IsTerminal())
            {
                var executableActions = currentState.GetExecutableActions();

                //Bias: Choose among the 50% best
                var maxIndex = Convert.ToInt32(Math.Ceiling(executableActions.Count * 0.5));
                var orderedExecutableActions = executableActions.OrderByDescending(x => this.Heuristic.H(currentState, x)).Take(maxIndex).ToList();

                var index = this.RandomGenerator.Next(0, maxIndex);
                nextAction = orderedExecutableActions[index];

                currentState = currentState.GenerateChildWorldModel(nextAction);

                currentPlayoutDepth++;
            }

            if (currentPlayoutDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentPlayoutDepth;
            }

            //var currentPlayer = currentState.GetNextPlayer();
            //var value = initialPlayoutState.GetNextPlayer() == currentPlayer ? score : -score;

            return(currentState.GetScore());
        }
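The RunPlayout above biases the rollout toward the better half of the actions: it orders them by Heuristic.H, keeps the top 50%, and picks uniformly from that subset. A sketch of that selection factored into a helper, assuming GetExecutableActions returns a list and the same Heuristic.H(state, action) scorer (the helper name PickTopHalf is hypothetical):

        // Hypothetical helper: order actions by heuristic, keep the better half, pick uniformly.
        private GOB.Action PickTopHalf(WorldModel state, List<GOB.Action> executableActions)
        {
            var keep = Convert.ToInt32(Math.Ceiling(executableActions.Count * 0.5));
            var best = executableActions
                       .OrderByDescending(a => this.Heuristic.H(state, a))
                       .Take(keep)
                       .ToList();

            return best[this.RandomGenerator.Next(0, best.Count)];
        }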
Example #8
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel currentState = initialPlayoutState;

            while (!currentState.IsTerminal())
            {
                // Re-evaluate the executable actions and the heuristic minimum for the current state.
                GOB.Action[] actions         = currentState.GetExecutableActions();
                int          bestHvalue      = int.MaxValue;
                int          bestActionIndex = -1;

                for (int i = 0; i < actions.Length; i++)
                {
                    GOB.Action action = actions[i];
                    int        h      = action.getHvalue();
                    if (h < bestHvalue)
                    {
                        bestActionIndex = i;
                        bestHvalue      = h;
                    }
                }
                WorldModel childState = currentState.GenerateChildWorldModel();
                actions[bestActionIndex].ApplyActionEffects(childState);
                childState.CalculateNextPlayer();
                currentState = childState;
                base.CurrentDepth++;
            }
            Reward r = new Reward();

            r.Value    = currentState.GetScore();
            r.PlayerID = currentState.GetNextPlayer();
            return(r);
        }
Example #9
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            GOB.Action   action;
            GOB.Action[] actions;
            Reward       reward  = new Reward();
            WorldModel   current = initialPlayoutState;
            int          random;

            actions = current.GetExecutableActions();
            if (actions.Length == 0)
            {
                reward.PlayerID = current.GetNextPlayer();
                reward.Value    = 0;
                return(reward);
            }

            while (!current.IsTerminal())
            {
                current = current.GenerateChildWorldModel();
                // Refresh the executable actions for the new state instead of reusing the initial list.
                actions = current.GetExecutableActions();
                if (actions.Length == 0)
                {
                    break;
                }
                random  = RandomGenerator.Next(0, actions.Length);
                action  = actions[random];
                action.ApplyActionEffects(current);
                current.CalculateNextPlayer();
            }

            reward.PlayerID = current.GetNextPlayer();
            reward.Value    = current.GetScore();
            return(reward);
        }
Example #10
        //Rave + Biased
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            //throw new NotImplementedException();
            ActionHistory = new List <Pair <int, GOB.Action> >();
            WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();

            GOB.Action action;

            int playoutReach = 0;

            while (!childWorldModel.IsTerminal())
            {
                //Select a random Action

                GOB.Action[] actions                   = childWorldModel.GetExecutableActions();
                double[]     actionIndexes             = new double[actions.Length];
                double       heuristicValue            = 0.0;
                double       accumulatedHeuristicValue = 0.0;
                double       randomIndex;
                int          chosenActionIndex = 0;
                for (int i = 0; i < actions.Length; i++)
                {
                    heuristicValue             = actions[i].H(childWorldModel);
                    accumulatedHeuristicValue += Math.Pow(Math.E, -heuristicValue);
                    actionIndexes[i]           = accumulatedHeuristicValue;
                }

                randomIndex = this.RandomGenerator.NextDouble() * accumulatedHeuristicValue;
                //Debug.Log("Acumulated: " + accumulatedHeuristicValue);
                for (int i = 0; i < actions.Length; i++)
                {
                    if (randomIndex <= actionIndexes[i])
                    {
                        chosenActionIndex = i;
                        break;
                    }
                }
                ActionHistory.Add(new Pair <int, GOB.Action>(childWorldModel.GetNextPlayer(), actions[chosenActionIndex]));
                actions[chosenActionIndex].ApplyActionEffects(childWorldModel);
                childWorldModel.CalculateNextPlayer();
                playoutReach += 1;
            }

            if (playoutReach > MaxPlayoutDepthReached)
            {
                MaxPlayoutDepthReached = playoutReach;
            }

            Reward reward = new Reward
            {
                PlayerID = childWorldModel.GetNextPlayer(),
                Value    = childWorldModel.GetScore()
            };

            return(reward);
        }
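Examples #10 and #14 draw the playout action from a softmax distribution over exp(-H); the running sums act as a roulette wheel, so low-cost actions are picked more often but never exclusively (Example #23 applies the same roulette-wheel idea to a hand-tuned heuristic). A sketch of that selection step as a standalone helper (the name SampleSoftmaxAction is hypothetical; it assumes the same GOB.Action.H(WorldModel) heuristic and a System.Random generator):

        // Hypothetical helper: roulette-wheel selection with weights proportional to e^(-H(a)).
        private GOB.Action SampleSoftmaxAction(WorldModel state, GOB.Action[] actions, System.Random rng)
        {
            double[] cumulative = new double[actions.Length];
            double   total      = 0.0;

            for (int i = 0; i < actions.Length; i++)
            {
                total        += Math.Exp(-actions[i].H(state));   // lower H => larger weight
                cumulative[i] = total;
            }

            double target = rng.NextDouble() * total;
            for (int i = 0; i < actions.Length; i++)
            {
                if (target <= cumulative[i])
                {
                    return actions[i];
                }
            }
            return actions[actions.Length - 1]; // guard against floating-point round-off
        }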
Example #11
        private Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel state = new FutureStateWorldModel(initialPlayoutState.GenerateChildWorldModel());

            while (!state.IsTerminal())
            {
                GOB.Action[] actions = state.GetExecutableActions();
                actions[RandomGenerator.Next() % actions.Length].ApplyActionEffects(state);
                this.MaxPlayoutDepthReached++;
            }
            return(new Reward());
        }
Example #12
        // Only worth running multiple playouts in case action is Sword Attack
        protected virtual WorldModel StochasticPlayout(Action action, WorldModel currState)
        {
            if (action.Name.Equals("SwordAttack") && this.MaxPlayouts > 0)
            {
                WorldModel[] tests = new WorldModel[this.MaxPlayouts];
                for (int i = 0; i < this.MaxPlayouts; i++)
                {
                    tests[i] = currState.GenerateChildWorldModel();
                    action.ApplyActionEffects(tests[i]);
                }

                currState = AverageState(tests, (SwordAttack)action);
            }
            else
            {
                currState = currState.GenerateChildWorldModel();
                action.ApplyActionEffects(currState);
            }

            return(currState);
        }
Example #13
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel state = initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                getRandomAction(state).ApplyActionEffects(state);
            }
            Reward r = new Reward();

            r.Value = r.GetRewardForNode(new MCTSNode(state));
            return(r);
        }
Example #14
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel childWorldModel = initialPlayoutState.GenerateChildWorldModel();
            int        DepthReached    = 0;

            while (!childWorldModel.IsTerminal())
            {
                GOB.Action[] actions                   = childWorldModel.GetExecutableActions();
                double[]     actionIndexes             = new double[actions.Length];
                double       heuristicValue            = 0.0;
                double       accumulatedHeuristicValue = 0.0;
                double       randomIndex;
                int          chosenActionIndex = 0;
                for (int i = 0; i < actions.Length; i++)
                {
                    heuristicValue             = actions[i].H(childWorldModel);
                    accumulatedHeuristicValue += Math.Pow(Math.E, -heuristicValue);
                    actionIndexes[i]           = accumulatedHeuristicValue;
                }

                randomIndex = this.RandomGenerator.NextDouble() * accumulatedHeuristicValue;
                for (int i = 0; i < actions.Length; i++)
                {
                    if (randomIndex <= actionIndexes[i])
                    {
                        chosenActionIndex = i;
                        break;
                    }
                }
                actions[chosenActionIndex].ApplyActionEffects(childWorldModel);
                childWorldModel.CalculateNextPlayer();
                DepthReached++;
            }

            if (DepthReached > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = DepthReached;
            }

            Reward reward = new Reward
            {
                PlayerID = this.InitialNode.PlayerID,
                Value    = childWorldModel.GetScore()
            };

            return(reward);
        }
Example #15
        private Reward Playout(WorldModel initialPlayoutState)
        {
            //TODO: implement
            FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                //choose between plain MCTS and biased MCTS
                //ChooseRandom(state).ApplyActionEffects(state);
                ChooseBias(state).ApplyActionEffects(state);
                state.CalculateNextPlayer();
                this.MaxPlayoutDepthReached++;
            }

            Reward reward = new Reward();

            reward.Value = state.GetScore();
            return(reward);
        }
Example #16
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel currentState = initialPlayoutState;

            while (!currentState.IsTerminal())
            {
                GOB.Action[] actions = currentState.GetExecutableActions();
                if (actions.Length == 0)
                {
                    // No executable actions left: stop and score the current state (continue would loop forever).
                    break;
                }
                int        index  = this.RandomGenerator.Next(0, actions.Length);
                GOB.Action action = actions[index];
                currentState = currentState.GenerateChildWorldModel();
                action.ApplyActionEffects(currentState);
                this.CurrentDepth++;
            }
            Reward reward = new Reward();

            reward.Value = currentState.GetScore();
            return(reward);
        }
Example #17
        public override Reward Playout(WorldModel initialPlayoutState)
        {
            GOB.Action   action = null;
            GOB.Action[] actions;

            WorldModel state = initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                //should choose randomly
                actions = state.GetExecutableActions();


                float best = float.MinValue;
                foreach (var a in actions)
                {
                    WorldModel w = state.GenerateChildWorldModel();
                    a.ApplyActionEffects(w);
                    var heuristic = w.GetGoalValue("BeQuick") + 1 / w.GetGoalValue("GainXP") + w.GetGoalValue("Survive") + w.GetGoalValue("GetRich");

                    if (heuristic > best)
                    {
                        best   = heuristic;
                        action = a;
                    }
                }



                action.ApplyActionEffects(state);
            }

            Reward r = new Reward();

            r.Value    = state.GetScore();
            r.PlayerID = state.GetNextPlayer();

            return(r);
        }
Example #18
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            ActionHistory = new List <Pair <int, GOB.Action> >();
            WorldModel state = initialPlayoutState.GenerateChildWorldModel();
            Action     nextAction;

            while (!state.IsTerminal())
            {
                Action[] actions = state.GetExecutableActions();
                if (actions.Length > 0)
                {
                    nextAction = actions[RandomGenerator.Next() % actions.Length];
                    ActionHistory.Add(new Pair <int, GOB.Action>(state.GetNextPlayer(), nextAction));
                    nextAction.ApplyActionEffects(state);
                    state.CalculateNextPlayer();
                }
            }
            Reward r = new Reward();

            r.Value = state.GetScore();
            return(r);
        }
Example #19
        protected virtual float RunPlayout(WorldModel currentState)
        {
            GOB.Action nextAction;
            var        currentPlayoutDepth = 0;

            while (!currentState.IsTerminal())
            {
                var executableActions = currentState.GetExecutableActions();
                nextAction = executableActions[this.RandomGenerator.Next(0, executableActions.Count)];

                currentState = currentState.GenerateChildWorldModel(nextAction);
                currentPlayoutDepth++;
            }

            if (currentPlayoutDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentPlayoutDepth;
            }

            //var value = initialPlayoutState.GetNextPlayer() == currentPlayer ? score : -score;

            return(currentState.GetScore());
        }
Example #20
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            int        currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                randomAction = currentState.getNextRandomAction(this.RandomGenerator);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
Example #21
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            WorldModel        model = initialPlayoutState.GenerateChildWorldModel();
            List <GOB.Action> actions;
            List <GOB.Action> executableActions = new List <GOB.Action>();

            GOB.Action nextAction = null;
            Reward     reward     = new Reward();
            double     heuristicValue;
            double     accumulatedHeuristicValue;
            double     bestValue, minValue;
            SortedDictionary <double, GOB.Action> heuristicList = new SortedDictionary <double, GOB.Action>();

            actions = model.GetActions();

            while (!model.IsTerminal())
            {
                heuristicList.Clear();
                executableActions.Clear();
                heuristicValue            = 0;
                accumulatedHeuristicValue = 0;

                bestValue = -1;
                minValue  = float.MaxValue;

                if (actions.Count == 0)
                {
                    break;
                }

                foreach (GOB.Action action in actions)
                {
                    if (action.CanExecute(model))
                    {
                        accumulatedHeuristicValue += Math.Pow(Math.E, action.H(model));
                        executableActions.Add(action);
                    }
                }

                foreach (GOB.Action action in executableActions)
                {
                    heuristicValue = Math.Pow(Math.E, action.H(model)) / accumulatedHeuristicValue;

                    if (!heuristicList.ContainsKey(heuristicValue))
                    {
                        heuristicList.Add(heuristicValue, action);
                    }

                    if (heuristicValue > bestValue)
                    {
                        bestValue = heuristicValue;
                    }
                    if (heuristicValue < minValue)
                    {
                        minValue = heuristicValue;
                    }
                }

                double randomNumber = GetRandomNumber(minValue, bestValue);

                foreach (KeyValuePair <double, GOB.Action> actionHeuristic in heuristicList)
                {
                    if (actionHeuristic.Key >= randomNumber)
                    {
                        nextAction = actionHeuristic.Value;
                        break;
                    }
                }

                if (nextAction == null)
                {
                    break;
                }

                nextAction.ApplyActionEffects(model);
                model.CalculateNextPlayer();
            }

            reward.PlayerID = model.GetNextPlayer();
            reward.Value    = model.GetScore();
            return(reward);
        }
 public float H(WorldModel state, Action action)
 {
     return(H(state.GenerateChildWorldModel(action)));
 }
Example #23
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            GOB.Action   action;
            GOB.Action[] actions;
            Reward       reward  = new Reward();
            WorldModel   current = initialPlayoutState;

            double        random;
            float         h          = 0;
            double        accumulate = 0;
            float         euclidean  = 0;
            double        softmax    = 0;
            List <double> interval   = new List <double>();
            WalkToTargetAndExecuteAction wa;

            actions = current.GetExecutableActions();
            if (actions.Length == 0)
            {
                reward.PlayerID = current.GetNextPlayer();
                reward.Value    = 0;
            }

            while (!current.IsTerminal())
            {
                accumulate = 0;
                softmax    = 0;
                interval.Clear();
                //if (actions.Length == 0)
                //    break;

                foreach (var a in actions)
                {
                    h = 0;
                    var gameMan   = this.CurrentStateWorldModel.GetGameManager();
                    var character = gameMan.characterData;
                    wa = a as WalkToTargetAndExecuteAction;
                    if (wa != null)
                    {
                        euclidean = (wa.Target.transform.position - wa.Character.transform.position).magnitude;
                        if (euclidean <= 0)
                        {
                            euclidean = 1;
                        }
                    }

                    if (a.Name.Contains("LevelUp"))                                                      //1000
                    {
                        h = 1000;
                    }
                    if (a.Name.Contains("GetHealthPotion"))                                              //0-25
                    {
                        h = (character.MaxHP - character.HP) * 1.5f;
                    }
                    else if (a.Name.Contains("PickUpChest"))                                                                 //5-25
                    {
                        h = (character.Money + 5) * 3.5f;
                    }
                    else if (a.Name.Contains("FireballSkeleton") || a.Name.Contains("FireballOrc"))                          //0-25
                    {
                        h = character.Mana * 30;
                    }
                    else if (a.Name.Contains("SwordAttackSkeleton"))
                    {
                        h = (character.HP - 5) * 2;
                    }
                    else if (a.Name.Contains("SwordAttackOrc"))
                    {
                        h = (character.HP - 10) * 2;
                    }
                    else if (a.Name.Contains("SwordAttackDragon"))
                    {
                        h = character.HP - 20;
                    }

                    if (h < 0)
                    {
                        h = 0;
                    }

                    h = h * 1000 / euclidean;

                    accumulate += h;
                    if (h > 0)
                    {
                        softmax += Math.Pow(Math.E, -h / accumulate);
                        interval.Add(softmax);
                        Debug.Log(softmax);
                    }
                    else
                    {
                        interval.Add(0);
                    }
                }

                random = RandomGenerator.NextDouble() * softmax;
                for (int j = 0; j < interval.Count; j++)
                {
                    if (random <= interval[j])
                    {
                        action  = actions[j];
                        current = current.GenerateChildWorldModel();
                        action.ApplyActionEffects(current);
                        current.CalculateNextPlayer();
                        break;
                    }

                    if (j == interval.Count - 1)
                    {
                        current         = current.GenerateChildWorldModel();
                        reward.Value    = 0;
                        reward.PlayerID = current.GetNextPlayer();
                        return(reward);
                    }
                }
            }

            reward.PlayerID = current.GetNextPlayer();
            reward.Value    = current.GetScore();
            return(reward);
        }