Code example #1
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            /*
             * while s is nonterminal do
             *   choose a from Actions(s) uniformly at random
             *   s <- Result(s, a)
             * return reward for state s
             */
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            ActionHistory.Clear();
            int currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                randomAction = currentState.getNextBiasRandomAction(this.RandomGenerator, currentState);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();

                // allocate a fresh Pair per iteration so history entries don't all alias one object
                ActionHistory.Add(new Pair<int, Action>(currentState.GetNextPlayer(), randomAction));
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
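Example #1 records (player, action) entries in a Pair<L, R> container that is not shown. A minimal sketch, assuming a plain mutable two-field generic (the project's actual type may differ):

        public class Pair<L, R>
        {
            public L Left  { get; set; }    // the player index in example #1
            public R Right { get; set; }    // the action taken

            public Pair(L left, R right)
            {
                Left  = left;
                Right = right;
            }
        }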
Code example #2
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            //WorldModel newState = initialPlayoutState.GenerateChildWorldModel();
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
            {
                Action[]     possibleActions = newState.GetExecutableActions();
                List <float> heuristics      = new List <float>();
                for (int i = 0; i < possibleActions.Length; i++)
                {
                    heuristics.Add(possibleActions[i].GetHValue(newState));
                }

                int    index      = chooseAction(possibleActions, heuristics);
                Action bestAction = possibleActions[index];
                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.PlayerID = newState.GetNextPlayer();
                reward.Value    = heuristics[index];
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return(reward);
        }
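Example #2 delegates the pick to a chooseAction helper that is not shown. A plausible sketch, assuming it softmax-samples an index in proportion to exp(heuristic) using the class's RandomGenerator and LINQ (the signature is inferred from the call site; the real helper may differ):

        private int chooseAction(Action[] possibleActions, List<float> heuristics)
        {
            // exponentiate the heuristics and sample an index proportionally
            List<double> exps = heuristics.Select(h => Math.Exp(h)).ToList();
            double threshold  = this.RandomGenerator.NextDouble() * exps.Sum();

            double cumulative = 0;
            for (int i = 0; i < possibleActions.Length; i++)
            {
                cumulative += exps[i];
                if (cumulative >= threshold)
                {
                    return i;
                }
            }
            return possibleActions.Length - 1;  // guard against floating-point rounding
        }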
Code example #3
        private Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                GOB.Action[] actions = state.GetExecutableActions();
                actions[RandomGenerator.Next(actions.Length)].ApplyActionEffects(state);   // Next(n) avoids the modulo bias of Next() % n
                state.CalculateNextPlayer();   // keep turn order consistent, as the other playouts do
                this.MaxPlayoutDepthReached++;
            }
            return(new Reward()
            {
                // score the terminal state instead of returning an empty Reward
                PlayerID = state.GetNextPlayer(), Value = state.GetScore()
            });
        }
Code example #4
File: MCTS.cs Project: pmsmm/Terceiro-Projeto-de-IAJ
        virtual protected MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)parent.State);

            action.ApplyActionEffects(newState);
            newState.CalculateNextPlayer();
            MCTSNode newNode = new MCTSNode(newState);

            newNode.Parent = parent;
            newNode.Q      = 0;
            newNode.N      = 0;
            newNode.Action = action;
            parent.ChildNodes.Add(newNode);
            return(newNode);
        }
Code example #5
        protected virtual MCTSNode Expand(MCTSNode parent, GOB.Action action)
        {
            FutureStateWorldModel newModel = parent.State.GenerateChildWorldModel() as FutureStateWorldModel;

            action.ApplyActionEffects(newModel);
            newModel.CalculateNextPlayer();

            MCTSNode childNode = new MCTSNode(newModel);

            childNode.Action = action;
            childNode.Parent = parent;

            parent.ChildNodes.Add(childNode);

            return(childNode);
        }
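Both Expand variants (#4 and #5) assume an MCTSNode with the fields they set. A minimal sketch of that shape, inferred from usage (the real class in MCTS.cs likely carries more state):

        public class MCTSNode
        {
            public WorldModel State;                                  // world state this node represents
            public MCTSNode Parent;                                   // null at the root
            public List<MCTSNode> ChildNodes = new List<MCTSNode>();  // expanded children
            public GOB.Action Action;                                 // action leading from Parent to this node
            public float Q;                                           // accumulated reward
            public int N;                                             // visit count

            public MCTSNode(WorldModel state)
            {
                this.State = state;
            }
        }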
Code example #6
        protected override Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward             = new Reward();
            int    numberOfIterations = 0;

            while (!newState.IsTerminal() && (DEPTH_LIMIT <= 0 || numberOfIterations < DEPTH_LIMIT))
            {
                GOB.Action[]  possibleActions = newState.GetExecutableActions();
                List <double> results         = new List <double>();
                float         chosenScore     = 0f;
                int           i;
                for (i = 0; i < possibleActions.Length; i++)
                {
                    //results.Add(Heuristic(newState, possibleActions[i]));
                    results.Add(possibleActions[i].GetUtility());
                }

                GOB.Action    bestAction         = null;
                List <double> exponentialResults = results.Select(Math.Exp).ToList();
                double        sumExponentials    = exponentialResults.Sum();
                List <double> softmax            = exponentialResults.Select(j => j / sumExponentials).ToList();

                double prob           = this.RandomGenerator.NextDouble();
                double probabilitySum = 0;
                for (i = 0; i < possibleActions.Length; i++)
                {
                    probabilitySum += softmax[i];
                    if (probabilitySum >= prob)
                    {
                        bestAction  = possibleActions[i];
                        chosenScore = (float)softmax[i];
                        break;
                    }
                }
                if (bestAction == null)   // rounding can leave probabilitySum just below prob
                {
                    bestAction  = possibleActions[possibleActions.Length - 1];
                    chosenScore = (float)softmax[possibleActions.Length - 1];
                }

                bestAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = chosenScore;
                reward.PlayerID = 0;
                if (DEPTH_LIMIT > 0)
                {
                    numberOfIterations++;
                }
            }
            return(reward);
        }
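Example #6 exponentiates raw utilities, so large values can overflow to Infinity and turn the softmax weights into NaNs. A standard stabilization (a sketch using LINQ, not project code) subtracts the maximum before exponentiating, which leaves the resulting distribution unchanged:

        static double[] StableSoftmax(double[] values)
        {
            double max    = values.Max();   // shift so the largest exponent is exactly 0
            double[] exps = values.Select(v => Math.Exp(v - max)).ToArray();
            double sum    = exps.Sum();
            return exps.Select(e => e / sum).ToArray();
        }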
Code example #7
File: MCTS.cs Project: pmsmm/Terceiro-Projeto-de-IAJ
        virtual protected Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel newState = new FutureStateWorldModel((FutureStateWorldModel)initialPlayoutState);
            Reward reward = new Reward();

            while (!newState.IsTerminal())
            {
                GOB.Action[] possibleActions = newState.GetExecutableActions();

                int        actionIndex  = this.RandomGenerator.Next(0, possibleActions.Length);
                GOB.Action chosenAction = possibleActions[actionIndex];
                chosenAction.ApplyActionEffects(newState);
                newState.CalculateNextPlayer();
                reward.Value    = newState.GetScore();   // overwritten every step; only the terminal score is returned
                reward.PlayerID = 0;                     // always scored from player 0's perspective
            }
            return(reward);
        }
Code example #8
        private bool ChestDead(FutureStateWorldModel state, GOB.Action action, string enemyName, string chestName)
        {
            //bool cond1 = !(bool)state.GetProperty(enemyName);
            bool cond1 = GameObject.Find(enemyName) == null;
            bool cond2 = GameObject.Find(chestName) != null;
            //bool cond2 = (bool)state.GetProperty(chestName);
            bool cond3 = action is PickUpChest;
            bool cond4 = action is PickUpChest && ((PickUpChest)action).Target.name.Equals(chestName);

            /*
             * if (cond3)
             *  cond4 = action is PickUpChest && ((PickUpChest)action).Target.tag.Equals(chestName);
             * else
             *  cond4 = false;
             */
            return(cond1 && cond2 && cond3 && cond4);

            //return !(bool)state.GetProperty(enemyName) && (bool)state.GetProperty(chestName) && action is PickUpChest && ((PickUpChest)action).Target.tag.Equals(chestName);
        }
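In example #8, cond4 already contains the 'action is PickUpChest' test, so cond3 is redundant; the state parameter is also unused in the live code. A condensed equivalent (a sketch; it drops the unused parameter and keeps the .name comparison from the active branch):

        private bool ChestDead(GOB.Action action, string enemyName, string chestName)
        {
            return GameObject.Find(enemyName) == null                    // the guarding enemy is gone
                && GameObject.Find(chestName) != null                    // the chest is still in the scene
                && action is PickUpChest
                && ((PickUpChest)action).Target.name.Equals(chestName);  // and this action targets that chest
        }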
Code example #9
        private Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel state = (FutureStateWorldModel)initialPlayoutState.GenerateChildWorldModel();

            while (!state.IsTerminal())
            {
                //choose between plain MCTS and biased MCTS
                //ChooseRandom(state).ApplyActionEffects(state);
                ChooseBias(state).ApplyActionEffects(state);
                state.CalculateNextPlayer();
                this.MaxPlayoutDepthReached++;
            }

            Reward reward = new Reward();

            reward.Value = state.GetScore();   // note: reward.PlayerID is left at its default
            return(reward);
        }
Code example #10
        protected virtual Reward Playout(WorldModel initialPlayoutState)
        {
            FutureStateWorldModel currentState = initialPlayoutState.GenerateChildWorldModel() as FutureStateWorldModel;

            GOB.Action randomAction;
            int        currentDepth = 0;

            while (!currentState.IsTerminal())
            {
                randomAction = currentState.getNextRandomAction(this.RandomGenerator);
                randomAction.ApplyActionEffects(currentState);
                currentState.CalculateNextPlayer();
                currentDepth++;
            }
            if (currentDepth > this.MaxPlayoutDepthReached)
            {
                this.MaxPlayoutDepthReached = currentDepth;
            }
            return(new Reward()
            {
                PlayerID = currentState.GetNextPlayer(), Value = currentState.GetScore()
            });
        }
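Examples #1 and #10 call getNextBiasRandomAction / getNextRandomAction on the world model, and neither is shown. A hypothetical shape for the uniform version, inferred from the call site (the real method may differ):

        public GOB.Action getNextRandomAction(System.Random rng)
        {
            GOB.Action[] actions = this.GetExecutableActions();   // actions executable in the current state
            return actions[rng.Next(actions.Length)];             // uniform, unbiased pick
        }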
Code example #11
        /*
         * private bool ChestReallyDead(FutureStateWorldModel state, GOB.Action action, string enemyName, string chestName)
         * {
         *     bool cond1 = !(bool)state.GetProperty(enemyName);
         *     bool cond1b = GameObject.Find(enemyName) == null;
         *     bool cond2 = (bool)state.GetProperty(chestName);
         *     bool cond3 = action is PickUpChest;
         *     bool cond4 = action is PickUpChest && ((PickUpChest)action).Target.tag.Equals(chestName);
         *     return cond1 && cond2 && cond3 && cond4;
         * }
         */

        private GOB.Action ChooseBias(FutureStateWorldModel state)
        {
            GOB.Action[] actions  = state.GetExecutableActions();
            int[]        features = new int[2];

            int    size = features.Length;
            double H    = 0;

            double[] exp = new double[actions.Length];  // precomputed exponentials
            double[] P   = new double[actions.Length];  // cumulative probabilities used for sampling

            for (int j = 0; j < actions.Length; j++)
            {
                float h = 0;

                if (actions[j] is SwordAttack && (int)state.GetProperty(Properties.HP) + ((SwordAttack)actions[j]).hpChange <= 0)
                {
                    //actions = actions.Where(val => val != action).ToArray();  // unoptimized alternative: filter the array instead
                    exp[j] = 0;  // an attack that would kill us gets zero probability
                    continue;    // skip to the next action
                }
                if (ChestDead(state, actions[j], "Skeleton1", "Chest1") || ChestDead(state, actions[j], "Skeleton2", "Chest4") ||
                    ChestDead(state, actions[j], "Orc1", "Chest3") || ChestDead(state, actions[j], "Orc2", "Chest2") || ChestDead(state, actions[j], "Dragon", "Chest5"))
                {
                    h      = 91;  // hand-tuned bonus so a newly freed chest dominates the softmax
                    exp[j] = Mathf.Exp(h);
                    H     += Mathf.Exp(h);
                    continue;     // skip to the next action
                }
                else
                {
                    FutureStateWorldModel possibleState = (FutureStateWorldModel)state.GenerateChildWorldModel();
                    actions[j].ApplyActionEffects(possibleState);
                    possibleState.CalculateNextPlayer();

                    features[WMoney] = (int)possibleState.GetProperty(Properties.MONEY);
                    //features[WTime] = (int) (float) possibleState.GetProperty(Properties.TIME);
                    features[WXP] = (int)possibleState.GetProperty(Properties.XP);
                    //features[WLevel] = (int)possibleState.GetProperty(Properties.LEVEL);

                    for (int i = 0; i < size; i++)
                    {
                        h += features[i] * weights[i]; // one weight per feature
                    }
                    exp[j] = Mathf.Exp(h);             // store the exponential now so it isn't recomputed later
                    H     += Mathf.Exp(h);
                }
            }

            if (H == 0)
            {
                return(actions[0]);
            }
            else
            {
                P[0] = exp[0] / H;      // first entry: nothing to accumulate yet
                for (int j = 1; j < actions.Length; j++)
                {
                    P[j] = P[j - 1] + exp[j] / H;   // build the cumulative distribution
                }
                double rand = RandomGenerator.NextDouble();

                // pick the first action whose cumulative probability reaches the sample
                int chosen = Array.FindIndex(P, val => val >= rand);
                return(actions[chosen >= 0 ? chosen : actions.Length - 1]);   // guard: rounding can leave P[last] < rand
            }
        }
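ChooseBias scores each action as a weighted sum of state features and then softmax-samples over those scores. The feature indices and weights it reads are declared elsewhere in the class; assumed companion declarations, inferred from usage (actual values are project-specific):

        private const int WMoney = 0;                    // feature index for MONEY
        private const int WXP    = 1;                    // feature index for XP
        private readonly float[] weights = { 1f, 1f };   // placeholder weights; the real ones are tuned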
Code example #12
        private GOB.Action ChooseRandom(FutureStateWorldModel state)
        {
            GOB.Action[] actions = state.GetExecutableActions();
            // Next(actions.Length) samples uniformly; Next() % length carries a slight modulo bias
            return(actions[RandomGenerator.Next(actions.Length)]);
        }