Ejemplo n.º 1
0
        /// <summary>
        /// Runs several MCTS trees (one per entry in InitialNodes) in round-robin
        /// fashion, spreading iterations across frames. Returns the chosen action,
        /// or null either when the per-frame budget is exhausted (search resumes
        /// next frame) or when no best child could be determined.
        /// </summary>
        public GOB.Action Run()
        {
            // Default actions that should always be taken take priority over the search.
            Action a = CheckAlwaysBestAction(InitialNodes[0]);

            if (a != null)
            {
                return(a);
            }

            MCTSNode selectedNode;
            Reward   reward;
            var      startTime = Time.realtimeSinceStartup;

            this.CurrentIterationsInFrame = 0;
            int currentMCTS = 0;

            while (CurrentIterations < MaxIterations)
            {
                // Out of per-frame budget: bank the elapsed time and resume next frame.
                if (CurrentIterationsInFrame >= MaxIterationsProcessedPerFrame)
                {
                    TotalProcessingTime += Time.realtimeSinceStartup - startTime;
                    return(null);
                }
                selectedNode = Selection(InitialNodes[currentMCTS]);
                // Selection returning the root itself signals the tree is fully explored.
                if (selectedNode == InitialNodes[currentMCTS])
                {
                    break;
                }
                reward = Playout(selectedNode.State);
                Backpropagate(selectedNode, reward);
                CurrentIterationsInFrame++;
                CurrentIterations++;
                // Round-robin across the parallel trees.
                currentMCTS++;
                if (currentMCTS == NumberOfRuns)
                {
                    currentMCTS = 0;
                }
            }

            // Pick the best first move across all trees and rebuild the action sequence.
            BestFirstChild = BestChildFromSeveral(InitialNodes);
            MCTSNode child = BestFirstChild;

            BestActionSequence.Clear();
            while (child != null)
            {
                BestActionSequence.Add(child.Action);
                child = BestChild(child);
            }
            InProgress = false;

            // FIX: time is now accumulated on every post-search exit path; the
            // original skipped the accumulation when BestFirstChild was null,
            // under-reporting TotalProcessingTime.
            TotalProcessingTime += Time.realtimeSinceStartup - startTime;
            if (BestFirstChild != null)
            {
                return(BestFirstChild.Action);
            }
            return(null);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the MCTS search and averages each first-level child's Q/N value
        /// over PlayoutIterations passes, then returns the action of the child
        /// with the best averaged score (null if none exists).
        /// </summary>
        public GOB.Action Run()
        {
            //TODO: Execute MCTS PlayoutIterations times
            var startTime = Time.realtimeSinceStartup;

            this.CurrentIterationsInFrame = 0;
            // Per-pass budget: the frame budget is split evenly across passes.
            int          MaxIterations = this.MaxIterationsProcessedPerFrame / this.PlayoutIterations;
            MCTSNode     selectedNode  = new MCTSNode(this.CurrentStateWorldModel.GenerateChildWorldModel());
            // results[j] accumulates child j's Q/N across passes; divided by
            // PlayoutIterations on the final pass to form the average.
            List <float> results       = new List <float>();

            int i = 0;

            for (i = 0; i < this.PlayoutIterations; i++)
            {
                Reward reward;

                // NOTE(review): CurrentIterationsInFrame is never reset between
                // passes, so this while loop only executes when i == 0; later
                // passes re-add the same (unchanged) Q/N ratios — confirm whether
                // the counter should be reset or the budget scaled by (i + 1).
                while (this.CurrentIterationsInFrame < MaxIterations)
                {
                    MCTSNode newNode = Selection(selectedNode);
                    reward = Playout(newNode.State);
                    Backpropagate(newNode, reward);
                    this.CurrentIterationsInFrame++;
                }

                // Fold this pass's per-child average reward into the running totals.
                for (int j = 0; j < selectedNode.ChildNodes.Count; j++)
                {
                    // NOTE(review): Q / N is a float division, so N == 0 yields
                    // NaN/Infinity rather than throwing — confirm Selection
                    // guarantees every expanded child has N > 0.
                    if (results.Count <= j)
                    {
                        results.Add(selectedNode.ChildNodes[j].Q / selectedNode.ChildNodes[j].N);
                    }
                    else
                    {
                        results[j] += selectedNode.ChildNodes[j].Q / selectedNode.ChildNodes[j].N;
                    }

                    // On the last pass, convert the accumulated sum into an average.
                    if (i == this.PlayoutIterations - 1)
                    {
                        results[j] /= this.PlayoutIterations;
                    }
                }
            }

            this.TotalProcessingTime += Time.realtimeSinceStartup - startTime;
            this.InProgress           = false;

            //MCTSNode best = BestChild(selectedNode);
            MCTSNode best = BestAverageChild(selectedNode, results);

            // Only the single best first-level action is exposed as the sequence.
            BestActionSequence.Clear();
            GOB.Action bestAction = best != null ? best.Action : null;
            if (bestAction != null)
            {
                BestActionSequence.Add(bestAction);
            }
            return(bestAction);
        }
Ejemplo n.º 3
0
        //this method is very similar to the bestUCTChild, but it is used to return the final action of the MCTS search, and so we do not care about
        //the exploration factor
        private MCTSNode BestChild(MCTSNode node)
        {
            if (node.ChildNodes.Count == 0)
            {
                return(null);
            }

            float    A          = C * (float)Math.Sqrt(node.N);
            MCTSNode bestChild  = null;
            float    bestReward = 0;

            for (int i = 0; i < node.ChildNodes.Count; i++)
            {
                float newReward = 0;
                switch (strategy)
                {
                case BestStrategy.Max:
                    newReward = node.ChildNodes[i].Q / node.ChildNodes[i].N;
                    break;

                case BestStrategy.Robust:
                    newReward = node.ChildNodes[i].N;
                    break;

                case BestStrategy.MaxRobust:
                    newReward = node.ChildNodes[i].Q / node.ChildNodes[i].N + node.ChildNodes[i].N;
                    break;

                case BestStrategy.Secure:
                    newReward = node.ChildNodes[i].Q / node.ChildNodes[i].N - A / Mathf.Sqrt(node.ChildNodes[i].N);
                    break;
                }
                if (newReward > bestReward || bestChild == null)
                {
                    bestChild  = node.ChildNodes[i];
                    bestReward = newReward;
                }
            }
            if (bestChild == null || bestChild.Action == null)
            {
                BestActionSequence.Clear();
            }
            return(bestChild);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the child of <paramref name="node"/> with the highest averaged
        /// reward, where results[i] is the precomputed average score of
        /// node.ChildNodes[i]. Clears BestActionSequence when no usable child
        /// exists; returns null when results is empty.
        /// </summary>
        private MCTSNode BestAverageChild(MCTSNode node, List <float> results)
        {
            MCTSNode bestChild  = null;
            float    bestReward = 0;

            // FIX: bound the loop by both lists. The original iterated over
            // results.Count alone, so a results list longer than the child list
            // threw IndexOutOfRangeException; surplus entries cannot map to any
            // child and are ignored.
            int count = Math.Min(results.Count, node.ChildNodes.Count);

            for (int i = 0; i < count; i++)
            {
                float newReward = results[i];
                // First child always becomes the baseline; strict '>' keeps the
                // earliest child on ties.
                if (newReward > bestReward || bestChild == null)
                {
                    bestChild  = node.ChildNodes[i];
                    bestReward = newReward;
                }
            }
            if (bestChild == null || bestChild.Action == null)
            {
                BestActionSequence.Clear();
            }
            return(bestChild);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Runs as many MCTS iterations as the per-frame budget allows and, once
        /// the global iteration cap is reached, marks the search finished and
        /// returns the best first action (null if no child was found).
        /// </summary>
        public GOB.Action Run()
        {
            Reward reward;
            var    startTime = Time.realtimeSinceStartup;

            this.CurrentIterationsInFrame = 0;
            MCTSNode initialNode = this.InitialNode, selectedNode;

            while (CurrentIterations < MaxIterations && CurrentIterationsInFrame < MaxIterationsProcessedPerFrame)
            {
                selectedNode = Selection(initialNode);
                reward       = Playout(selectedNode.State);
                Backpropagate(selectedNode, reward);
                CurrentIterations++;
                CurrentIterationsInFrame++;
            }

            // The search only finishes once the global cap is hit; otherwise it
            // resumes on the next frame.
            if (CurrentIterations >= MaxIterations)
            {
                InProgress = false;
            }

            // FIX: the original accumulated Time.deltaTime (the duration of the
            // whole previous frame) rather than the time spent inside this call;
            // measure an elapsed realtimeSinceStartup delta like the other Run
            // variants in this file.
            TotalProcessingTime += Time.realtimeSinceStartup - startTime;
            BestActionSequence.Clear();
            BestFirstChild = BestChild(initialNode);

            if (BestFirstChild == null)
            {
                return(null);
            }

            // Walk the tree greedily to rebuild the full best action sequence.
            MCTSNode child = BestFirstChild;

            while (child != null)
            {
                BestActionSequence.Add(child.Action);
                child = BestChild(child);
            }

            return(BestFirstChild.Action);
        }