Exemple #1
0
        /// <summary>
        /// Scores the candidate action at <paramref name="index"/> by running random rollouts.
        /// </summary>
        /// <remarks>
        /// Every rollout restarts from <c>gs</c>, applies <c>availableActions[index]</c> (with 0 as
        /// the other action argument) and then keeps stepping with actions produced by
        /// <c>rdmAgent</c> until both players report game over or the step budget is exceeded.
        /// The score of <c>players[playerId]</c> at the end of each rollout is added to
        /// <c>summedScores[index]</c>.
        /// </remarks>
        /// <param name="index">Index into <c>availableActions</c> and <c>summedScores</c>.</param>
        public void Execute(int index)
        {
            const int epochs       = 100;
            const int maxIteration = 200;

            var agent  = rdmAgent;
            var gsCopy = Rules.Clone(ref gs);

            for (var n = 0; n < epochs; n++)
            {
                // Reset the working copy to the real state before each rollout.
                Rules.CopyTo(ref gs, ref gsCopy);
                Rules.Step(ref gameParameters, ref gsCopy, availableActions[index], 0);

                var currentDepth = 0;
                while (!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver)
                {
                    Rules.Step(ref gameParameters, ref gsCopy,
                               agent.Act(ref gsCopy, availableActions, 0),
                               agent.Act(ref gsCopy, availableActions, 1));
                    currentDepth++;
                    if (currentDepth > maxIteration)
                    {
                        break;
                    }
                }

                summedScores[index] += gsCopy.players[playerId].score;
            }

            // Release the working copy's containers once, after the last rollout. Disposing them
            // inside the loop left the next epoch's CopyTo writing into disposed containers.
            gsCopy.projectiles.Dispose();
            gsCopy.asteroids.Dispose();
        }
            /// <summary>
            /// Tries every (move, action) pair, plays a few random rollouts after each one and
            /// writes the pair that reached the highest player score into <c>intent</c>.
            /// </summary>
            /// <remarks>
            /// The baseline to beat is the player's score in the unmodified state, so when no
            /// rollout scores strictly higher both outputs stay at <c>ActionsAvailable.NONE</c>.
            /// </remarks>
            public void Execute()
            {
                const int rolloutsPerPair = 5;
                const int stepsPerRollout = 100;

                var simulated = GameStateRules.Clone(ref gs);
                int bestScore = simulated.players[id].PlayerScore;

                var bestMove   = (int) ActionsAvailable.NONE;
                var bestAction = (int) ActionsAvailable.NONE;

                for (var move = 0; move <= (int) ActionsAvailable.NONE; move++)
                {
                    getIntent.moveIntent = (ActionsAvailable) move;

                    for (var action = (int) ActionsAvailable.NONE; action <= (int) ActionsAvailable.BLOCK; action++)
                    {
                        getIntent.actionIntent = (ActionsAvailable) action;

                        for (var rollout = 0; rollout < rolloutsPerPair; rollout++)
                        {
                            // Start again from the real state, play the candidate pair first...
                            GameStateRules.CopyTo(ref gs, ref simulated);
                            GameStateRules.Step(ref simulated, getIntent, id);

                            // ...then let the random agent play a fixed number of steps.
                            for (var step = 0; step < stepsPerRollout; step++)
                            {
                                GameStateRules.Step(ref simulated, rdmAgent.Act(ref simulated, id), id);
                            }

                            if (simulated.players[id].PlayerScore <= bestScore)
                            {
                                continue;
                            }

                            bestScore  = simulated.players[id].PlayerScore;
                            bestMove   = move;
                            bestAction = action;
                        }
                    }
                }

                intent[0] = (ActionsAvailable) bestMove;
                intent[1] = (ActionsAvailable) bestAction;
            }
        /// <summary>
        /// Runs a fixed number of Monte Carlo Tree Search epochs from <c>gs</c> and writes the
        /// visit count of every root action into <c>summedScores</c>.
        /// </summary>
        /// <remarks>
        /// The tree is a hash map keyed by <c>Rules.GetHashCode</c>; each entry holds one
        /// <c>NodeMCTS</c> per element of <c>availableActions</c>. Every epoch performs selection,
        /// expansion, a random rollout and back-propagation. Selection and rollout share one
        /// budget of <c>maxDepth</c> steps. The second action handed to <c>Rules.Step</c> is always
        /// <c>availableActions[7]</c>.
        /// NOTE(review): node rewards (<c>rc</c>) are never accumulated in this method, so the
        /// selection score consists of the exploration term only — confirm whether that is intended.
        /// </remarks>
        public void Execute()
        {
            const int epochs   = 5;
            const int maxDepth = 10;

            var agent  = rdmAgent;
            var gsCopy = Rules.Clone(ref gs);

            var rootHash = Rules.GetHashCode(ref gsCopy, 0);

            // Tree memory: state hash -> per-action statistics.
            var memory = new NativeHashMap <long, NativeList <NodeMCTS> >(360, Allocator.Temp);
            EnsureNodeMctsEntry(ref memory, rootHash);

            for (var n = 0; n < epochs; n++)
            {
                Rules.CopyTo(ref gs, ref gsCopy);
                var currentHash = rootHash;

                var selectedNodes = new NativeList <SelectedNodeInfo>(Allocator.Temp);
                var depth         = 0;

                // SELECT: walk down while every child of the current state has been visited.
                while (!gsCopy.players[0].isGameOver && depth < maxDepth)
                {
                    depth++;

                    var children           = memory[currentHash];
                    var hasUnexploredNodes = false;
                    for (var i = 0; i < children.Length; i++)
                    {
                        if (children[i].nc == 0)
                        {
                            hasUnexploredNodes = true;
                            break;
                        }
                    }

                    if (hasUnexploredNodes)
                    {
                        break;
                    }

                    var bestNodeIndex = -1;
                    var bestNodeScore = float.MinValue;

                    for (var i = 0; i < children.Length; i++)
                    {
                        var node = children[i];
                        node.npc   += 1;
                        children[i] = node;

                        // Mean reward plus exploration bonus sqrt(2 * ln(npc) / nc);
                        // nc > 0 is guaranteed by the unexplored check above.
                        var score = (float)node.rc / node.nc
                                    + math.sqrt(2 * math.log(node.npc) / node.nc);

                        if (score >= bestNodeScore)
                        {
                            bestNodeIndex = i;
                            bestNodeScore = score;
                        }
                    }

                    selectedNodes.Add(new SelectedNodeInfo
                    {
                        hash      = currentHash,
                        nodeIndex = bestNodeIndex
                    });
                    Rules.Step(ref gameParameters, ref gsCopy, children[bestNodeIndex].action, availableActions[7]);
                    currentHash = Rules.GetHashCode(ref gsCopy, 0);
                    EnsureNodeMctsEntry(ref memory, currentHash);
                }

                // EXPAND: try one random unvisited action of the state that was reached.
                if (!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver)
                {
                    var children          = memory[currentHash];
                    var unexploredActions = new NativeList <int>(Allocator.Temp);

                    for (var i = 0; i < children.Length; i++)
                    {
                        if (children[i].nc == 0)
                        {
                            unexploredActions.Add(i);
                        }
                    }

                    // Selection can stop on its depth budget at a fully visited state; in that
                    // case there is nothing to expand and indexing the empty list would fail.
                    if (unexploredActions.Length > 0)
                    {
                        var chosenNodeIndex = unexploredActions[agent.rdm.NextInt(0, unexploredActions.Length)];

                        selectedNodes.Add(new SelectedNodeInfo
                        {
                            hash      = currentHash,
                            nodeIndex = chosenNodeIndex
                        });
                        Rules.Step(ref gameParameters, ref gsCopy, children[chosenNodeIndex].action, availableActions[7]);
                        currentHash = Rules.GetHashCode(ref gsCopy, 0);
                        EnsureNodeMctsEntry(ref memory, currentHash);
                    }

                    unexploredActions.Dispose();
                }

                // SIMULATE: random rollout. The parentheses matter: without them '&&' binds
                // tighter than '||' and the depth cap is ignored while player 0 is still alive.
                while ((!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver) && depth < maxDepth)
                {
                    var chosenActionIndex = agent.rdm.NextInt(0, availableActions.Length);
                    // Look the action up instead of casting the index, like the rest of the search.
                    Rules.Step(ref gameParameters, ref gsCopy, availableActions[chosenActionIndex], availableActions[7]);
                    depth++;
                }

                // BACKPROPAGATE: count one visit on every node chosen during this epoch.
                for (var i = 0; i < selectedNodes.Length; i++)
                {
                    var selected = selectedNodes[i];
                    var list     = memory[selected.hash];
                    var node     = list[selected.nodeIndex];

                    //node.rc += gsCopy.score;
                    node.nc += 1;

                    list[selected.nodeIndex] = node;
                }

                selectedNodes.Dispose();
            }

            var rootChildren = memory[rootHash];
            for (var i = 0; i < rootChildren.Length; i++)
            {
                summedScores[i] = rootChildren[i].nc;
            }

            // The per-state child lists were allocated with Allocator.Temp and are not
            // disposed individually here.
            memory.Dispose();
            gsCopy.projectiles.Dispose();
            gsCopy.asteroids.Dispose();
        }

        /// <summary>
        /// Registers <paramref name="hash"/> in <paramref name="memory"/> with one zeroed
        /// <c>NodeMCTS</c> per element of <c>availableActions</c>; does nothing if the hash is
        /// already present.
        /// </summary>
        /// <param name="memory">Tree memory to extend.</param>
        /// <param name="hash">State hash to register.</param>
        private void EnsureNodeMctsEntry(ref NativeHashMap <long, NativeList <NodeMCTS> > memory, long hash)
        {
            if (memory.ContainsKey(hash))
            {
                return;
            }

            var children = new NativeList <NodeMCTS>(availableActions.Length, Allocator.Temp);
            memory.TryAdd(hash, children);

            for (var i = 0; i < availableActions.Length; i++)
            {
                children.Add(new NodeMCTS
                {
                    action = availableActions[i],
                    nc     = 0,
                    npc    = 0,
                    rc     = 0
                });
            }
        }
        public Intent Act(ref GameStateData gs, int id)
        {
            MCTSTreeNode parentNode = new MCTSTreeNode();

            parentNode.nodeChoosingValue = -1000f;
            parentNode.nSelect           = 1;
            parentNode.depth             = 0;
            int iterations = 300;

            parentNode.childNodes = new List <MCTSTreeNode>();
            for (int i = 0; i <= (int)ActionsAvailable.NONE; i++)
            {
                for (int j = (int)ActionsAvailable.NONE; j <= (int)ActionsAvailable.BLOCK; j++)
                {
                    parentNode.childNodes.Add(new MCTSTreeNode());
                    parentNode.actions = new List <ActionsAvailable>();
                    parentNode.actions.Add(ActionsAvailable.NONE);
                    parentNode.actions.Add(ActionsAvailable.NONE);
                    var parentNodeChildNode = parentNode.childNodes[parentNode.childNodes.Count - 1];
                    parentNodeChildNode.actions = new List <ActionsAvailable>();
                    parentNodeChildNode.actions.Add((ActionsAvailable)i);
                    parentNodeChildNode.actions.Add((ActionsAvailable)j);

                    var gsCopy = GameStateRules.Clone(ref gs);

                    Intent currentIntents = new Intent();
                    currentIntents.moveIntent   = (ActionsAvailable)i;
                    currentIntents.actionIntent = (ActionsAvailable)j;
                    Debug.Log("damn" + currentIntents.moveIntent + " " + currentIntents.actionIntent);
                    GameStateRules.Step(ref gsCopy, currentIntents, id);
                    parentNodeChildNode.currentGS  = GameStateRules.Clone(ref gsCopy);
                    parentNodeChildNode.sumScore   = parentNode.sumScore + gsCopy.players[id].PlayerScore;
                    parentNodeChildNode.nSelect    = 1;
                    parentNodeChildNode.depth      = 1;
                    parentNodeChildNode.parentNode = new List <MCTSTreeNode>();
                    parentNodeChildNode.parentNode.Add(parentNode);
                    parentNodeChildNode.nodeChoosingValue =
                        (float)parentNodeChildNode.sumScore / parentNodeChildNode.nSelect + math.sqrt(2) *
                        math.sqrt(math.log(parentNode.nSelect) / parentNodeChildNode.nSelect);
                    parentNode.childNodes[parentNode.childNodes.Count - 1] = parentNodeChildNode;
                    actions.Clear();
                }
            }
            for (int i = 0; i < iterations; i++)
            {
                ref MCTSTreeNode nodeMaxChoosingValue = ref GetMaxChoosingValue(ref parentNode, ref parentNode);
                nodeMaxChoosingValue.nSelect++;

                if (nodeMaxChoosingValue.childNodes == null)
                {
                    nodeMaxChoosingValue.childNodes = new List <MCTSTreeNode>();
                }
                nodeMaxChoosingValue.childNodes.Add(new MCTSTreeNode());
                MCTSTreeNode currentNode = nodeMaxChoosingValue.childNodes[nodeMaxChoosingValue.childNodes.Count - 1];
                currentNode.parentNode = new List <MCTSTreeNode>();
                currentNode.parentNode.Add(nodeMaxChoosingValue);
                currentNode.depth     = parentNode.depth++;
                currentNode.currentGS = GameStateRules.Clone(ref nodeMaxChoosingValue.currentGS);
                currentNode.actions   = new List <ActionsAvailable>();
                currentNode.actions.Add((ActionsAvailable)(nodeMaxChoosingValue.childNodes.Count / ((int)ActionsAvailable.NONE)));
                currentNode.actions.Add((ActionsAvailable)(nodeMaxChoosingValue.childNodes.Count % (int)ActionsAvailable.NONE + (int)ActionsAvailable.MOVE_BACK_LEFT));
                Intent currentIntents = new Intent();
                currentIntents.moveIntent   = currentNode.actions[0];
                currentIntents.actionIntent = currentNode.actions[1];
                Debug.Log(nodeMaxChoosingValue.nSelect + " " + nodeMaxChoosingValue.depth + " " + nodeMaxChoosingValue.childNodes.Count + " " + nodeMaxChoosingValue.actions[0] + " " + nodeMaxChoosingValue.actions[1] + " " + currentIntents.moveIntent + " " + currentIntents.actionIntent);
                GameStateRules.Step(ref currentNode.currentGS, currentIntents, id);
                currentNode.sumScore          = nodeMaxChoosingValue.sumScore + currentNode.currentGS.players[id].PlayerScore;
                currentNode.nSelect           = 1;
                currentNode.nodeChoosingValue =
                    (float)currentNode.sumScore / currentNode.nSelect + math.sqrt(2) *
                    math.sqrt(math.log(nodeMaxChoosingValue.nSelect) / currentNode.nSelect);
                //int currentDepth = currentNode.depth;
                MCTSTreeNode rollbackNode = currentNode.parentNode[0];

                while (rollbackNode.depth > 0)
                {
                    rollbackNode.nodeChoosingValue =
                        (float)rollbackNode.sumScore / rollbackNode.nSelect + math.sqrt(2) *
                        math.sqrt(math.log(rollbackNode.parentNode[0].nSelect) / rollbackNode.nSelect);
                    rollbackNode = rollbackNode.parentNode[0];
                    //currentDepth = rollbackNode.depth;
                }
            }
Exemple #5
0
            /// <summary>
            /// Runs a fixed number of Monte Carlo Tree Search iterations from <c>gs</c> and writes
            /// the visit count of every root (move, action) pair into <c>sumScore</c>.
            /// </summary>
            /// <remarks>
            /// The tree is a hash map keyed by <c>GameStateRules.GetHashCode</c>; each entry holds
            /// one <c>Node</c> per (move, action) pair. Every iteration performs selection,
            /// expansion, a random rollout and back-propagation of the player's final
            /// <c>PlayerScore</c>. Selection and rollout each stop at end of game or once
            /// <c>currentGameStep</c> has advanced more than <c>maxStepsAhead</c> past its start.
            /// </remarks>
            public void Execute()
            {
                const int iterations    = 100;
                const int maxStepsAhead = 100;
                // The map grows on demand, so a modest initial capacity replaces the former
                // 10,000,000-entry up-front allocation.
                const int initialTreeCapacity = 16384;

                var agent    = rdmAgent;
                var gsCopy   = GameStateRules.Clone(ref gs);
                var rootHash = GameStateRules.GetHashCode(ref gsCopy);

                var memory = new NativeHashMap <long, NativeList <Node> >(initialTreeCapacity, Allocator.Temp);
                EnsureNodeEntry(ref memory, rootHash);

                // Clear the output slots before searching.
                for (int i = 0; i < memory[rootHash].Length; i++)
                {
                    sumScore[i] = 0;
                }

                for (var n = 0; n < iterations; n++)
                {
                    GameStateRules.CopyTo(ref gs, ref gsCopy);
                    var currentHash   = rootHash;
                    var selectedNodes = new NativeList <SelectedNodeInfo>(Allocator.Temp);

                    var currentTime = gsCopy.currentGameStep;

                    // SELECT: walk down while every child of the current state has been visited.
                    while (!gsCopy.EndOfGame && gsCopy.currentGameStep - maxStepsAhead <= currentTime)
                    {
                        var children           = memory[currentHash];
                        var hasUnexploredNodes = false;

                        for (var i = 0; i < children.Length; i++)
                        {
                            if (children[i].nc == 0)
                            {
                                hasUnexploredNodes = true;
                                break;
                            }
                        }

                        if (hasUnexploredNodes)
                        {
                            break;
                        }

                        var bestNodeIndex = -1;
                        var bestNodeScore = float.MinValue;

                        for (var i = 0; i < children.Length; i++)
                        {
                            var node = children[i];
                            node.npc   += 1;
                            children[i] = node;

                            // Mean reward plus exploration bonus sqrt(2 * ln(npc) / nc);
                            // nc > 0 is guaranteed by the unexplored check above.
                            var score = (float)node.rc / node.nc
                                        + math.sqrt(2 * math.log(node.npc) / node.nc);

                            if (score >= bestNodeScore)
                            {
                                bestNodeIndex = i;
                                bestNodeScore = score;
                            }
                        }

                        selectedNodes.Add(new SelectedNodeInfo
                        {
                            hash      = currentHash,
                            nodeIndex = bestNodeIndex
                        });

                        Intent currentIntent = new Intent();
                        currentIntent.moveIntent   = (ActionsAvailable)children[bestNodeIndex].moveIntent;
                        currentIntent.actionIntent = (ActionsAvailable)children[bestNodeIndex].actionIntent;
                        GameStateRules.Step(ref gsCopy, currentIntent, id);

                        currentHash = GameStateRules.GetHashCode(ref gsCopy);
                        EnsureNodeEntry(ref memory, currentHash);
                    }

                    // EXPAND: try one random unvisited pair of the state that was reached.
                    if (!gsCopy.EndOfGame)
                    {
                        var children          = memory[currentHash];
                        var unexploredActions = new NativeList <int>(Allocator.Temp);

                        for (var i = 0; i < children.Length; i++)
                        {
                            if (children[i].nc == 0)
                            {
                                unexploredActions.Add(i);
                            }
                        }

                        // Selection can stop on its step budget at a fully visited state; in that
                        // case there is nothing to expand and indexing the empty list would fail.
                        if (unexploredActions.Length > 0)
                        {
                            var chosenNodeIndex = unexploredActions[agent.rdm.NextInt(0, unexploredActions.Length)];

                            selectedNodes.Add(new SelectedNodeInfo
                            {
                                hash      = currentHash,
                                nodeIndex = chosenNodeIndex
                            });

                            Intent currentIntent = new Intent();
                            currentIntent.moveIntent   = (ActionsAvailable)children[chosenNodeIndex].moveIntent;
                            currentIntent.actionIntent = (ActionsAvailable)children[chosenNodeIndex].actionIntent;
                            GameStateRules.Step(ref gsCopy, currentIntent, id);

                            currentHash = GameStateRules.GetHashCode(ref gsCopy);
                            EnsureNodeEntry(ref memory, currentHash);
                        }

                        unexploredActions.Dispose();
                    }

                    // SIMULATE: random rollout with its own step budget.
                    currentTime = gsCopy.currentGameStep;
                    Intent currentIntent1 = new Intent();
                    currentIntent1.moveIntent   = ActionsAvailable.NONE;
                    currentIntent1.actionIntent = ActionsAvailable.NONE;
                    while (!gsCopy.EndOfGame && gsCopy.currentGameStep - maxStepsAhead <= currentTime)
                    {
                        // NOTE(review): the literal bounds 9 and 13 presumably mirror
                        // ActionsAvailable values — confirm against the enum.
                        var chosenActionIndex  = agent.rdm.NextInt(0, 9);
                        var chosenActionIndex1 = agent.rdm.NextInt(9, 13);

                        currentIntent1.moveIntent   = (ActionsAvailable)chosenActionIndex;
                        currentIntent1.actionIntent = (ActionsAvailable)chosenActionIndex1;

                        GameStateRules.Step(ref gsCopy, currentIntent1, id);
                    }

                    // BACKPROPAGATE: credit the final score to every node chosen this iteration.
                    for (var i = 0; i < selectedNodes.Length; i++)
                    {
                        var selected = selectedNodes[i];
                        var list     = memory[selected.hash];
                        var node     = list[selected.nodeIndex];

                        node.rc += gsCopy.players[id].PlayerScore;
                        node.nc += 1;

                        list[selected.nodeIndex] = node;
                    }

                    selectedNodes.Dispose();
                }

                var rootChildren = memory[rootHash];
                for (var i = 0; i < rootChildren.Length; i++)
                {
                    sumScore[i] = rootChildren[i].nc;
                }

                // The per-state child lists were allocated with Allocator.Temp and are not
                // disposed individually here.
                memory.Dispose();
            }

            /// <summary>
            /// Registers <paramref name="hash"/> in <paramref name="memory"/> with one zeroed
            /// <c>Node</c> per (move, action) pair; does nothing if the hash is already present.
            /// </summary>
            /// <param name="memory">Tree memory to extend.</param>
            /// <param name="hash">State hash to register.</param>
            private static void EnsureNodeEntry(ref NativeHashMap <long, NativeList <Node> > memory, long hash)
            {
                if (memory.ContainsKey(hash))
                {
                    return;
                }

                var children = new NativeList <Node>(54, Allocator.Temp);
                memory.TryAdd(hash, children);

                for (var i = 0; i < (int)ActionsAvailable.NONE + 1; i++)
                {
                    for (var j = (int)ActionsAvailable.NONE; j < (int)ActionsAvailable.BLOCK + 1; j++)
                    {
                        children.Add(new Node
                        {
                            moveIntent   = i,
                            actionIntent = j,
                            nc           = 0,
                            npc          = 0,
                            rc           = 0
                        });
                    }
                }
            }