/// <summary>
/// Scores one candidate opening action with random rollouts.
/// </summary>
/// <remarks>
/// For each epoch the working copy is reset from <c>gs</c>, <c>availableActions[index]</c> is
/// played (with <c>0</c> as the other action argument), and then <c>rdmAgent</c> chooses both
/// actions until both players report <c>isGameOver</c> or the step cap is exceeded. The score of
/// <c>players[playerId]</c> at the end of every rollout is added to <c>summedScores[index]</c>.
/// </remarks>
/// <param name="index">
/// Index of the action to evaluate in <c>availableActions</c>; also the slot of
/// <c>summedScores</c> that receives the accumulated score.
/// </param>
public void Execute(int index) {
    const int epochs = 100;
    const int maxIteration = 200;

    // Local copy of the agent: Act is called on this copy, not on the rdmAgent field.
    var agent = rdmAgent;

    // One working copy is allocated up front and refilled by CopyTo at the start of each epoch.
    var gsCopy = Rules.Clone(ref gs);

    for (var n = 0; n < epochs; n++) {
        Rules.CopyTo(ref gs, ref gsCopy);
        Rules.Step(ref gameParameters, ref gsCopy, availableActions[index], 0);

        var currentDepth = 0;
        while (!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver) {
            Rules.Step(
                ref gameParameters,
                ref gsCopy,
                agent.Act(ref gsCopy, availableActions, 0),
                agent.Act(ref gsCopy, availableActions, 1));

            currentDepth++;
            if (currentDepth > maxIteration) {
                break;
            }
        }

        summedScores[index] += gsCopy.players[playerId].score;
    }

    // Release the working copy once, after the last epoch. The original disposed these containers
    // at the end of every epoch, so each following CopyTo wrote into already-disposed containers.
    // NOTE(review): assumes Rules.CopyTo reuses the containers allocated by Rules.Clone rather
    // than allocating new ones - confirm against Rules.CopyTo.
    gsCopy.projectiles.Dispose();
    gsCopy.asteroids.Dispose();
}
/// <summary>
/// Tries every (move, action) intent pair, follows each with random play, and stores the pair
/// whose rollout reached the highest score in <c>intent[0]</c> / <c>intent[1]</c>.
/// </summary>
/// <remarks>
/// Each pair is rolled out a fixed number of times: the working copy is reset from <c>gs</c>, the
/// candidate intent is applied once, then <c>rdmAgent</c> plays a fixed number of steps. A pair is
/// kept only when a single rollout ends strictly above the best score seen so far, which starts
/// at the player's current score; if none does, both outputs stay <c>ActionsAvailable.NONE</c>.
/// </remarks>
public void Execute() {
    const int rolloutsPerIntent = 5;
    const int randomStepsPerRollout = 100;

    var bestMove = ActionsAvailable.NONE;
    var bestAction = ActionsAvailable.NONE;

    var sandbox = GameStateRules.Clone(ref gs);
    int bestScore = sandbox.players[id].PlayerScore;

    for (int move = 0; move <= (int)ActionsAvailable.NONE; move++) {
        getIntent.moveIntent = (ActionsAvailable)move;

        for (int action = (int)ActionsAvailable.NONE; action <= (int)ActionsAvailable.BLOCK; action++) {
            getIntent.actionIntent = (ActionsAvailable)action;

            for (int rollout = 0; rollout < rolloutsPerIntent; rollout++) {
                // Restart from the real state and play the candidate intent first.
                GameStateRules.CopyTo(ref gs, ref sandbox);
                GameStateRules.Step(ref sandbox, getIntent, id);

                // Then let the random agent play the remaining steps.
                for (int remaining = randomStepsPerRollout; remaining != 0; remaining--) {
                    GameStateRules.Step(ref sandbox, rdmAgent.Act(ref sandbox, id), id);
                }

                if (sandbox.players[id].PlayerScore > bestScore) {
                    bestScore = sandbox.players[id].PlayerScore;
                    bestMove = (ActionsAvailable)move;
                    bestAction = (ActionsAvailable)action;
                }
            }
        }
    }

    intent[0] = bestMove;
    intent[1] = bestAction;
}
/// <summary>
/// Runs a few Monte Carlo Tree Search epochs from <c>gs</c> and writes the visit count of every
/// root action to <c>summedScores</c>.
/// </summary>
/// <remarks>
/// The tree is a hash map from a state hash (<c>Rules.GetHashCode</c>) to one
/// <c>NodeMCTS</c> per entry of <c>availableActions</c>. Each epoch performs SELECT (UCB1 while
/// every child has been visited), EXPAND (one random unvisited child), SIMULATE (random actions)
/// and BACKPROPAGATE (visit counts only). SELECT and SIMULATE share one step budget per epoch.
/// The second action passed to <c>Rules.Step</c> is always <c>availableActions[7]</c>.
/// </remarks>
public void Execute() {
    const int epochs = 5;
    const int maxSteps = 10;
    // NOTE(review): fixed index used for the second action argument of Rules.Step - presumably
    // the other player's action; confirm. Requires availableActions.Length > 7.
    const int secondActionIndex = 7;

    var agent = rdmAgent;
    var gsCopy = Rules.Clone(ref gs);
    var rootHash = Rules.GetHashCode(ref gsCopy, 0);

    // Create the search memory (the tree) and register the root state.
    var memory = new NativeHashMap<long, NativeList<NodeMCTS>>(360, Allocator.Temp);
    AddActionNodes(ref memory, rootHash);

    for (var n = 0; n < epochs; n++) {
        Rules.CopyTo(ref gs, ref gsCopy);
        var currentHash = rootHash;
        var selectedNodes = new NativeList<SelectedNodeInfo>(Allocator.Temp);
        var steps = 0;

        // SELECT: descend while the current state has no unvisited child.
        while (!gsCopy.players[0].isGameOver && steps < maxSteps) {
            steps++;

            // Writing through this local copy is equivalent to writing through memory[...]:
            // the original already filled its lists via memory[hash].Add(...), i.e. it relied
            // on copies of a NativeList sharing the same storage.
            var children = memory[currentHash];

            var hasUnexploredNodes = false;
            for (var i = 0; i < children.Length; i++) {
                if (children[i].nc == 0) {
                    hasUnexploredNodes = true;
                    break;
                }
            }

            if (hasUnexploredNodes) {
                break;
            }

            var bestNodeIndex = -1;
            var bestNodeScore = float.MinValue;
            for (var i = 0; i < children.Length; i++) {
                var node = children[i];
                node.npc += 1;
                children[i] = node;

                // UCB1: mean reward plus exploration bonus. nc > 0 here because every child
                // has been visited at least once.
                var score = (float)node.rc / node.nc
                            + math.sqrt(2 * math.log(node.npc) / node.nc);
                if (score >= bestNodeScore) {
                    bestNodeIndex = i;
                    bestNodeScore = score;
                }
            }

            selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = bestNodeIndex });
            Rules.Step(ref gameParameters, ref gsCopy, children[bestNodeIndex].action,
                availableActions[secondActionIndex]);
            currentHash = Rules.GetHashCode(ref gsCopy, 0);
            AddActionNodes(ref memory, currentHash);
        }

        // EXPAND: play one random unvisited child of the state reached by SELECT.
        if (!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver) {
            var frontier = memory[currentHash];
            var unexploredActions = new NativeList<int>(Allocator.Temp);
            for (var i = 0; i < frontier.Length; i++) {
                if (frontier[i].nc == 0) {
                    unexploredActions.Add(i);
                }
            }

            // SELECT can also stop because the step budget ran out while every child was already
            // visited; there is then nothing to expand, and indexing the empty list would be an
            // out-of-range access.
            if (unexploredActions.Length > 0) {
                var chosenNodeIndex = unexploredActions[agent.rdm.NextInt(0, unexploredActions.Length)];
                selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = chosenNodeIndex });
                Rules.Step(ref gameParameters, ref gsCopy, frontier[chosenNodeIndex].action,
                    availableActions[secondActionIndex]);
                currentHash = Rules.GetHashCode(ref gsCopy, 0);
                AddActionNodes(ref memory, currentHash);
            }
        }

        // SIMULATE: random play for whatever is left of the step budget. The parentheses matter:
        // without them '&&' binds tighter than '||' and the budget was ignored for as long as
        // player 0 was still alive.
        while ((!gsCopy.players[0].isGameOver || !gsCopy.players[1].isGameOver) && steps < maxSteps) {
            var chosenActionIndex = agent.rdm.NextInt(0, availableActions.Length);
            // Look the action up in availableActions (as SELECT/EXPAND do) instead of casting the
            // random index to the enum, so the rollout only plays actions that are available.
            Rules.Step(ref gameParameters, ref gsCopy, availableActions[chosenActionIndex],
                availableActions[secondActionIndex]);
            steps++;
        }

        // BACKPROPAGATE: count one visit on every node chosen during this epoch.
        // NOTE(review): reward accumulation (node.rc) was commented out in the original, so rc
        // stays 0 and the UCB score above is driven by the exploration term only.
        for (var i = 0; i < selectedNodes.Length; i++) {
            var info = selectedNodes[i];
            var list = memory[info.hash];
            var node = list[info.nodeIndex];
            node.nc += 1;
            list[info.nodeIndex] = node;
        }
    }

    // Report the visit count of each root action.
    var rootNodes = memory[rootHash];
    for (var i = 0; i < rootNodes.Length; i++) {
        summedScores[i] = rootNodes[i].nc;
    }
}

/// <summary>
/// Registers <paramref name="hash"/> in <paramref name="memory"/> with one unvisited node per
/// entry of <c>availableActions</c>; does nothing when the hash is already known.
/// </summary>
private void AddActionNodes(ref NativeHashMap<long, NativeList<NodeMCTS>> memory, long hash) {
    if (memory.ContainsKey(hash)) {
        return;
    }

    var nodes = new NativeList<NodeMCTS>(availableActions.Length, Allocator.Temp);
    for (var i = 0; i < availableActions.Length; i++) {
        nodes.Add(new NodeMCTS { action = availableActions[i], nc = 0, npc = 0, rc = 0 });
    }

    memory.TryAdd(hash, nodes);
}
// Builds a search tree of MCTSTreeNode values rooted at the current game state in order to
// choose an Intent for player `id`.
//
// Phase 1 creates one child of the root per (move, action) pair, steps a clone of `gs` with that
// pair, and scores the child. Phase 2 repeats `iterations` times: pick a node through
// GetMaxChoosingValue, append a new child to it, step that child's state, and recompute
// nodeChoosingValue along the parent chain.
//
// gs: game state to search from; only clones of it are stepped in the visible code.
// id: index of the player whose intents are simulated and whose PlayerScore is read.
//
// NOTE(review): the end of this method (closing brace and the returned Intent) is not visible in
// this part of the file, so how the final intent is picked cannot be documented from here.
public Intent Act(ref GameStateData gs, int id) {
    // Root node: placeholder value, one selection, depth 0.
    MCTSTreeNode parentNode = new MCTSTreeNode();
    parentNode.nodeChoosingValue = -1000f;
    parentNode.nSelect = 1;
    parentNode.depth = 0;
    int iterations = 300;
    parentNode.childNodes = new List <MCTSTreeNode>();

    // Phase 1: one root child for every move value in [0, NONE] combined with every action value
    // in [NONE, BLOCK].
    for (int i = 0; i <= (int)ActionsAvailable.NONE; i++) {
        for (int j = (int)ActionsAvailable.NONE; j <= (int)ActionsAvailable.BLOCK; j++) {
            parentNode.childNodes.Add(new MCTSTreeNode());
            // The root's action list is rebuilt to {NONE, NONE} on every pass of this loop.
            parentNode.actions = new List <ActionsAvailable>();
            parentNode.actions.Add(ActionsAvailable.NONE);
            parentNode.actions.Add(ActionsAvailable.NONE);
            var parentNodeChildNode = parentNode.childNodes[parentNode.childNodes.Count - 1];
            parentNodeChildNode.actions = new List <ActionsAvailable>();
            parentNodeChildNode.actions.Add((ActionsAvailable)i);
            parentNodeChildNode.actions.Add((ActionsAvailable)j);
            // Apply the candidate intent to a clone so `gs` itself is not stepped.
            var gsCopy = GameStateRules.Clone(ref gs);
            Intent currentIntents = new Intent();
            currentIntents.moveIntent = (ActionsAvailable)i;
            currentIntents.actionIntent = (ActionsAvailable)j;
            Debug.Log("damn" + currentIntents.moveIntent + " " + currentIntents.actionIntent);
            GameStateRules.Step(ref gsCopy, currentIntents, id);
            parentNodeChildNode.currentGS = GameStateRules.Clone(ref gsCopy);
            parentNodeChildNode.sumScore = parentNode.sumScore + gsCopy.players[id].PlayerScore;
            parentNodeChildNode.nSelect = 1;
            parentNodeChildNode.depth = 1;
            parentNodeChildNode.parentNode = new List <MCTSTreeNode>();
            parentNodeChildNode.parentNode.Add(parentNode);
            // Selection value: sumScore / nSelect + sqrt(2) * sqrt(ln(parent nSelect) / nSelect).
            parentNodeChildNode.nodeChoosingValue = (float)parentNodeChildNode.sumScore / parentNodeChildNode.nSelect + math.sqrt(2) * math.sqrt(math.log(parentNode.nSelect) / parentNodeChildNode.nSelect);
            // Store the filled-in child back into the root's list.
            parentNode.childNodes[parentNode.childNodes.Count - 1] = parentNodeChildNode;
            // NOTE(review): `actions` is not a local of this method - presumably a member
            // declared elsewhere; confirm what is being cleared here.
            actions.Clear();
        }
    }

    // Phase 2: grow the tree one node per iteration.
    for (int i = 0; i < iterations; i++) {
        // Reference to the node picked by GetMaxChoosingValue (defined outside this view).
        ref MCTSTreeNode nodeMaxChoosingValue = ref GetMaxChoosingValue(ref parentNode, ref parentNode);
        nodeMaxChoosingValue.nSelect++;
        if (nodeMaxChoosingValue.childNodes == null) {
            nodeMaxChoosingValue.childNodes = new List <MCTSTreeNode>();
        }
        nodeMaxChoosingValue.childNodes.Add(new MCTSTreeNode());
        // NOTE(review): unlike phase 1, currentNode is never stored back into childNodes in the
        // visible code; if MCTSTreeNode is a struct, the assignments below only affect this local
        // copy - confirm.
        MCTSTreeNode currentNode = nodeMaxChoosingValue.childNodes[nodeMaxChoosingValue.childNodes.Count - 1];
        currentNode.parentNode = new List <MCTSTreeNode>();
        currentNode.parentNode.Add(nodeMaxChoosingValue);
        // NOTE(review): post-increment - the new node receives the ROOT's current depth and the
        // root's depth is then incremented; the selected node's depth is not used. Confirm intent.
        currentNode.depth = parentNode.depth++;
        currentNode.currentGS = GameStateRules.Clone(ref nodeMaxChoosingValue.currentGS);
        // The new node's move/action pair is derived from the selected node's child count
        // (after the Add above): count / NONE, and count % NONE + MOVE_BACK_LEFT.
        currentNode.actions = new List <ActionsAvailable>();
        currentNode.actions.Add((ActionsAvailable)(nodeMaxChoosingValue.childNodes.Count / ((int)ActionsAvailable.NONE)));
        currentNode.actions.Add((ActionsAvailable)(nodeMaxChoosingValue.childNodes.Count % (int)ActionsAvailable.NONE + (int)ActionsAvailable.MOVE_BACK_LEFT));
        Intent currentIntents = new Intent();
        currentIntents.moveIntent = currentNode.actions[0];
        currentIntents.actionIntent = currentNode.actions[1];
        // Logs one message per iteration.
        Debug.Log(nodeMaxChoosingValue.nSelect + " " + nodeMaxChoosingValue.depth + " " + nodeMaxChoosingValue.childNodes.Count + " " + nodeMaxChoosingValue.actions[0] + " " + nodeMaxChoosingValue.actions[1] + " " + currentIntents.moveIntent + " " + currentIntents.actionIntent);
        GameStateRules.Step(ref currentNode.currentGS, currentIntents, id);
        currentNode.sumScore = nodeMaxChoosingValue.sumScore + currentNode.currentGS.players[id].PlayerScore;
        currentNode.nSelect = 1;
        currentNode.nodeChoosingValue = (float)currentNode.sumScore / currentNode.nSelect + math.sqrt(2) * math.sqrt(math.log(nodeMaxChoosingValue.nSelect) / currentNode.nSelect);
        //int currentDepth = currentNode.depth;
        // Walk up the parent chain, recomputing nodeChoosingValue until a node of depth 0 is
        // reached.
        // NOTE(review): rollbackNode is a local taken from parentNode[0]; if MCTSTreeNode is a
        // struct these recomputed values are not written back to the tree - confirm.
        MCTSTreeNode rollbackNode = currentNode.parentNode[0];
        while (rollbackNode.depth > 0) {
            rollbackNode.nodeChoosingValue = (float)rollbackNode.sumScore / rollbackNode.nSelect + math.sqrt(2) * math.sqrt(math.log(rollbackNode.parentNode[0].nSelect) / rollbackNode.nSelect);
            rollbackNode = rollbackNode.parentNode[0];
            //currentDepth = rollbackNode.depth;
        }
    }
/// <summary>
/// Runs Monte Carlo Tree Search from <c>gs</c> for player <c>id</c> and writes the visit count of
/// every root (move, action) node to <c>sumScore</c>.
/// </summary>
/// <remarks>
/// The tree is a hash map from a state hash (<c>GameStateRules.GetHashCode</c>) to one
/// <c>Node</c> per (move, action) intent pair. Each iteration performs SELECT (UCB1 while every
/// child has been visited), EXPAND (one random unvisited child), SIMULATE (random intents) and
/// BACKPROPAGATE (adds the player's final <c>PlayerScore</c> to <c>rc</c> and one visit to
/// <c>nc</c>). SELECT and SIMULATE each stop at end of game or once <c>currentGameStep</c> has
/// advanced more than the horizon past its value when the phase started.
/// </remarks>
public void Execute() {
    const int iterations = 100;
    const int horizon = 100;
    // NOTE(review): capacity kept from the original; ten million entries is a very large Temp
    // allocation for a search that runs 100 iterations - consider sizing it from the iteration
    // count and horizon.
    const int memoryCapacity = 10000000;

    var agent = rdmAgent;
    var gsCopy = GameStateRules.Clone(ref gs);
    var rootHash = GameStateRules.GetHashCode(ref gsCopy);

    var memory = new NativeHashMap<long, NativeList<Node>>(memoryCapacity, Allocator.Temp);
    AddIntentNodes(ref memory, rootHash);

    // Writing through this local copy is equivalent to writing through memory[...]: the original
    // already filled its lists via memory[hash].Add(...), i.e. it relied on copies of a
    // NativeList sharing the same storage.
    var rootNodes = memory[rootHash];
    for (var i = 0; i < rootNodes.Length; i++) {
        sumScore[i] = 0;
    }

    for (var n = 0; n < iterations; n++) {
        GameStateRules.CopyTo(ref gs, ref gsCopy);
        var currentHash = rootHash;
        var selectedNodes = new NativeList<SelectedNodeInfo>(Allocator.Temp);
        var currentTime = gsCopy.currentGameStep;

        // SELECT: descend while the current state has no unvisited child.
        while (!gsCopy.EndOfGame && gsCopy.currentGameStep - horizon <= currentTime) {
            var children = memory[currentHash];

            var hasUnexploredNodes = false;
            for (var i = 0; i < children.Length; i++) {
                if (children[i].nc == 0) {
                    hasUnexploredNodes = true;
                    break;
                }
            }

            if (hasUnexploredNodes) {
                break;
            }

            var bestNodeIndex = -1;
            var bestNodeScore = float.MinValue;
            for (var i = 0; i < children.Length; i++) {
                var node = children[i];
                node.npc += 1;
                children[i] = node;

                // UCB1: mean reward plus exploration bonus. nc > 0 here because every child
                // has been visited at least once.
                var score = (float)node.rc / node.nc
                            + math.sqrt(2 * math.log(node.npc) / node.nc);
                if (score >= bestNodeScore) {
                    bestNodeIndex = i;
                    bestNodeScore = score;
                }
            }

            selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = bestNodeIndex });

            Intent selectedIntent = new Intent();
            selectedIntent.moveIntent = (ActionsAvailable)children[bestNodeIndex].moveIntent;
            selectedIntent.actionIntent = (ActionsAvailable)children[bestNodeIndex].actionIntent;
            GameStateRules.Step(ref gsCopy, selectedIntent, id);

            currentHash = GameStateRules.GetHashCode(ref gsCopy);
            AddIntentNodes(ref memory, currentHash);
        }

        // EXPAND: play one random unvisited child of the state reached by SELECT.
        if (!gsCopy.EndOfGame) {
            var frontier = memory[currentHash];
            var unexploredActions = new NativeList<int>(Allocator.Temp);
            for (var i = 0; i < frontier.Length; i++) {
                if (frontier[i].nc == 0) {
                    unexploredActions.Add(i);
                }
            }

            // SELECT can also stop because the horizon was reached while every child was already
            // visited; there is then nothing to expand, and indexing the empty list would be an
            // out-of-range access.
            if (unexploredActions.Length > 0) {
                var chosenNodeIndex = unexploredActions[agent.rdm.NextInt(0, unexploredActions.Length)];
                selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = chosenNodeIndex });

                Intent expandedIntent = new Intent();
                expandedIntent.moveIntent = (ActionsAvailable)frontier[chosenNodeIndex].moveIntent;
                expandedIntent.actionIntent = (ActionsAvailable)frontier[chosenNodeIndex].actionIntent;
                GameStateRules.Step(ref gsCopy, expandedIntent, id);

                currentHash = GameStateRules.GetHashCode(ref gsCopy);
                AddIntentNodes(ref memory, currentHash);
            }
        }

        // SIMULATE: random intents until end of game or the horizon, measured from here.
        currentTime = gsCopy.currentGameStep;
        Intent rolloutIntent = new Intent();
        rolloutIntent.moveIntent = ActionsAvailable.NONE;
        rolloutIntent.actionIntent = ActionsAvailable.NONE;
        while (!gsCopy.EndOfGame && gsCopy.currentGameStep - horizon <= currentTime) {
            // NOTE(review): hard-coded enum ranges kept from the original (moves 0..8, actions
            // 9..12); confirm they match ActionsAvailable, since the tree nodes are enumerated
            // from NONE/BLOCK instead.
            var chosenMove = agent.rdm.NextInt(0, 9);
            var chosenAction = agent.rdm.NextInt(9, 13);
            rolloutIntent.moveIntent = (ActionsAvailable)chosenMove;
            rolloutIntent.actionIntent = (ActionsAvailable)chosenAction;
            GameStateRules.Step(ref gsCopy, rolloutIntent, id);
        }

        // BACKPROPAGATE: credit the rollout's final score to every node chosen this iteration.
        for (var i = 0; i < selectedNodes.Length; i++) {
            var info = selectedNodes[i];
            var list = memory[info.hash];
            var node = list[info.nodeIndex];
            node.rc += gsCopy.players[id].PlayerScore;
            node.nc += 1;
            list[info.nodeIndex] = node;
        }
    }

    // Report the visit count of each root node.
    for (var i = 0; i < rootNodes.Length; i++) {
        sumScore[i] = rootNodes[i].nc;
    }
}

/// <summary>
/// Registers <paramref name="hash"/> in <paramref name="memory"/> with one unvisited node per
/// (move, action) pair - moves in [0, NONE], actions in [NONE, BLOCK]; does nothing when the hash
/// is already known.
/// </summary>
private static void AddIntentNodes(ref NativeHashMap<long, NativeList<Node>> memory, long hash) {
    if (memory.ContainsKey(hash)) {
        return;
    }

    var nodes = new NativeList<Node>(54, Allocator.Temp);
    for (var i = 0; i < (int)ActionsAvailable.NONE + 1; i++) {
        for (var j = (int)ActionsAvailable.NONE; j < (int)ActionsAvailable.BLOCK + 1; j++) {
            nodes.Add(new Node { moveIntent = i, actionIntent = j, nc = 0, npc = 0, rc = 0 });
        }
    }

    memory.TryAdd(hash, nodes);
}