/// <summary>
/// Runs a small Monte Carlo tree search for player 0 starting from <c>gs</c> and stores,
/// for every root action, how many times the search went through it in <c>summedScores</c>.
/// </summary>
/// <remarks>
/// Every epoch restarts from a fresh clone of <c>gs</c> and goes through four phases:
/// SELECT (descend through fully visited states using the UCB1 score), EXPAND (try one
/// never-visited action), SIMULATE (random moves until the game ends or the depth cap is hit)
/// and BACKPROPAGATE (add <c>playerScore1</c> and one visit to every node on the path).
/// States are keyed by <c>Rules.GetHashCodeJ1</c>; each state owns one <c>Node</c> per entry of
/// <c>availableActionsFree</c>.
/// When <c>Playerid</c> is 1 only the root hash is computed; no search is performed.
/// </remarks>
public void Execute()
{
    const int epochs = 10;
    // Upper bound on random rollout moves so that a game which never ends cannot hang the job.
    const int maxRolloutDepth = 500;

    var agent = rdmAgent;
    var gsCopy = Rules.Clone(ref gs);

    if (Playerid == 0)
    {
        // Without any action there is nothing to search, and the loops below would index
        // into empty lists.
        if (availableActionsFree.Length == 0)
        {
            return;
        }

        var rootHash = Rules.GetHashCodeJ1(ref gsCopy);
        var memory = new NativeHashMap<long, NativeList<Node>>(2048, Allocator.Temp);
        EnsureNodeList(ref memory, rootHash);

        for (var n = 0; n < epochs; n++)
        {
            gsCopy = Rules.Clone(ref gs);
            var currentHash = rootHash;
            var selectedNodes = new NativeList<SelectedNodeInfo>(Allocator.Temp);

            // SELECT: keep descending while every action of the current state has been tried.
            while (!gsCopy.isGameOver)
            {
                var children = memory[currentHash];

                var hasUnexploredNodes = false;
                for (var i = 0; i < children.Length; i++)
                {
                    if (children[i].nc == 0)
                    {
                        hasUnexploredNodes = true;
                        break;
                    }
                }

                if (hasUnexploredNodes)
                {
                    break;
                }

                var bestNodeIndex = -1;
                var bestNodeScore = float.MinValue;
                for (var i = 0; i < children.Length; i++)
                {
                    // npc counts how often selection passed through the parent state.
                    var node = children[i];
                    node.npc += 1;
                    children[i] = node;

                    // UCB1: average reward plus exploration bonus. nc > 0 is guaranteed by
                    // the unexplored-node check above. Ties go to the later node (>=).
                    var score = (float)node.rc / node.nc
                                + math.sqrt(2 * math.log(node.npc) / node.nc);
                    if (score >= bestNodeScore)
                    {
                        bestNodeIndex = i;
                        bestNodeScore = score;
                    }
                }

                // Store the list handle back in the map, as the original code did.
                memory[currentHash] = children;

                selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = bestNodeIndex });
                Rules.Step(ref gsCopy, children[bestNodeIndex].action, 0);
                currentHash = Rules.GetHashCodeJ1(ref gsCopy);
                EnsureNodeList(ref memory, currentHash);
            }

            // EXPAND: play one randomly chosen action that has never been visited from this state.
            if (!gsCopy.isGameOver)
            {
                var children = memory[currentHash];
                var unexploredActions = new NativeList<int>(Allocator.Temp);
                for (var i = 0; i < children.Length; i++)
                {
                    if (children[i].nc == 0)
                    {
                        unexploredActions.Add(i);
                    }
                }

                // Non-empty here: selection only stops on a live game when it saw nc == 0.
                var chosenNodeIndex = unexploredActions[agent.rdm.NextInt(0, unexploredActions.Length)];
                selectedNodes.Add(new SelectedNodeInfo { hash = currentHash, nodeIndex = chosenNodeIndex });
                Rules.Step(ref gsCopy, children[chosenNodeIndex].action, 0);
                currentHash = Rules.GetHashCodeJ1(ref gsCopy);
                EnsureNodeList(ref memory, currentHash);
            }

            // SIMULATE: random playout, capped at maxRolloutDepth.
            var currentDepth = 0;
            while (!gsCopy.isGameOver)
            {
                var chosenActionIndex = agent.rdm.NextInt(0, availableActionsFree.Length);
                // Step takes the action value, not its position in the list (the tree phases
                // above also pass values taken from availableActionsFree).
                Rules.Step(ref gsCopy, availableActionsFree[chosenActionIndex], 0);

                currentDepth++;
                if (currentDepth > maxRolloutDepth)
                {
                    break;
                }
            }

            // BACKPROPAGATE: credit the final score to every node chosen during this epoch.
            for (var i = 0; i < selectedNodes.Length; i++)
            {
                var visited = selectedNodes[i];
                var list = memory[visited.hash];
                var node = list[visited.nodeIndex];
                node.rc += gsCopy.playerScore1;
                node.nc += 1;
                list[visited.nodeIndex] = node;
                memory[visited.hash] = list;
            }
        }

        // Publish the visit count of each root action once all epochs are done.
        var rootNodes = memory[rootHash];
        for (var i = 0; i < rootNodes.Length; i++)
        {
            summedScores[i] = rootNodes[i].nc;
        }
    }
    else if (Playerid == 1)
    {
        // NOTE(review): the search for player 1 is not implemented; only the hash is computed.
        var rootHash = Rules.GetHashCodeJ2(ref gsCopy);
    }
}

// Registers `hash` in `memory` with one unvisited node per entry of availableActionsFree,
// unless the state is already known.
private void EnsureNodeList(ref NativeHashMap<long, NativeList<Node>> memory, long hash)
{
    if (memory.ContainsKey(hash))
    {
        return;
    }

    var nodes = new NativeList<Node>(availableActionsFree.Length, Allocator.Temp);
    for (var i = 0; i < availableActionsFree.Length; i++)
    {
        nodes.Add(new Node { action = availableActionsFree[i], nc = 0, npc = 0, rc = 0 });
    }

    memory.TryAdd(hash, nodes);
}
/// <summary>
/// Evaluates the root action stored at <c>availableActions[index]</c>: plays it on a clone of
/// <c>gs</c>, lets <c>rdmAgent</c> choose every following move, and adds the final score of the
/// acting player to <c>summedScores[index]</c>. This is repeated a fixed number of times.
/// </summary>
/// <remarks>
/// <c>Playerid</c> 0 moves through the first action argument of <c>Rules.Step</c> and is scored
/// with <c>playerScore1</c>; <c>Playerid</c> 1 uses the second argument and <c>playerScore2</c>.
/// Any other id does nothing. The value returned by <c>GetAvailableActions1int</c> /
/// <c>GetAvailableActions2int</c> picks the action list handed to the agent (0 = free,
/// 1 = frozen, 2 = stun); any other value plays no move but still counts as a step.
/// </remarks>
/// <param name="index">Slot of the root action in <c>availableActions</c> and of its
/// accumulated score in <c>summedScores</c>.</param>
public void Execute(int index)
{
    const int rolloutCount = 10;
    // A playout is abandoned once more than this many steps have been taken.
    const int depthLimit = 500;

    var roller = rdmAgent;

    if (Playerid == 0)
    {
        for (var run = 0; run < rolloutCount; run++)
        {
            var state = Rules.Clone(ref gs);
            Rules.Step(ref state, availableActions[index], 0);

            var steps = 0;
            while (steps <= depthLimit && !state.isGameOver)
            {
                var kind = Rules.GetAvailableActions1int(ref state);
                if (kind == 0)
                {
                    Rules.Step(ref state, roller.Act(ref state, availableActionsFree), 0);
                }
                else if (kind == 1)
                {
                    Rules.Step(ref state, roller.Act(ref state, availableActionsFrozen), 0);
                }
                else if (kind == 2)
                {
                    Rules.Step(ref state, roller.Act(ref state, availableActionsStun), 0);
                }

                steps++;
            }

            summedScores[index] += state.playerScore1;
        }

        return;
    }

    if (Playerid != 1)
    {
        return;
    }

    for (var run = 0; run < rolloutCount; run++)
    {
        var state = Rules.Clone(ref gs);
        Rules.Step(ref state, 0, availableActions[index]);

        var steps = 0;
        while (steps <= depthLimit && !state.isGameOver)
        {
            var kind = Rules.GetAvailableActions2int(ref state);
            if (kind == 0)
            {
                Rules.Step(ref state, 0, roller.Act(ref state, availableActionsFree));
            }
            else if (kind == 1)
            {
                Rules.Step(ref state, 0, roller.Act(ref state, availableActionsFrozen));
            }
            else if (kind == 2)
            {
                Rules.Step(ref state, 0, roller.Act(ref state, availableActionsStun));
            }

            steps++;
        }

        summedScores[index] += state.playerScore2;
    }
}