// Main MCTS algorithm.
//
// Expands the supplied state, short-circuits if any immediate move already wins,
// then runs numbSimulations iterations of: descend the tree by UCB1 (UCT) until a
// node with no children is reached, and roll that node out. When finished, the
// root child with the most recorded games is stored in `next` and `done` is set.
//
// initialState: the state to choose a move from. If it produces no children an
// error is written to the console, `next` is set to null and `done` is set.
protected override void mainAlgorithm(AIState initialState)
{
    // Make the initial children.
    initialState.generateChildren();

    // If any immediate move is a win for the player recorded on that child,
    // take it straight away and skip all of the simulation work.
    foreach (var child in initialState.children)
    {
        if (child.getWinner() == child.playerIndex)
        {
            next = child;
            done = true;
            return;
        }
    }

    // No children were generated: report the error and give up.
    if (initialState.children.Count == 0)
    {
        Console.WriteLine("Monte: Error: State supplied has no children.");
        next = null;
        done = true;
        return;
    }

    // Run a fixed number of simulations.
    int count = 0;
    while (count < numbSimulations)
    {
        count++;

        // Selection: start at the root and walk down until a childless node is hit.
        AIState bestNode = initialState;
        while (bestNode.children.Count > 0)
        {
            // Baselines that any real score will beat.
            double bestScore = -1;
            int bestIndex = -1;

            // UCB1 applied to trees: the exploration term is driven by the visit
            // count of the node whose children are being compared (the parent),
            // not by the root's count. It is the same for every sibling, so it is
            // computed once per level. The +1 keeps the logarithm non-negative.
            double parentLogTerm = 2 * Math.Log(bestNode.totGames + 1);

            for (int i = 0; i < bestNode.children.Count; i++)
            {
                // Win score is wins/games, or 1 when the child has no games yet.
                double wins = bestNode.children[i].wins;
                double games = bestNode.children[i].totGames;
                double score = (games > 0) ? wins / games : 1.0;

                // Exploration bonus, scaled by the configurable exploreWeight.
                // The +0.1 avoids division by zero for unvisited children.
                double exploreRating = exploreWeight * Math.Sqrt(parentLogTerm / (games + 0.1));

                // Total score is win score + exploration score.
                double totalScore = score + exploreRating;

                // Keep only strictly better scores (the first of equal scores wins).
                if (!(totalScore > bestScore))
                {
                    continue;
                }
                bestScore = totalScore;
                bestIndex = i;
            }

            // Descend into the best child for the next level.
            bestNode = bestNode.children[bestIndex];
        }

        // Roll out from the selected node.
        rollout(bestNode);
    }

    // Once all simulations have run, choose the "robust child": the root child
    // with the most recorded games. With >=, the later child wins a tie.
    int mostGames = -1;
    int bestMove = -1;
    for (int i = 0; i < initialState.children.Count; i++)
    {
        int games = initialState.children[i].totGames;
        if (games >= mostGames)
        {
            mostGames = games;
            bestMove = i;
        }
    }

    // Publish the chosen move and flag completion.
    next = initialState.children[bestMove];
    done = true;
}
// Rollout function: plays uniformly random moves from rolloutStart until a
// terminal state is reached, the rollout limit is hit, or no moves remain,
// and records the outcome (win / loss / draw) on rolloutStart.
protected override void rollout(AIState rolloutStart)
{
    // A start state that is already terminal is scored directly.
    int startWinner = rolloutStart.getWinner();
    if (startWinner >= 0)
    {
        if (startWinner == rolloutStart.playerIndex)
        {
            // The start state's own player has won.
            rolloutStart.addWin();
        }
        else if (startWinner == (rolloutStart.playerIndex + 1) % 2)
        {
            // The other of the two players has won.
            rolloutStart.addLoss();
        }
        else
        {
            // Any other non-negative result is treated as a draw.
            rolloutStart.addDraw(drawScore);
        }
        return;
    }

    // Moves available from the current point of the playout.
    List<AIState> candidates = rolloutStart.generateChildren();

    // step counts loop iterations, starting at 1.
    for (int step = 1; ; step++)
    {
        // Hitting the rollout limit or running out of moves counts as a draw.
        if (step >= maxRollout || candidates.Count == 0)
        {
            rolloutStart.addDraw(drawScore);
            break;
        }

        // Pick one of the available moves at random.
        AIState picked = candidates[randGen.Next(candidates.Count)];
        int outcome = picked.getWinner();

        if (outcome < 0)
        {
            // Not terminal: continue the playout from the picked move.
            candidates = picked.generateChildren();
            continue;
        }

        // Terminal: a result of 2 is a draw; otherwise it is a win when the
        // winner is the start state's player, and a loss in every other case.
        if (outcome == 2)
        {
            rolloutStart.addDraw(drawScore);
        }
        else if (outcome == rolloutStart.playerIndex)
        {
            rolloutStart.addWin();
        }
        else
        {
            rolloutStart.addLoss();
        }
        break;
    }

    // Give every direct child of the start state a fresh, empty child list:
    // anything generated beneath them was only for this rollout.
    foreach (AIState directChild in rolloutStart.children)
    {
        directChild.children = new List<AIState>();
    }
}
// Simulates a single game and appends one training record per visited state
// (walking from the last reached state back towards the start, excluding the
// parentless starting state) to the supplied lists.
//
// stateCreator: factory for the starting state.
// inputs:       receives preprocess(state) for each recorded state.
// hiddenLayers: receives getHiddenLayers(preprocess(state), playerIndex).
// results:      receives the state's evaluation score (stateScore).
// rewards:      receives drawReward for drawn games, otherwise 1 when the
//               state has more wins than losses and 0 when it does not.
// playerIndxs:  receives the state's playerIndex.
private void playForward(StateCreator stateCreator, List<int[]> inputs, List<double[,]> hiddenLayers, List<double> results, List<double> rewards, List<int> playerIndxs)
{
    // Fresh starting state for this game.
    AIState state = stateCreator();

    // Number of moves considered so far (used to detect over-long games).
    int moveNumber = 0;

    // Keep playing while the current state has no winner.
    while (state.getWinner() < 0)
    {
        moveNumber++;

        // All moves available from the current state.
        List<AIState> moves = state.generateChildren();

        // Move limit reached or nothing to play: treat the game as a draw.
        if (moveNumber == maxForwardIters || moves.Count == 0)
        {
            // Walk back up the parent chain, recording each state with the
            // user-defined draw reward.
            for (; state.parent != null; state = state.parent)
            {
                inputs.Add(preprocess(state));
                results.Add(state.stateScore.Value);
                double[,] layers = getHiddenLayers(preprocess(state), state.playerIndex);
                hiddenLayers.Add(layers);
                playerIndxs.Add(state.playerIndex);
                rewards.Add(drawReward);
            }

            // Nothing more to do for this game.
            return;
        }

        // Score every move, then sort them. The selection below assumes the
        // best-scored move ends up last in the sorted list.
        foreach (AIState move in moves)
        {
            move.stateScore = evaluate(move);
        }
        moves = AIState.mergeSort(moves);

        // Move selection. Falls back to the last (best known) move when the
        // scan below accepts none.
        int chosen = moves.Count - 1;
        int idx = moves.Count - 1;
        while (idx >= 0)
        {
            double roll = randGen.NextDouble();

            // A move is accepted with probability equal to its score, capped
            // at confThreshold so that some exploration always remains.
            double threshold;
            if (moves[idx].stateScore > confThreshold)
            {
                threshold = confThreshold;
            }
            else
            {
                threshold = moves[idx].stateScore.Value;
            }

            // A move whose winner is the current state's player is always taken.
            if (roll < threshold || moves[idx].getWinner() == state.playerIndex)
            {
                chosen = idx;
                break;
            }
            idx--;
        }

        // If the chosen move ends the game, record its outcome and stop.
        AIState picked = moves[chosen];
        int outcome = picked.getWinner();
        if (outcome >= 0)
        {
            if (outcome == picked.playerIndex)
            {
                // Winning move for the player recorded on it.
                picked.addWin();
            }
            else
            {
                // Every other terminal result is recorded as a loss.
                picked.addLoss();
            }

            // Note: the current state is not advanced to the terminal move,
            // so recording below starts from the state before it.
            break;
        }

        // Otherwise advance to the chosen move and repeat.
        state = picked;
    }

    // Game over: store the data for every state on the path (for backprop).
    while (state.parent != null)
    {
        inputs.Add(preprocess(state));
        results.Add(state.stateScore.Value);
        if (state.wins > state.losses)
        {
            rewards.Add(1);
        }
        else
        {
            rewards.Add(0);
        }
        double[,] layers = getHiddenLayers(preprocess(state), state.playerIndex);
        hiddenLayers.Add(layers);
        playerIndxs.Add(state.playerIndex);
        state = state.parent;
    }
}
// Main algorithm, implemented by each concrete agent.
// initalState: the state the algorithm starts from.
protected abstract void mainAlgorithm(AIState initalState);
// Rollout function: plays moves from rolloutStart until a terminal state is
// reached, the rollout limit is hit, or no moves remain, and records the
// outcome (win / loss / draw) on rolloutStart. Each move is either the
// top-ranked one according to the model or a uniformly random one.
protected override void rollout(AIState rolloutStart)
{
    // A start state that is already terminal is scored directly.
    int startWinner = rolloutStart.getWinner();
    if (startWinner >= 0)
    {
        if (startWinner == rolloutStart.playerIndex)
        {
            // The start state's own player has won.
            rolloutStart.addWin();
        }
        else if (startWinner == (rolloutStart.playerIndex + 1) % 2)
        {
            // The other of the two players has won.
            rolloutStart.addLoss();
        }
        else
        {
            // Any other non-negative result is treated as a draw.
            rolloutStart.addDraw(drawScore);
        }
        return;
    }

    // Moves available from the current point of the playout.
    List<AIState> candidates = rolloutStart.generateChildren();

    // step counts loop iterations, starting at 1.
    for (int step = 1; ; step++)
    {
        // Hitting the rollout limit or running out of moves counts as a draw.
        if (step >= maxRollout || candidates.Count == 0)
        {
            rolloutStart.addDraw(drawScore);
            break;
        }

        // Default to the last index, which is where the best move is expected
        // to sit once the list has been sorted.
        int choice = candidates.Count - 1;

        if (randGen.NextDouble() < epsilon)
        {
            // Model-guided pick: score any move that has no score yet, then
            // sort; the index chosen above is kept.
            foreach (AIState candidate in candidates)
            {
                if (candidate.stateScore == null)
                {
                    candidate.stateScore = model.evaluate(candidate);
                }
            }
            candidates = AIState.mergeSort(candidates);
        }
        else
        {
            // Random pick.
            choice = randGen.Next(candidates.Count);
        }

        AIState picked = candidates[choice];
        int outcome = picked.getWinner();

        if (outcome < 0)
        {
            // Not terminal: continue the playout from the picked move.
            candidates = picked.generateChildren();
            continue;
        }

        // Terminal: a result of 2 is a draw; otherwise it is a win when the
        // winner is the start state's player, and a loss in every other case.
        if (outcome == 2)
        {
            rolloutStart.addDraw(drawScore);
        }
        else if (outcome == rolloutStart.playerIndex)
        {
            rolloutStart.addWin();
        }
        else
        {
            rolloutStart.addLoss();
        }
        break;
    }

    // Flag every direct child of the start state as a tree node.
    foreach (AIState directChild in rolloutStart.children)
    {
        directChild.treeNode = true;
    }
}
// Rollout function, to be written by the implementing agent.
// rolloutStart: the state the rollout begins from.
protected abstract void rollout(AIState rolloutStart);