// Prunes the children of a state.
// Every child is scored with the model, the list is sorted with AIState.mergeSort,
// and a fraction (pruningFactor) of the entries is dropped from the front of the
// sorted list. The surviving list replaces initState.children and the state is
// flagged as no longer unpruned.
//   initState: the state whose child list is pruned.
private void prune(AIState initState)
{
    // A pruning factor of zero means pruning is switched off.
    if (pruningFactor == 0)
    {
        return;
    }

    // Child lists shorter than the stopPruningAt threshold are left untouched.
    List<AIState> candidates = initState.children;
    if (candidates.Count < stopPruningAt)
    {
        return;
    }

    // Score every child so the sort has a value to order by.
    for (int idx = 0; idx < candidates.Count; idx++)
    {
        AIState candidate = candidates[idx];
        candidate.stateScore = model.evaluate(candidate);
    }

    List<AIState> ranked = AIState.mergeSort(candidates);

    // Drop the leading entries of the sorted list. The removal starts at index 0,
    // which is presumably the lowest-scored end of mergeSort's ordering (confirm
    // against AIState.mergeSort).
    int dropCount = (int)Math.Floor(ranked.Count * pruningFactor);
    ranked.RemoveRange(0, dropCount);

    // Publish the pruned list and mark the state as pruned.
    initState.children = ranked;
    initState.unpruned = false;
}
// Chooses the next move from initialState.
// Generates the child states, takes an immediately winning child if one exists,
// and otherwise scores every child with the model and picks the last entry of the
// sorted list. The choice is written to 'next' and 'done' is set to true.
//   initialState: the state to choose a move from.
protected override void mainAlgorithm(AIState initialState)
{
    List<AIState> moves = initialState.generateChildren();

    // No legal moves: report it and finish without a selection.
    if (moves.Count == 0)
    {
        Console.WriteLine("Monte Error: State supplied has no children.");
        next = null;
        done = true;
        return;
    }

    foreach (AIState move in moves)
    {
        // A child whose winner is its own playerIndex is a winning move; take it
        // straight away and skip evaluating the remaining children.
        if (move.getWinner() == move.playerIndex)
        {
            next = move;
            done = true;
            return;
        }

        move.stateScore = model.evaluate(move);
    }

    // No winning move was found: the last element of the sorted list is selected.
    List<AIState> ranked = AIState.mergeSort(moves);
    next = ranked[ranked.Count - 1];
    done = true;
}
// Simulates a single game from a fresh start state and appends one training
// record per visited state to the supplied lists (for use in backprop).
//   stateCreator: factory that produces the starting state of the game.
//   inputs:       receives the preprocessed representation of each recorded state.
//   hiddenLayers: receives the hidden-layer values computed for each recorded state.
//   results:      receives the score that was assigned to each recorded state.
//   rewards:      receives the reward for each recorded state (drawReward for a
//                 drawn game, otherwise 1 when wins > losses and 0 if not).
//   playerIndxs:  receives the playerIndex of each recorded state.
// The five lists are appended to in lock-step, so equal indices describe the same
// state. States are recorded by walking parent links from the last non-terminal
// state reached; the state without a parent (the start state) is not recorded,
// and neither is the terminal child that ended the game.
private void playForward(StateCreator stateCreator, List<int[]> inputs, List<double[,]> hiddenLayers, List<double> results, List<double> rewards, List<int> playerIndxs)
{
    AIState currentState = stateCreator();

    // Number of moves played so far, used to cut off over-long games as draws.
    int count = 0;

    // getWinner() is negative while the game is undecided.
    while (currentState.getWinner() < 0)
    {
        count++;
        List<AIState> children = currentState.generateChildren();

        // Move limit reached or no moves available: treat the game as a draw,
        // record every state on the path with the draw reward, and stop.
        if (count == maxForwardIters || children.Count == 0)
        {
            for (AIState visited = currentState; visited.parent != null; visited = visited.parent)
            {
                appendTrainingRecord(visited, drawReward, inputs, hiddenLayers, results, rewards, playerIndxs);
            }
            return;
        }

        // Score every candidate move, then sort them.
        foreach (AIState child in children)
        {
            child.stateScore = evaluate(child);
        }
        children = AIState.mergeSort(children);

        // Move selection: default to the last (best known) move if none is picked.
        int selectedChild = children.Count - 1;
        for (int i = children.Count - 1; i >= 0; i--)
        {
            double randNum = randGen.NextDouble();

            // A move is accepted with probability equal to its score, capped at
            // confThreshold so that some exploration remains even when the
            // network is very confident about the move.
            double score = children[i].stateScore.Value;
            double numberToBeat = score > confThreshold ? confThreshold : score;

            // A move that wins the game is always taken. A child counts as a win
            // when its winner equals the child's own playerIndex - the same test
            // used below when the terminal result is scored. (Previously this
            // compared against currentState.playerIndex, i.e. the parent state.)
            bool winsGame = children[i].getWinner() == children[i].playerIndex;
            if (randNum < numberToBeat || winsGame)
            {
                selectedChild = i;
                break;
            }
        }

        // If the chosen move ends the game, register the outcome on it and stop
        // playing; currentState deliberately stays on the parent of that move.
        AIState chosen = children[selectedChild];
        int endResult = chosen.getWinner();
        if (endResult >= 0)
        {
            if (endResult == chosen.playerIndex)
            {
                chosen.addWin();
            }
            else
            {
                chosen.addLoss();
            }
            break;
        }

        // Otherwise continue the game from the chosen move.
        currentState = chosen;
    }

    // The game has ended: record every state on the path back to the start,
    // rewarding a state with 1 when it has more wins than losses and 0 otherwise.
    for (AIState visited = currentState; visited.parent != null; visited = visited.parent)
    {
        appendTrainingRecord(visited, visited.wins > visited.losses ? 1 : 0, inputs, hiddenLayers, results, rewards, playerIndxs);
    }
}

// Appends one training record for 'state' to the parallel output lists.
// The state is preprocessed once and the same array is both stored in 'inputs'
// and passed to getHiddenLayers. All values are computed before any list is
// touched, so a failure (e.g. a missing stateScore) cannot leave the lists with
// different lengths.
private void appendTrainingRecord(AIState state, double reward, List<int[]> inputs, List<double[,]> hiddenLayers, List<double> results, List<double> rewards, List<int> playerIndxs)
{
    int[] features = preprocess(state);
    double score = state.stateScore.Value;
    double[,] hiddenLayer = getHiddenLayers(features, state.playerIndex);

    inputs.Add(features);
    results.Add(score);
    rewards.Add(reward);
    hiddenLayers.Add(hiddenLayer);
    playerIndxs.Add(state.playerIndex);
}
// Rollout: plays moves forward from rolloutStart until a terminal state is hit
// (or the rollout limit is reached) and records the outcome on rolloutStart as a
// win, loss or draw.
//   rolloutStart: the state the rollout begins from and on which the result is recorded.
protected override void rollout(AIState rolloutStart)
{
    // If the starting state is already terminal, score it directly and stop.
    int startWinner = rolloutStart.getWinner();
    if (startWinner >= 0)
    {
        if (startWinner == rolloutStart.playerIndex)
        {
            rolloutStart.addWin();
        }
        else if (startWinner == (rolloutStart.playerIndex + 1) % 2)
        {
            // The other of the two players won.
            rolloutStart.addLoss();
        }
        else
        {
            rolloutStart.addDraw(drawScore);
        }
        return;
    }

    // Candidate moves for the current rollout step.
    List<AIState> frontier = rolloutStart.generateChildren();
    int steps = 0;

    while (true)
    {
        steps++;

        // Rollout limit reached or no moves available: record a draw.
        if (steps >= maxRollout || frontier.Count == 0)
        {
            rolloutStart.addDraw(drawScore);
            break;
        }

        // Default pick is the last index, which is the pick used after sorting.
        int pick = frontier.Count - 1;

        if (randGen.NextDouble() < epsilon)
        {
            // Model-guided step: score any child that has no score yet, then sort
            // so that the default (last) index is the one played.
            foreach (AIState option in frontier)
            {
                if (option.stateScore == null)
                {
                    option.stateScore = model.evaluate(option);
                }
            }
            frontier = AIState.mergeSort(frontier);
        }
        else
        {
            // Random step.
            pick = randGen.Next(frontier.Count);
        }

        int outcome = frontier[pick].getWinner();

        // Not terminal yet: descend into the picked move and keep going.
        if (outcome < 0)
        {
            frontier = frontier[pick].generateChildren();
            continue;
        }

        // Terminal state reached: a result of 2 is recorded as a draw, otherwise
        // it is a win or a loss from rolloutStart's point of view.
        if (outcome == 2)
        {
            rolloutStart.addDraw(drawScore);
        }
        else if (outcome == rolloutStart.playerIndex)
        {
            rolloutStart.addWin();
        }
        else
        {
            rolloutStart.addLoss();
        }
        break;
    }

    // Flag every child currently held by rolloutStart as a tree node.
    foreach (AIState realChild in rolloutStart.children)
    {
        realChild.treeNode = true;
    }
}