示例#1
0
        //Main MCTS algorithm: chooses the next move from initialState and stores it in 'next'.
        //A child that wins immediately is taken without any simulation; otherwise
        //numbSimulations selection + rollout passes are run and the most played child is chosen.
        //'done' is set to true on every exit path; 'next' is null when there are no children.
        protected override void mainAlgorithm(AIState initialState)
        {
            //Expand the root so there is something to choose between
            initialState.generateChildren();

            //Nothing to choose from: report it and finish without a move
            if (initialState.children.Count == 0)
            {
                Console.WriteLine("Monte: Error: State supplied has no children.");
                next = null;
                done = true;
                return;
            }

            //A move that wins on the spot makes all further computation unnecessary
            foreach (var candidate in initialState.children)
            {
                if (candidate.getWinner() != candidate.playerIndex)
                {
                    continue;
                }
                next = candidate;
                done = true;
                return;
            }

            //Run the fixed budget of simulations
            for (int simulation = 0; simulation < numbSimulations; simulation++)
            {
                //Selection: walk down from the root until a node without children is reached
                AIState node = initialState;
                while (node.children.Count > 0)
                {
                    int    pick      = -1;
                    double pickScore = -1;
                    for (int i = 0; i < node.children.Count; i++)
                    {
                        AIState option      = node.children[i];
                        double  won         = option.wins;
                        double  timesPlayed = option.totGames;

                        //Exploitation term: plain win rate, with unplayed nodes scored as 1
                        double winRate = 1.0;
                        if (timesPlayed > 0)
                        {
                            winRate = won / timesPlayed;
                        }

                        //Exploration term (UCB1 style), scaled by the configurable exploreWeight.
                        //Note that the log is taken over the root's game count at every depth.
                        double exploration = exploreWeight * Math.Sqrt(2 * Math.Log(initialState.totGames + 1) / (timesPlayed + 0.1));

                        double combined = winRate + exploration;
                        //Strictly greater only, so the first of any tied children is kept
                        if (combined > pickScore)
                        {
                            pickScore = combined;
                            pick      = i;
                        }
                    }
                    //Descend into the best child and repeat
                    node = node.children[pick];
                }
                //Simulation: roll out from the node that selection ended on
                rollout(node);
            }

            //Robust child selection: the root child with the most games is the move to make.
            //'>=' semantics mean the last of any tied children is the one kept.
            int chosenMove  = -1;
            int chosenGames = -1;
            for (int i = 0; i < initialState.children.Count; i++)
            {
                int gamesPlayed = initialState.children[i].totGames;
                if (gamesPlayed < chosenGames)
                {
                    continue;
                }
                chosenGames = gamesPlayed;
                chosenMove  = i;
            }

            //Publish the result
            next = initialState.children[chosenMove];
            done = true;
        }
示例#2
0
        //Rollout function: plays uniformly random moves from rolloutStart until a terminal
        //state is found (or maxRollout is reached) and records the outcome on rolloutStart.
        protected override void rollout(AIState rolloutStart)
        {
            int startResult = rolloutStart.getWinner();

            //The start is already terminal: score it directly, there is nothing to simulate
            if (startResult >= 0)
            {
                int otherPlayer = (rolloutStart.playerIndex + 1) % 2;
                if (startResult == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else if (startResult == otherPlayer)
                {
                    rolloutStart.addLoss();
                }
                else
                {
                    //Any other non-negative result is scored as a draw
                    rolloutStart.addDraw(drawScore);
                }
                return;
            }

            //The set of moves currently available in the simulated game
            List<AIState> frontier = rolloutStart.generateChildren();

            //step counts the iterations, starting at 1
            for (int step = 1; ; step++)
            {
                //Out of budget, or no moves were generated: record a draw
                if (step >= maxRollout || frontier.Count == 0)
                {
                    rolloutStart.addDraw(drawScore);
                    break;
                }

                //Pick a random move and check whether it ends the game
                AIState picked  = frontier[randGen.Next(frontier.Count)];
                int     outcome = picked.getWinner();
                if (outcome < 0)
                {
                    //Not terminal: continue the simulation from that move
                    frontier = picked.generateChildren();
                    continue;
                }

                //Terminal: 2 is a draw, the start node's own index is a win, anything else a loss
                if (outcome == 2)
                {
                    rolloutStart.addDraw(drawScore);
                }
                else if (outcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    rolloutStart.addLoss();
                }
                break;
            }

            //Children generated below the start node's own children only existed for the
            //simulation, so each child is given a fresh empty child list.
            foreach (AIState child in rolloutStart.children)
            {
                child.children = new List<AIState>();
            }
        }
示例#3
0
文件: Model.cs 项目: Peng2017/Monte
        //Simulates a single game and appends one training sample per recorded state.
        //  stateCreator - creates the starting state of the game
        //  inputs       - receives preprocess(state) for every recorded state
        //  hiddenLayers - receives getHiddenLayers(...) for every recorded state
        //  results      - receives the evaluated score (stateScore) of every recorded state
        //  rewards      - receives drawReward for a drawn game, otherwise 1 when the state
        //                 has more wins than losses and 0 when it does not
        //  playerIndxs  - receives the playerIndex of every recorded state
        //States are recorded by following parent links back from the last current state;
        //the state without a parent (the starting state) is not recorded.
        private void playForward(StateCreator stateCreator, List <int[]> inputs, List <double[, ]> hiddenLayers, List <double> results, List <double> rewards, List <int> playerIndxs)
        {
            //Make a new starting state
            AIState currentState = stateCreator();
            //Loop count (for detecting drawn games)
            int count = 0;

            //While there is no winner
            while (currentState.getWinner() < 0)
            {
                //Increment the move count
                count++;
                //And generate all possible moves from this state.
                List <AIState> children = currentState.generateChildren();
                //If we have hit the maximum number of moves or there are no children generated
                if (count == maxForwardIters || children.Count == 0)
                {
                    //It is a draw so work back through the moves
                    while (currentState.parent != null)
                    {
                        //And save the data
                        inputs.Add(preprocess(currentState));
                        results.Add(currentState.stateScore.Value);
                        double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
                        hiddenLayers.Add(hiddenLayer);
                        playerIndxs.Add(currentState.playerIndex);
                        //Use the user defined reward for a draw
                        rewards.Add(drawReward);
                        //And set the current state as the parent
                        currentState = currentState.parent;
                    }
                    //Once done we are done with this game
                    return;
                }

                //Evaluate all moves
                foreach (AIState child in children)
                {
                    child.stateScore = evaluate(child);
                }
                //and then sort them (the move selection below treats the last element as the best move)
                children = AIState.mergeSort(children);

                //Move selection:
                //Default to the best known move if one is not selected.
                int selectedChild = children.Count - 1;
                //Loop backwards through the children
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    double randNum = randGen.NextDouble();
                    //Moves are selected with a probability = their score but capped at a confidence threshold.
                    //This forces some exploration even when the network has a high confidence in the move.
                    double numberToBeat = children[i].stateScore > confThreshold ? confThreshold : children[i].stateScore.Value;
                    //A winning move is always taken. A move is winning when the child's winner is the
                    //child's own playerIndex, which is the same test used below to call addWin.
                    //(Comparing against currentState.playerIndex tested for a win by the other side.)
                    if (randNum < numberToBeat || children[i].getWinner() == children[i].playerIndex)
                    {
                        selectedChild = i;
                        break;
                    }
                }
                //Once we have selected a move find out if it is a terminal state
                int endResult = children[selectedChild].getWinner();
                if (endResult >= 0)
                {
                    //If it is we have reached the end of the game
                    //If it is winning add a win (which will add a loss to its parent etc.)
                    if (endResult == children[selectedChild].playerIndex)
                    {
                        children[selectedChild].addWin();
                    }
                    //Else add a loss
                    //NOTE(review): every other terminal result, including a drawn one, is recorded as a loss here - confirm this is intended.
                    else
                    {
                        children[selectedChild].addLoss();
                    }
                    //currentState is left on the parent of the terminal move, so recording starts from there
                    break;
                }
                //Otherwise set the current state to that move and repeat.
                currentState = children[selectedChild];
            }

            //Once the game has ended and the scores have been set, store all of the data (for use in backprop)
            while (currentState.parent != null)
            {
                inputs.Add(preprocess(currentState));
                results.Add(currentState.stateScore.Value);
                rewards.Add(currentState.wins > currentState.losses ? 1 : 0);
                double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
                hiddenLayers.Add(hiddenLayer);
                playerIndxs.Add(currentState.playerIndex);
                currentState = currentState.parent;
            }
        }
示例#4
0
文件: AIAgent.cs 项目: Peng2017/Monte
 //Main algorithm hook. It is abstract, so every concrete agent must supply its own implementation.
 //initalState is the state handed to the agent to work from.
 protected abstract void mainAlgorithm(AIState initalState);
示例#5
0
        //Rollout function: plays moves from rolloutStart until a terminal state is found (or
        //maxRollout is reached) and records the outcome on rolloutStart. With probability
        //epsilon a move is the top move of the model-sorted list, otherwise it is random.
        protected override void rollout(AIState rolloutStart)
        {
            int startResult = rolloutStart.getWinner();

            //The start is already terminal: score it directly, there is nothing to simulate
            if (startResult >= 0)
            {
                int otherPlayer = (rolloutStart.playerIndex + 1) % 2;
                if (startResult == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else if (startResult == otherPlayer)
                {
                    rolloutStart.addLoss();
                }
                else
                {
                    //Any other non-negative result is scored as a draw
                    rolloutStart.addDraw(drawScore);
                }
                return;
            }

            //The set of moves currently available in the simulated game
            List<AIState> frontier = rolloutStart.generateChildren();

            //step counts the iterations, starting at 1
            for (int step = 1; ; step++)
            {
                //Out of budget, or no moves were generated: record a draw
                if (step >= maxRollout || frontier.Count == 0)
                {
                    rolloutStart.addDraw(drawScore);
                    break;
                }

                //Start from the last index, which is where the best move sits once the list is sorted
                int  choice   = frontier.Count - 1;
                bool useModel = randGen.NextDouble() < epsilon;
                if (useModel)
                {
                    //Score every move that has no score yet, then sort so the last entry is the best
                    foreach (AIState option in frontier)
                    {
                        if (option.stateScore != null)
                        {
                            continue;
                        }
                        option.stateScore = model.evaluate(option);
                    }
                    frontier = AIState.mergeSort(frontier);
                }
                else
                {
                    //Otherwise just take a random move
                    choice = randGen.Next(frontier.Count);
                }

                //Check whether the chosen move ends the game
                AIState picked  = frontier[choice];
                int     outcome = picked.getWinner();
                if (outcome < 0)
                {
                    //Not terminal: continue the simulation from that move
                    frontier = picked.generateChildren();
                    continue;
                }

                //Terminal: 2 is a draw, the start node's own index is a win, anything else a loss
                if (outcome == 2)
                {
                    rolloutStart.addDraw(drawScore);
                }
                else if (outcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    rolloutStart.addLoss();
                }
                break;
            }

            //Flag every direct child of the start node as a tree node.
            //(Nothing is reset here; only the treeNode flag is set.)
            foreach (AIState child in rolloutStart.children)
            {
                child.treeNode = true;
            }
        }
示例#6
0
 //Rollout function hook. It is abstract, so every concrete agent must supply its own implementation.
 //rolloutStart is the state the rollout begins from.
 protected abstract void rollout(AIState rolloutStart);