Ejemplo n.º 1
0
        /// <summary>
        /// Plays one game forward from a freshly created state and appends one training
        /// sample per visited state to the supplied lists. Samples are written while walking
        /// from the last reached state back up the parent chain; the root (the state whose
        /// parent is null) is never recorded.
        /// </summary>
        /// <param name="stateCreator">Factory invoked once to obtain the starting state.</param>
        /// <param name="inputs">Receives the preprocessed form of every recorded state.</param>
        /// <param name="hiddenLayers">Receives the getHiddenLayers output of every recorded state.</param>
        /// <param name="results">Receives the stateScore of every recorded state.</param>
        /// <param name="rewards">
        /// Receives drawReward for every recorded state when the game is cut off as a draw;
        /// otherwise 1 when the state's wins exceed its losses and 0 when they do not.
        /// </param>
        /// <param name="playerIndxs">Receives the playerIndex of every recorded state.</param>
        private void playForward(StateCreator stateCreator, List<int[]> inputs, List<double[,]> hiddenLayers, List<double> results, List<double> rewards, List<int> playerIndxs)
        {
            AIState position = stateCreator();

            // moveNumber counts the moves attempted so far (1 on the first pass) and is
            // what the draw cut-off below compares against maxForwardIters.
            for (int moveNumber = 1; position.getWinner() < 0; moveNumber++)
            {
                List<AIState> options = position.generateChildren();

                // Move limit reached, or nothing to play: the game is recorded as a draw.
                if (moveNumber == maxForwardIters || options.Count == 0)
                {
                    for (AIState node = position; node.parent != null; node = node.parent)
                    {
                        inputs.Add(preprocess(node));
                        results.Add(node.stateScore.Value);
                        hiddenLayers.Add(getHiddenLayers(preprocess(node), node.playerIndex));
                        playerIndxs.Add(node.playerIndex);
                        rewards.Add(drawReward);
                    }
                    return;
                }

                // Score every candidate move, then sort the candidates by that score.
                // Presumably mergeSort orders ascending so the strongest move ends up
                // last -- the fallback choice below relies on that; confirm in AIState.
                foreach (AIState option in options)
                {
                    option.stateScore = evaluate(option);
                }
                options = AIState.mergeSort(options);

                // Walk the candidates from the end of the list towards the front. Each one
                // is accepted with probability equal to its score capped at confThreshold
                // (the cap keeps some exploration alive even for confidently rated moves),
                // or outright when its winner equals the current state's playerIndex.
                // If nothing is accepted, the last element stays selected.
                int chosen = options.Count - 1;
                int candidate = options.Count - 1;
                while (candidate >= 0)
                {
                    AIState option = options[candidate];
                    double roll = randGen.NextDouble();

                    double threshold;
                    if (option.stateScore > confThreshold)
                    {
                        threshold = confThreshold;
                    }
                    else
                    {
                        threshold = option.stateScore.Value;
                    }

                    if (roll < threshold || option.getWinner() == position.playerIndex)
                    {
                        chosen = candidate;
                        break;
                    }
                    candidate--;
                }

                AIState next = options[chosen];
                int outcome = next.getWinner();
                if (outcome < 0)
                {
                    // Game still running: step into the chosen move and go again.
                    position = next;
                    continue;
                }

                // The chosen move ends the game. Per the original author's note, addWin and
                // addLoss also update the ancestors of the node they are called on.
                // NOTE(review): every finished result other than a win for next.playerIndex
                // is booked as a loss, and 'position' is not advanced to 'next', so the
                // terminal state itself is not part of the recorded samples -- confirm both
                // are intended.
                if (outcome == next.playerIndex)
                {
                    next.addWin();
                }
                else
                {
                    next.addLoss();
                }
                break;
            }

            // Game over (or the start state was already finished): store the samples used
            // later for backprop, walking from the last reached state up to the root.
            for (AIState node = position; node.parent != null; node = node.parent)
            {
                inputs.Add(preprocess(node));
                results.Add(node.stateScore.Value);

                double reward;
                if (node.wins > node.losses)
                {
                    reward = 1;
                }
                else
                {
                    reward = 0;
                }
                rewards.Add(reward);

                hiddenLayers.Add(getHiddenLayers(preprocess(node), node.playerIndex));
                playerIndxs.Add(node.playerIndex);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Random playout: starting from <paramref name="rolloutStart"/>, repeatedly picks a
        /// uniformly random child until a finished state is reached, the rollout limit is
        /// hit, or no moves are available, and books the outcome (win, loss or draw) on
        /// <paramref name="rolloutStart"/>.
        /// </summary>
        /// <param name="rolloutStart">State the playout begins from and on which the result is recorded.</param>
        protected override void rollout(AIState rolloutStart)
        {
            int startOutcome = rolloutStart.getWinner();

            // The start state is already finished: record its result and leave.
            if (startOutcome >= 0)
            {
                if (startOutcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    // With player indices 0 and 1 this is the other player's index.
                    int otherPlayer = (rolloutStart.playerIndex + 1) % 2;
                    if (startOutcome == otherPlayer)
                    {
                        rolloutStart.addLoss();
                    }
                    else
                    {
                        rolloutStart.addDraw(drawScore);
                    }
                }
                return;
            }

            // Moves available from the state the playout is currently at.
            List<AIState> frontier = rolloutStart.generateChildren();
            int step = 0;

            while (true)
            {
                step++;

                // Out of budget or out of moves: the playout counts as a draw.
                if (!(step < maxRollout && frontier.Count != 0))
                {
                    rolloutStart.addDraw(drawScore);
                    break;
                }

                AIState pick = frontier[randGen.Next(frontier.Count)];
                int outcome = pick.getWinner();

                if (outcome < 0)
                {
                    // Not finished yet: descend into the picked move.
                    frontier = pick.generateChildren();
                    continue;
                }

                // Finished state reached; a winner value of 2 is treated as a draw here.
                if (outcome == 2)
                {
                    rolloutStart.addDraw(drawScore);
                }
                else if (outcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    rolloutStart.addLoss();
                }
                break;
            }

            // Give every direct child of the start state a fresh, empty children list.
            // Per the original author's note, states generated during the playout are
            // not 'real' children and should not be kept.
            foreach (AIState directChild in rolloutStart.children)
            {
                directChild.children = new List<AIState>();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Model-guided playout: starting from <paramref name="rolloutStart"/>, repeatedly
        /// picks either the last move of the model-sorted candidate list or a uniformly
        /// random move, until a finished state is reached, the rollout limit is hit, or no
        /// moves are available. The outcome (win, loss or draw) is booked on
        /// <paramref name="rolloutStart"/>.
        /// </summary>
        /// <param name="rolloutStart">State the playout begins from and on which the result is recorded.</param>
        protected override void rollout(AIState rolloutStart)
        {
            int startOutcome = rolloutStart.getWinner();

            // The start state is already finished: record its result and leave.
            if (startOutcome >= 0)
            {
                if (startOutcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    // With player indices 0 and 1 this is the other player's index.
                    int otherPlayer = (rolloutStart.playerIndex + 1) % 2;
                    if (startOutcome == otherPlayer)
                    {
                        rolloutStart.addLoss();
                    }
                    else
                    {
                        rolloutStart.addDraw(drawScore);
                    }
                }
                return;
            }

            // Moves available from the state the playout is currently at.
            List<AIState> frontier = rolloutStart.generateChildren();
            int step = 0;

            while (true)
            {
                step++;

                // Out of budget or out of moves: the playout counts as a draw.
                if (!(step < maxRollout && frontier.Count != 0))
                {
                    rolloutStart.addDraw(drawScore);
                    break;
                }

                // NOTE(review): epsilon is the probability of taking the model-guided
                // branch here, which is the reverse of the usual epsilon-greedy convention
                // (epsilon = chance of a random move) -- confirm this is intended.
                bool useModel = randGen.NextDouble() < epsilon;
                int pickIndex;
                if (useModel)
                {
                    // The last slot holds the best move once the list is sorted
                    // (assuming mergeSort orders ascending by score -- confirm in AIState).
                    pickIndex = frontier.Count - 1;

                    // Only candidates without a score yet are sent through the model.
                    foreach (AIState option in frontier)
                    {
                        if (option.stateScore == null)
                        {
                            option.stateScore = model.evaluate(option);
                        }
                    }
                    frontier = AIState.mergeSort(frontier);
                }
                else
                {
                    pickIndex = randGen.Next(frontier.Count);
                }

                AIState pick = frontier[pickIndex];
                int outcome = pick.getWinner();

                if (outcome < 0)
                {
                    // Not finished yet: descend into the picked move.
                    frontier = pick.generateChildren();
                    continue;
                }

                // Finished state reached; a winner value of 2 is treated as a draw here.
                if (outcome == 2)
                {
                    rolloutStart.addDraw(drawScore);
                }
                else if (outcome == rolloutStart.playerIndex)
                {
                    rolloutStart.addWin();
                }
                else
                {
                    rolloutStart.addLoss();
                }
                break;
            }

            // Flag every direct child of the start state as a tree node.
            // NOTE(review): presumably this distinguishes them from states that were only
            // generated for the playout -- confirm against where treeNode is read.
            foreach (AIState directChild in rolloutStart.children)
            {
                directChild.treeNode = true;
            }
        }