Пример #1
0
        /// <summary>
        /// Runs one training episode: simulates a number of games, accumulating the
        /// evaluation data from each, then backpropagates over the whole epoch.
        /// </summary>
        /// <param name="stateCreator">Factory producing a fresh starting state per game.</param>
        /// <param name="numbItters">How many games to play this episode.</param>
        /// <returns>The average cost reported by backpropagation (training progress signal).</returns>
        private double trainingEpisode(StateCreator stateCreator, int numbItters)
        {
            //Shared accumulator lists, filled in lockstep by playForward across all games.
            List <int[]> episodeInputs = new List <int[]>();               //all the initial states we evaluate
            List <double[, ]> episodeHiddenLayers = new List <double[, ]>(); //all the hidden states we evaluate
            List <double> episodeResults = new List <double>();            //the output of each evaluation
            List <double> episodeRewards = new List <double>();            //the reward for each evaluation
            List <int> episodePlayerIndxs = new List <int>();              //which player made each evaluated move

            //Play the requested number of games, recording everything into the lists above.
            for (int game = 0; game < numbItters; game++)
            {
                playForward(stateCreator, episodeInputs, episodeHiddenLayers, episodeResults, episodeRewards, episodePlayerIndxs);
            }

            //Once the epoch is complete, backprop and hand back the average error.
            return backpropagate(episodeInputs, episodeHiddenLayers, episodeResults, episodeRewards, episodePlayerIndxs);
        }
Пример #2
0
        /// <summary>
        /// Looks up (or lazily creates) the FSM state registered under the given key.
        /// Returns null when no creator map exists, or when no creator is registered
        /// for the key. When <paramref name="forceCreate"/> or m_AlwaysCreate is set,
        /// a fresh state is always built; otherwise the cache is consulted first and
        /// populated on a miss.
        /// </summary>
        public FSMState <T> GetState <K>(K k, bool forceCreate = false) where K : IConvertible
        {
            if (m_CreatorMap == null)
            {
                return null;
            }

            int key = k.ToInt32(null);

            StateCreator creator;
            if (!m_CreatorMap.TryGetValue(key, out creator))
            {
                Log.w("Not Find State Creator For: " + k);
                return null;
            }

            //Always-create semantics bypass the cache entirely.
            if (forceCreate || m_AlwaysCreate)
            {
                return creator();
            }

            FSMState <T> cached = GetStateFromCache(key);
            if (cached != null)
            {
                return cached;
            }

            //Cache miss: build the state and remember it for next time.
            cached = creator();
            AddState2Cache(key, cached);
            return cached;
        }
Пример #3
0
        //Training is done in a series of episodes where a number of games are played per episode.
        /// <summary>
        /// Trains the two per-player networks over a series of episodes, then
        /// serialises the resulting model to a uniquely (timestamp) named file.
        /// </summary>
        /// <param name="gamesPerEpisode">How many games to simulate per episode (must be >= 1).</param>
        /// <param name="episodes">How many episodes to run (must be >= 1).</param>
        /// <param name="sc">Factory producing a fresh starting state; must not be null.</param>
        /// <returns>1 on success; -1 on invalid arguments or failed state validation.</returns>
        public int train(int gamesPerEpisode, int episodes, StateCreator sc)
        {
            //If there are no games to play we cannot train.
            if (gamesPerEpisode < 1 || episodes < 1)
            {
                Console.WriteLine("Monte Error: Games per episode or Episodes is < 1, terminating.");
                return(-1);
            }
            //If the state creator is null then we cannot train.
            if (sc == null)
            {
                Console.WriteLine("Monte Error: State Creator is null, terminating.");
                return(-1);
            }
            //If we have not set the length of input it means we currently know nothing about the game.
            //So to start off we get a state from our state creator, see how long it is,
            //and use that length to build the networks.
            if (lengthOfInput == 0)
            {
                AIState state = sc();
                if (!validateAIState(state))
                {
                    Console.WriteLine("Monte Error: State failed validation, terminating.");
                    return(-1);
                }
                //Length of the input is the length of a preprocessed empty state.
                //Reuse the already-created (and validated) state rather than building a second one.
                lengthOfInput = preprocess(state).Length;
                //NumbHiddenLayers coming from the Settings file used.
                player0Network   = new Network(lengthOfInput, numbHiddenLayers, 0);
                player1Network   = new Network(lengthOfInput, numbHiddenLayers, 1);
                tempHiddenLayers = new double[numbHiddenLayers, lengthOfInput];
            }

            //For every episode...
            for (int i = 0; i < episodes; i++)
            {
                //Play n games and get the average cost for this episode (or epoch).
                double avgCost = trainingEpisode(sc, gamesPerEpisode);
                //Output this cost (so we can see if our cost is reducing).
                Console.WriteLine("Monte: Training Episode " + (i + 1) + " of " + episodes + " complete. Avg cost: " + avgCost);
            }

            //Once done we write the model to a file named after the current time (so it is unique).
            string dateString = String.Format("{0:HH.mm.ss_dd.MM.yyyy}", DateTime.Now);
            string fileName   = "Model_" + dateString + ".model";

            //StreamWriter's constructor creates the file itself, so the previous separate
            //File.Create(...).Close() was redundant; the using statement guarantees the
            //handle is flushed and released even if writeToFile throws.
            using (StreamWriter writer = new StreamWriter(fileName))
            {
                //We just write all of the values to file.
                writer.WriteLine(lengthOfInput);
                writer.WriteLine(numbHiddenLayers);
                player0Network.writeToFile(writer);
                player1Network.writeToFile(writer);
            }
            return(1);
        }
Пример #4
0
        /// <summary>
        /// Builds a State for the given variable via the supplied creator, passing
        /// along any edge actions recorded for that variable (or an empty set).
        /// </summary>
        private State ExpandedState(Variable prop, StateCreator creator)
        {
            Cdn.EdgeAction[] actions;

            //Fall back to an empty action set when the variable has none recorded.
            if (!d_actionedVariables.TryGetValue(prop, out actions))
            {
                actions = new Cdn.EdgeAction[] {};
            }

            return creator(prop, actions);
        }
Пример #5
0
        /// <summary>
        /// Verifies that StateCreator.BuildState generates the State.cs file
        /// from the deployed StateSchema.xml.
        /// </summary>
        public void StateBuilderTest()
        {
            // Arrange
            var schemaPath = Path.Combine(this.DeployFolder, "StateSchema.xml");
            // NOTE(review): hard-coded absolute path — this test only passes on a machine
            // with this exact checkout layout; consider deriving it from the repo root.
            var generatedFile = @"D:\Reps\TwiddleToe\TwiddleToe.Foundation\State.cs";

            var builder = new StateCreator(schemaPath);

            // Act
            builder.BuildState(generatedFile);

            // Assert
            Assert.IsTrue(File.Exists(generatedFile));
        }
Пример #6
0
        /// <summary>
        /// Registers a state creator under the given key (converted to an int).
        /// Null creators are rejected with an error log. The backing map is
        /// allocated lazily on first registration. Registering the same key twice
        /// throws, per Dictionary.Add semantics.
        /// </summary>
        public void RegisterCreator(IConvertible key, StateCreator creator)
        {
            //Refuse null creators outright.
            if (creator == null)
            {
                Log.e("Can not register null State Creator.");
                return;
            }

            //Lazily allocate the map on first use.
            m_CreatorMap = m_CreatorMap ?? new Dictionary <int, StateCreator>();

            m_CreatorMap.Add(key.ToInt32(null), creator);
        }
Пример #7
0
        //Simulates a single game from a fresh starting state, recording training data for backprop.
        //The five list parameters are shared accumulators kept in lockstep: every state saved
        //appends exactly one entry to each (input vector, hidden activations, network output,
        //reward, and the acting player's index).
        private void playForward(StateCreator stateCreator, List <int[]> inputs, List <double[, ]> hiddenLayers, List <double> results, List <double> rewards, List <int> playerIndxs)
        {
            //Makes a new starting state
            AIState currentState = stateCreator();
            //Loop count (for detecting drawn games)
            int count = 0;

            //While there is no winner (getWinner() < 0 means the game is still in progress)
            while (currentState.getWinner() < 0)
            {
                //Increment the move count
                count++;
                //And generate all possible moves from this state.
                List <AIState> children = currentState.generateChildren();
                //If we have hit the maximum number of moves or there are no children generated
                if (count == maxForwardIters || children.Count == 0)
                {
                    //It is a draw, so walk back up through the moves that were played
                    while (currentState.parent != null)
                    {
                        //And save the data for each state on that line of play
                        inputs.Add(preprocess(currentState));
                        results.Add(currentState.stateScore.Value);
                        double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
                        hiddenLayers.Add(hiddenLayer);
                        playerIndxs.Add(currentState.playerIndex);
                        //Every drawn-game state gets the user-defined draw reward
                        rewards.Add(drawReward);
                        //And set the current state as the parent
                        currentState = currentState.parent;
                    }
                    //Once done we are done with this game
                    return;
                }

                //Evaluate all moves
                foreach (AIState child in children)
                {
                    child.stateScore = evaluate(child);
                }
                //and then sort them (ascending, so the best-scored child ends up last)
                children = AIState.mergeSort(children);

                //Move selection:
                //Default to the best known move in case none is selected below.
                int selectedChild = children.Count - 1;
                //Loop backwards through the children (best score first)
                for (int i = children.Count - 1; i >= 0; i--)
                {
                    double randNum = randGen.NextDouble();
                    //Moves are selected with a probability equal to their score, capped at a
                    //confidence threshold. The cap forces some exploration even when the
                    //network is highly confident about a move.
                    double numberToBeat = children[i].stateScore > confThreshold ? confThreshold : children[i].stateScore.Value;
                    //An immediately winning move for the current player is always taken.
                    if (randNum < numberToBeat || children[i].getWinner() == currentState.playerIndex)
                    {
                        selectedChild = i;
                        break;
                    }
                }
                //Once we have selected a move, find out if it is a terminal state
                int endResult = children[selectedChild].getWinner();
                if (endResult >= 0)
                {
                    //If it is, we have reached the end of the game.
                    //If it is winning, add a win (which will add a loss to its parent etc.)
                    if (endResult == children[selectedChild].playerIndex)
                    {
                        children[selectedChild].addWin();
                    }
                    //Else add a loss
                    else
                    {
                        children[selectedChild].addLoss();
                    }
                    break;
                }
                //Otherwise set the current state to that move and repeat.
                currentState = children[selectedChild];
            }

            //Once the game has ended and scores have been set etc., walk back up the line of
            //play and store all of the data (for use in backprop).
            while (currentState.parent != null)
            {
                inputs.Add(preprocess(currentState));
                results.Add(currentState.stateScore.Value);
                //Reward is 1 when this state accumulated more wins than losses, else 0.
                rewards.Add(currentState.wins > currentState.losses ? 1 : 0);
                double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
                hiddenLayers.Add(hiddenLayer);
                playerIndxs.Add(currentState.playerIndex);
                currentState = currentState.parent;
            }
        }