//Runs one training episode (epoch): simulates numbItters games and backpropagates over everything collected.
private double trainingEpisode(StateCreator stateCreator, int numbItters)
{
    //Parallel tracking lists, one entry per state evaluated during the games:
    List<int[]> totalInputs = new List<int[]>();               //preprocessed initial states
    List<double[,]> totalHiddenLayers = new List<double[,]>(); //hidden-layer activations per evaluation
    List<double> totalResults = new List<double>();            //network output per evaluation
    List<double> totalRewards = new List<double>();            //reward assigned to each evaluation
    List<int> playerIndxs = new List<int>();                   //which player's network evaluated the state

    //Simulate the requested number of games; each call appends its data to the lists above.
    for (int game = 0; game < numbItters; game++)
    {
        playForward(stateCreator, totalInputs, totalHiddenLayers, totalResults, totalRewards, playerIndxs);
    }

    //After the epoch, backpropagate over the accumulated data and return the
    //average error (used to monitor how well training is progressing).
    return backpropagate(totalInputs, totalHiddenLayers, totalResults, totalRewards, playerIndxs);
}
//Looks up (or creates) the FSM state registered under key k.
//Returns null when no creator map exists or no creator is registered for the key.
public FSMState<T> GetState<K>(K k, bool forceCreate = false) where K : IConvertible
{
    //No creators have been registered at all.
    if (m_CreatorMap == null)
    {
        return null;
    }

    int key = k.ToInt32(null);
    StateCreator creator;
    if (!m_CreatorMap.TryGetValue(key, out creator))
    {
        Log.w("Not Find State Creator For: " + k);
        return null;
    }

    //A fresh instance is requested explicitly, or this machine always creates new states.
    if (forceCreate || m_AlwaysCreate)
    {
        return creator();
    }

    //Otherwise serve from the cache, creating and caching on a miss.
    FSMState<T> cached = GetStateFromCache(key);
    if (cached != null)
    {
        return cached;
    }

    cached = creator();
    AddState2Cache(key, cached);
    return cached;
}
//Training is done in a series of episodes where a number of games are played per episode.
//Returns 1 on success, -1 on invalid arguments or a state that fails validation.
public int train(int gamesPerEpisode, int episodes, StateCreator sc)
{
    //Guard: need at least one game and one episode.
    if (gamesPerEpisode < 1 || episodes < 1)
    {
        Console.WriteLine("Monte Error: Games per episode or Episodes is < 1, terminating.");
        return -1;
    }
    //Guard: without a state creator we cannot generate games to train on.
    if (sc == null)
    {
        Console.WriteLine("Monte Error: State Creator is null, terminating.");
        return -1;
    }

    //If we have not set the length of input it means we currently know nothing about the game.
    //So to start off we get a state from our state creator and see how long it is,
    //and use that length to build the networks.
    if (lengthOfInput == 0)
    {
        AIState state = sc();
        if (!validateAIState(state))
        {
            Console.WriteLine("Monte Error: State failed validation, terminating.");
            return -1;
        }
        //Length of the input is the length of a preprocessed empty state.
        lengthOfInput = preprocess(sc()).Length;
        //numbHiddenLayers comes from the Settings file used.
        player0Network = new Network(lengthOfInput, numbHiddenLayers, 0);
        player1Network = new Network(lengthOfInput, numbHiddenLayers, 1);
        tempHiddenLayers = new double[numbHiddenLayers, lengthOfInput];
    }

    //For every episode: play n games and report the average cost so we can see
    //whether the cost is reducing over time.
    for (int i = 0; i < episodes; i++)
    {
        double avgCost = trainingEpisode(sc, gamesPerEpisode);
        //FIX: this message previously contained a raw line break inside the
        //string literal, which is a compile error in C#; rejoined into one literal.
        Console.WriteLine("Monte: Training Episode " + (i + 1) + " of " + episodes + " complete. Avg cost: " + avgCost);
    }

    //Once done we write the model to a file named with the current time (so it is unique).
    string dateString = String.Format("{0:HH.mm.ss_dd.MM.yyyy}", DateTime.Now);
    string fileName = "Model_" + dateString + ".model";
    //FIX: 'using' guarantees the writer (and its file handle) is closed even if a
    //write throws; StreamWriter creates the file itself, so the previous
    //File.Create(fileName).Close() call was redundant and has been removed.
    using (StreamWriter writer = new StreamWriter(fileName))
    {
        //We just write all of the values to file.
        writer.WriteLine(lengthOfInput);
        writer.WriteLine(numbHiddenLayers);
        player0Network.writeToFile(writer);
        player1Network.writeToFile(writer);
    }
    return 1;
}
//Creates a State for the given variable, passing along its registered edge
//actions, or an empty action set when none are registered.
private State ExpandedState(Variable prop, StateCreator creator)
{
    Cdn.EdgeAction[] actions;
    bool hasActions = d_actionedVariables.TryGetValue(prop, out actions);
    return creator(prop, hasActions ? actions : new Cdn.EdgeAction[] {});
}
//Verifies that StateCreator generates a State.cs file from the deployed schema.
public void StateBuilderTest()
{
    // Arrange
    var stateFile = Path.Combine(this.DeployFolder, "StateSchema.xml");
    //FIX: the output path was hard-coded to an absolute path
    //(D:\Reps\TwiddleToe\...) that only exists on one developer's machine,
    //so the test failed everywhere else. Generate into the test deployment
    //folder instead, which is valid on any machine running the test.
    var stateCsFile = Path.Combine(this.DeployFolder, "State.cs");
    var stateBuilder = new StateCreator(stateFile);

    // Act
    stateBuilder.BuildState(stateCsFile);

    // Assert
    Assert.IsTrue(File.Exists(stateCsFile));
}
//Registers a state creator under the given key, lazily allocating the map.
//Invalid registrations (null creator, duplicate key) are logged and ignored
//rather than thrown.
public void RegisterCreator(IConvertible key, StateCreator creator)
{
    if (creator == null)
    {
        Log.e("Can not register null State Creator.");
        return;
    }
    if (m_CreatorMap == null)
    {
        m_CreatorMap = new Dictionary<int, StateCreator>();
    }
    int intKey = key.ToInt32(null);
    //FIX: Dictionary.Add throws ArgumentException on a duplicate key. Report
    //the clash through the logger (consistent with the null-creator path
    //above) and keep the first registration instead of crashing the caller.
    if (m_CreatorMap.ContainsKey(intKey))
    {
        Log.e("State Creator already registered for: " + key);
        return;
    }
    m_CreatorMap.Add(intKey, creator);
}
//Simulates a single game from a fresh starting state, then walks back up the
//move chain recording training data into the caller's parallel lists:
//  inputs       - preprocessed state vectors
//  hiddenLayers - hidden-layer activations for each recorded state
//  results      - the network's score for each recorded state
//  rewards      - reward per state (drawReward on a draw; win/loss derived otherwise)
//  playerIndxs  - the player index that owns each recorded state
//NOTE(review): the five lists are index-parallel — every branch that appends to
//one must append to all; preserve that invariant when editing.
private void playForward(StateCreator stateCreator, List <int[]> inputs, List <double[, ]> hiddenLayers, List <double> results, List <double> rewards, List <int> playerIndxs)
{
    //Makes a new starting state.
    AIState currentState = stateCreator();
    //Move count (used for detecting drawn games via maxForwardIters).
    int count = 0;
    //While there is no winner (getWinner() < 0 presumably means "game still in progress" — TODO confirm).
    while (currentState.getWinner() < 0)
    {
        //Increment the move count
        count++;
        //And generate all possible moves from this state.
        List <AIState> children = currentState.generateChildren();
        //If we have hit the maximum number of moves or there are no children generated,
        //treat the game as a draw.
        if (count == maxForwardIters || children.Count == 0)
        {
            //It is a draw, so work back through the moves recording each state.
            while (currentState.parent != null)
            {
                //And save the data
                inputs.Add(preprocess(currentState));
                results.Add(currentState.stateScore.Value);
                double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
                hiddenLayers.Add(hiddenLayer);
                playerIndxs.Add(currentState.playerIndex);
                //Adding the user-defined reward for a draw.
                rewards.Add(drawReward);
                //And step up to the parent state.
                currentState = currentState.parent;
            }
            //Once done we are done with this game.
            return;
        }
        //Evaluate all moves
        foreach (AIState child in children)
        {
            child.stateScore = evaluate(child);
        }
        //and then sort them (mergeSort presumably orders ascending by stateScore,
        //so the last element is the best move — TODO confirm).
        children = AIState.mergeSort(children);
        //Move selection:
        //Default to the best known move if one is not selected below.
        int selectedChild = children.Count - 1;
        //Loop backwards through the children (best-scored first).
        for (int i = children.Count - 1; i >= 0; i--)
        {
            double randNum = randGen.NextDouble();
            //Moves are selected with a probability equal to their score, but capped
            //at a confidence threshold. This forces some exploration even when the
            //network has a high confidence on the move.
            double numberToBeat = children[i].stateScore > confThreshold ?
                confThreshold : children[i].stateScore.Value;
            //An immediately winning move for the current player is always taken.
            if (randNum < numberToBeat || children[i].getWinner() == currentState.playerIndex)
            {
                selectedChild = i;
                break;
            }
        }
        //Once we have selected a move, find out if it is a terminal state.
        int endResult = children[selectedChild].getWinner();
        if (endResult >= 0)
        {
            //If it is, we have reached the end of the game.
            //If it is winning, add a win (which will add a loss to its parent, etc.).
            if (endResult == children[selectedChild].playerIndex)
            {
                children[selectedChild].addWin();
            }
            //Else add a loss.
            else
            {
                children[selectedChild].addLoss();
            }
            break;
        }
        //Otherwise set the current state to that move and repeat.
        currentState = children[selectedChild];
    }
    //Once the game has ended and scores have been set etc., store all of the
    //data (for use in backprop), walking from the final state back to the root.
    while (currentState.parent != null)
    {
        inputs.Add(preprocess(currentState));
        results.Add(currentState.stateScore.Value);
        //Reward 1 when this state accumulated more wins than losses, else 0.
        rewards.Add(currentState.wins > currentState.losses ? 1 : 0);
        double[,] hiddenLayer = getHiddenLayers(preprocess(currentState), currentState.playerIndex);
        hiddenLayers.Add(hiddenLayer);
        playerIndxs.Add(currentState.playerIndex);
        currentState = currentState.parent;
    }
}