public void TrainWithExperienceReplay(int numGames, int batchSize, float initialRandomChance, bool degradeRandomChance = true, string saveToFile = null)
{
    var gamma = 0.975f;
    var buffer = batchSize * 2;
    var h = 0;

    //# Stores tuples of (S, A, R, S')
    var replay = new List<object[]>();

    _trainer = new SgdTrainer(Net)
    {
        LearningRate = 0.01,
        Momentum = 0.0,
        BatchSize = batchSize,
        L2Decay = 0.001
    };

    var startTime = DateTime.Now;
    var batches = 0;

    for (var i = 0; i < numGames; i++)
    {
        World = GridWorld.RandomPlayerState();
        var gameMoves = 0;
        double updatedReward;
        var gameRunning = true;

        while (gameRunning)
        {
            //# We are in state S
            //# Let's run our Q function on S to get Q values for all possible actions
            var state = GetInputs();
            var qVal = Net.Forward(state);

            int action;
            if (Util.Rnd.NextDouble() < initialRandomChance)
            {
                //# Choose random action
                action = Util.Rnd.Next(NumActions);
            }
            else
            {
                //# Choose best action from Q(s,a) values
                action = MaxValueIndex(qVal);
            }

            //# Take action, observe new state S'
            World.MovePlayer(action);
            gameMoves++;
            TotalTrainingMoves++;
            var newState = GetInputs();

            //# Observe reward, limit turns
            var reward = World.GetReward();
            gameRunning = !World.GameOver();

            //# Experience replay storage
            if (replay.Count < buffer)
            {
                replay.Add(new[] { state, (object)action, (object)reward, newState });
            }
            else
            {
                //# Overwrite the oldest memory (circular buffer)
                h = (h < buffer - 1) ? h + 1 : 0;
                replay[h] = new[] { state, (object)action, (object)reward, newState };
                batches++;

                var batchInputValues = new Volume[batchSize];
                var batchOutputValues = new List<double>();

                //# Randomly sample our experience replay memory
                for (var b = 0; b < batchSize; b++)
                {
                    var memory = replay[Util.Rnd.Next(buffer)];
                    var oldState = (Volume)memory[0];
                    var oldAction = (int)memory[1];
                    var oldReward = (int)memory[2];
                    var oldNewState = (Volume)memory[3];

                    //# Get max_Q(S',a)
                    var newQ = Net.Forward(oldNewState);
                    var y = GetValues(newQ);
                    var maxQ = MaxValue(newQ);

                    if (oldReward == GridWorld.ProgressScore)
                    {
                        //# Non-terminal state: discounted future reward
                        updatedReward = oldReward + (gamma * maxQ);
                    }
                    else
                    {
                        //# Terminal state
                        updatedReward = oldReward;
                    }

                    //# Target output: update the Q value for the action taken in
                    //# the *sampled* memory, not the current game's action
                    y[oldAction] = updatedReward;

                    //# Store batched states
                    batchInputValues[b] = oldState;
                    batchOutputValues.AddRange(y);
                }

                Console.Write(".");

                //# Train in batches with multiple scores and actions
                _trainer.Train(batchOutputValues.ToArray(), batchInputValues);
                TotalLoss += _trainer.Loss;
            }
        }

        Console.WriteLine($"{(World.GetReward() == GridWorld.WinScore ? " WON!" : string.Empty)}");
        Console.Write($"Game: {i + 1}");
        TotalTrainingGames++;

        // Save every 10 games...
        if (!string.IsNullOrEmpty(saveToFile) && (i % 10 == 0))
        {
            Util.SaveBrainToFile(this, saveToFile);
        }

        //# Optionally: slowly reduce the chance of choosing a random action
        if (degradeRandomChance && initialRandomChance > 0.05f)
        {
            initialRandomChance -= (1f / numGames);
        }
    }

    var duration = DateTime.Now - startTime;
    LastLoss = _trainer.Loss;
    TrainingTime += duration;

    if (!string.IsNullOrEmpty(saveToFile))
    {
        Util.SaveBrainToFile(this, saveToFile);
    }

    Console.WriteLine($"\nAvg loss: {TotalLoss / TotalTrainingMoves}. Last: {LastLoss}");
    Console.WriteLine($"Training duration: {duration}. Total: {TrainingTime}");
}
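//# NOTE: The trainer above relies on three small helpers (GetValues, MaxValue,
//# MaxValueIndex) that are not shown in this listing. A minimal sketch follows,
//# assuming the network's output Volume exposes its activations as an indexable
//# Weights array (as in older ConvNetSharp builds) — adapt to whatever accessor
//# your Volume version actually provides:
private static double[] GetValues(Volume output)
{
    //# Copy the forward-pass activations: one Q value per action
    var values = new double[output.Weights.Length];
    Array.Copy(output.Weights, values, values.Length);
    return values;
}

private static int MaxValueIndex(Volume output)
{
    //# argmax_a Q(S, a): the index of the greedy action
    var best = 0;
    for (var i = 1; i < output.Weights.Length; i++)
    {
        if (output.Weights[i] > output.Weights[best])
        {
            best = i;
        }
    }
    return best;
}

private static double MaxValue(Volume output)
{
    //# max_a Q(S', a): the best achievable value from the next state
    return output.Weights[MaxValueIndex(output)];
}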
static void Main(string[] args)
{
    Console.WriteLine(" ----------------------- ");
    Console.WriteLine("| G.R.I.D --- W.O.R.L.D |");
    Console.WriteLine(" ----------------------- ");
    Console.WriteLine("Tutorial: http://outlace.com/Reinforcement-Learning-Part-3/\n");

    Brain brain;
    if (File.Exists(BrainFile))
    {
        brain = Util.ReadBrainFromFile(BrainFile);
        Console.WriteLine("Brain loaded...");
        Console.WriteLine($"Created: {brain.CreatedDate}. Training Time: {brain.TrainingTime} ({brain.TotalTrainingGames} games)");
        Console.WriteLine($"Avg loss: {brain.TotalLoss / brain.TotalTrainingMoves}. Last: {brain.LastLoss}");
    }
    else
    {
        var numInputs = GridWorld.GridSize * GridWorld.GridSize * GridWorld.GridDepth;
        var numActions = 4;
        brain = new Brain(numInputs, numActions);
    }

    // Initial output:
    var initialOutput = brain.DisplayOutput(brain.GetInputs());

    //Console.WriteLine("Training...");
    //brain.Train(1000, 1f);

    Console.WriteLine("Batch Training...");
    brain.TrainWithExperienceReplay(3000, 32, 1f, true, BrainFile);

    // Sample output:
    brain.World = GridWorld.RandomPlayerState();
    var trainedOutput = brain.DisplayOutput(brain.GetInputs());

    // Show results:
    Console.WriteLine(brain.World.DisplayGrid());
    Console.WriteLine($"Actions: ({_actionNames[0]} {_actionNames[1]} {_actionNames[2]} {_actionNames[3]})");
    Console.WriteLine($"Initial output: {initialOutput}");
    Console.WriteLine($"Sample output: {trainedOutput}");
    Console.WriteLine("\nBrain saved...\nPress enter to play some games...");
    Console.ReadLine();

    // Play some games:
    do
    {
        Console.Clear();
        brain.World = GridWorld.RandomPlayerState();
        Console.WriteLine("Initial state:");
        Console.WriteLine(brain.World.DisplayGrid());

        var moves = 0;
        while (!brain.World.GameOver())
        {
            var action = brain.GetNextAction();
            Console.WriteLine($"\nMove: {++moves}. Taking action: {_actionNames[action]}");
            brain.World.MovePlayer(action);
            Console.WriteLine(brain.World.DisplayGrid());
        }

        if (moves >= 10)
        {
            Console.WriteLine("Game Over. Too many moves!");
        }
        else
        {
            Console.WriteLine($"Game {(brain.World.GetReward() == GridWorld.WinScore ? "WON!" : "LOST! :(")}");
        }

        Console.WriteLine("\nPress enter to play another game...");
        Console.ReadLine();
    } while (true);
}
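//# NOTE: GetNextAction is not shown in this listing. At play time it is just
//# the greedy policy: a forward pass followed by an argmax, with no random
//# exploration. A minimal sketch of the Brain method, assuming the same
//# Net / GetInputs / MaxValueIndex members used by the trainer above:
public int GetNextAction()
{
    //# Evaluate Q(S, a) for the current state and pick the best-scoring action
    var qVal = Net.Forward(GetInputs());
    return MaxValueIndex(qVal);
}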