// Replay and Training.
// Samples one random contiguous window of `max_replay_batch` transitions from
// the replay buffer and trains the network on it with the standard DQN target:
// target = reward + discount * max_a' Q(next_state, a') (plain reward when the
// transition is terminal). Does nothing until the buffer holds more than
// `max_replay_batch` samples.
public void ReplayTrain()
{
    if (replay_buffer.Count > max_replay_batch)
    {
        // NOTE(review): unconditional dump of every output-layer node on each
        // call — looks like leftover debug instrumentation; confirm whether it
        // is still needed (Debug.Log in a training loop is costly in Unity).
        for (int i = 0; i < num_nodes[num_strcture - 1]; i++)
        {
            Debug.Log(nodes_list[num_strcture - 1][i]);
        }

        // Pick a random window start. Unity's Random.Range(int, int) excludes
        // the max bound, so "+ 1" is required for the window ending at the
        // last buffer element (Count - 1) to ever be sampled; without it that
        // final transition was unreachable (off-by-one).
        int index = Random.Range(0, replay_buffer.Count - max_replay_batch + 1);
        for (int i = 0; i < max_replay_batch; i++)
        {
            ReplayMemory replay_ = (ReplayMemory)replay_buffer[index + i];

            // Bootstrapped target: reward plus discounted best Q-value of the
            // next state; skipped for terminal transitions.
            float target = replay_.reward;
            if (!replay_.done)
            {
                Prediction(replay_.next_states);
                target += discount_factor * nodes_list[num_strcture - 1].Max();
            }

            // Re-run the forward pass on the CURRENT state so the output layer
            // reflects s (not s') before the training target is assembled.
            Prediction(replay_.states);
            float[] target_nodes = (float[])nodes_list[num_strcture - 1].Clone();
            target_nodes[replay_.action] = target;
            BackPropagation(target_nodes, replay_.action);
        }
    }
}
/// <summary>
/// Stores the session's collaborators (game, trainer, replay memory, data
/// builder) and resets the current state to a fresh <see cref="Step"/>.
/// </summary>
protected BasePlaySession(
    TGameConfiguration game,
    Trainer<TGameConfiguration, TData> trainer,
    ReplayMemory<TData> memory,
    DataBuilder<TGameConfiguration, TData> dataBuilder)
{
    // Plain field wiring — no validation is performed here.
    Game = game;
    DataBuilder = dataBuilder;
    Memory = memory;
    Trainer = trainer;

    // Every session starts from an empty step.
    CurrentState = new Step();
}
/// <summary>
/// Play session that trains while playing: after the base wiring, a fresh
/// cancellation source is created and the trainer immediately begins
/// consuming the replay memory on a background task.
/// </summary>
internal TrainingPlaySession(
    TGameConfiguration game,
    Trainer<TGameConfiguration, TData> trainer,
    ReplayMemory<TData> memory,
    DataBuilder<TGameConfiguration, TData> dataBuilder)
    : base(game, trainer, memory, dataBuilder)
{
    // The token lets the owner stop background training when the session ends.
    _ct = new CancellationTokenSource();
    Trainer.StartAsyncTraining(Memory, _ct.Token);
}
/// <summary>
/// One Double-DQN replay step: the online model chooses the best action for
/// each next state, the target model supplies that action's value, and the
/// online model is fitted toward reward + DiscountFactor * value (or the raw
/// reward for terminal samples, i.e. samples with a null next state).
/// </summary>
/// <param name="batchSize">Number of transitions to sample.</param>
/// <param name="optimizer">Optimizer passed through to <c>Model.Fit</c>.</param>
/// <param name="lossMetric">Loss metric passed through to <c>Model.Fit</c>.</param>
/// <param name="shuffle">Shuffled batch when true, random batch otherwise.</param>
protected override void AgentReplay(
    int batchSize,
    OptimizerBase optimizer,
    MetricFunction lossMetric,
    bool shuffle)
{
    var batch = shuffle
        ? ReplayMemory.ToShuffledBatch(batchSize)
        : ReplayMemory.ToRandomBatch(batchSize);

    // Collect current states and next states into two aligned frames.
    // Terminal samples (Item4 == null) contribute an all-zero placeholder so
    // the row indices stay aligned with the batch.
    var states = new DataFrame<float>(StateShape);
    var statesTarget = new DataFrame<float>(StateShape);
    foreach (var sample in batch)
    {
        states.Add(sample.Item1);
        statesTarget.Add(sample.Item4 ?? new float[StateShape.TotalSize]);
    }

    var prediction = Model.Predict(states);                    // Q(s, ·) — becomes the fit target rows
    var predictionOfTargetStates = Model.Predict(statesTarget); // online net on s' — action selection
    var predictionTarget = TargetModel.Predict(statesTarget);   // target net on s' — action evaluation

    var data = new DataFrameList<float>(StateShape, ActionShape);
    for (var i = 0; i < batch.Length; i++)
    {
        var sample = batch[i];
        var row = prediction[i];

        if (sample.Item4 == null)
        {
            // Terminal transition: the target is simply the reward.
            row[sample.Item2] = sample.Item3;
        }
        else
        {
            // Argmax over the online net's next-state values (first index
            // wins on ties), then evaluate that action with the target net.
            var nextValues = predictionOfTargetStates[i];
            var bestIndex = 0;
            var bestValue = float.MinValue;
            for (var j = 0; j < nextValues.Length; j++)
            {
                if (nextValues[j] > bestValue)
                {
                    bestValue = nextValues[j];
                    bestIndex = j;
                }
            }
            row[sample.Item2] = (float)(sample.Item3 + DiscountFactor * predictionTarget[i][bestIndex]);
        }

        data.AddFrame(sample.Item1, row);
    }

    // Single epoch over the sampled batch.
    Model.Fit(data, 1, batch.Length, optimizer, lossMetric);
}
/// <summary>
/// Appends a new replay memory, evicting the oldest entry first when the
/// buffer is already at capacity, so it never holds more than
/// <c>mCapacity</c> items.
/// </summary>
/// <param name="states">Observed state vector for this step.</param>
/// <param name="reward">Reward received for this step.</param>
private void AddMemory(List<double> states, float reward)
{
    // ">=" (not ">"): the original check only evicted once Count EXCEEDED the
    // capacity, so the buffer stabilized at mCapacity + 1 entries (off-by-one).
    if (replayMemory.Count >= mCapacity)
    {
        replayMemory.RemoveAt(0); // drop the oldest entry (FIFO eviction)
    }
    replayMemory.Add(new ReplayMemory(states, reward));
}
/// <summary>
/// Interactive console driver for <see cref="MarkovGridGame"/>: builds a
/// random 10x10 game, renders it each turn while recording the state into a
/// replay memory, moves via the arrow keys, and once the game completes
/// replays the recorded states as a short animation (100 ms per frame).
/// </summary>
public ConsoleGridGameRunner()
{
    game = new MarkovGridGame(10, 10, 0);
    game.buildRandomGame();

    ReplayMemory memory = new ReplayMemory();

    // Play loop: draw, record, then apply the key press (if it maps to a move).
    while (!game.isCompleted)
    {
        Console.Clear();
        Console.WriteLine(game);
        memory.Add(game.toQNetState());

        ConsoleKey pressed = Console.ReadKey().Key;
        if (pressed == ConsoleKey.LeftArrow)
        {
            game.update(MarkovGridGame.Action.left);
        }
        else if (pressed == ConsoleKey.RightArrow)
        {
            game.update(MarkovGridGame.Action.right);
        }
        else if (pressed == ConsoleKey.UpArrow)
        {
            game.update(MarkovGridGame.Action.up);
        }
        else if (pressed == ConsoleKey.DownArrow)
        {
            game.update(MarkovGridGame.Action.down);
        }
        // Any other key: no move, redraw on the next iteration.
    }

    // Replay the recorded run frame by frame.
    foreach (QNetState frame in memory)
    {
        Console.Clear();
        Console.Write(frame);
        System.Threading.Thread.Sleep(100);
    }
}
/// <summary>
/// Unity lifecycle hook: validates configuration, builds and initializes the
/// policy network plus its target copy, then sets up the trainer, the replay
/// memory, and the initial exploration rate.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when no environment is assigned or fewer than two actions exist.
/// </exception>
private void Awake()
{
    // Fail fast on misconfiguration. Specific exception types replace the
    // reserved base Exception (CA2201); existing catch (Exception) handlers
    // still match. Messages are unchanged.
    if (env == null)
    {
        throw new InvalidOperationException("Environment is null!");
    }
    if (ActionsSize < 2)
    {
        throw new InvalidOperationException("Agent must have 2 or more actions");
    }

    // Build the online policy net and copy its weights into the target net.
    policyNet = GetComponent<SequentialNetBehaviour>().GetSequentialNet();
    policyNet.Init();
    UpdateTargetNet();

    policyNetTrainer = new Trainers.BackPropagation(policyNet, Errors.MeanSquaredError, LearningRate, false);
    replayMemory = new ReplayMemory(replayMemoryCapacity, memorySampleSize);

    // Start fully exploratory; the rate presumably decays elsewhere — TODO confirm.
    explorationRate = MaxExplorationRate;
}
/// <summary>
/// Evaluation-only play session: identical wiring to the base session, with
/// no additional setup (in particular, no background training is started).
/// </summary>
public TestingPlaySession(
    TGameConfiguration game,
    Trainer<TGameConfiguration, TData> trainer,
    ReplayMemory<TData> memory,
    DataBuilder<TGameConfiguration, TData> dataBuilder)
    : base(game, trainer, memory, dataBuilder)
{
    // Intentionally empty — all state is handled by the base constructor.
}