Example #1
0
    // Replay and Training
    /// <summary>
    /// Trains the network on one random contiguous batch drawn from the
    /// replay buffer. Does nothing until the buffer holds more than
    /// max_replay_batch samples.
    /// </summary>
    public void ReplayTrain()
    {
        if (replay_buffer.Count > max_replay_batch)
        {
            // Debug output: dump the current output-layer activations.
            for (int i = 0; i < num_nodes[num_strcture - 1]; i++)
            {
                Debug.Log(nodes_list[num_strcture - 1][i]);
            }

            // Pick a random contiguous window of max_replay_batch samples.
            // Unity's int Random.Range has an EXCLUSIVE upper bound, so the
            // "+ 1" is required for the final window (ending at Count - 1)
            // to ever be drawn; without it the newest sample was never
            // trained on.
            int index = Random.Range(0, replay_buffer.Count - max_replay_batch + 1);
            for (int i = 0; i < max_replay_batch; i++)
            {
                ReplayMemory replay_ = (ReplayMemory)replay_buffer[index + i];

                // Q-learning target: observed reward, plus the discounted
                // best next-state value for non-terminal transitions.
                float target = replay_.reward;
                if (!replay_.done)
                {
                    Prediction(replay_.next_states);
                    target += discount_factor * nodes_list[num_strcture - 1].Max();
                }

                // Re-run the forward pass on the current state, then move
                // only the taken action's output toward the target.
                Prediction(replay_.states);
                float[] target_nodes = (float[])nodes_list[num_strcture - 1].Clone();
                target_nodes[replay_.action] = target;
                BackPropagation(target_nodes, replay_.action);
            }
        }
    }
 /// <summary>
 /// Stores the collaborators shared by every play-session variant and
 /// resets the current state to a fresh <see cref="Step"/>.
 /// </summary>
 protected BasePlaySession(TGameConfiguration game, Trainer <TGameConfiguration, TData> trainer,
                           ReplayMemory <TData> memory, DataBuilder <TGameConfiguration, TData> dataBuilder)
 {
     // Plain field wiring; no validation is performed here.
     CurrentState = new Step();
     DataBuilder  = dataBuilder;
     Memory       = memory;
     Trainer      = trainer;
     Game         = game;
 }
Example #3
0
 /// <summary>
 /// Play session that kicks off background training as soon as it is built:
 /// all collaborators are forwarded to the base session, then the trainer's
 /// async training loop is started against the shared replay memory.
 /// </summary>
 internal TrainingPlaySession(
     TGameConfiguration game,
     Trainer <TGameConfiguration, TData> trainer,
     ReplayMemory <TData> memory,
     DataBuilder <TGameConfiguration, TData> dataBuilder)
     : base(game, trainer, memory, dataBuilder)
 {
     // NOTE(review): the CancellationTokenSource is created here but never
     // disposed in this block — confirm the session cancels/disposes _ct
     // elsewhere (e.g. a Dispose/Stop method) so the training task can end.
     _ct = new CancellationTokenSource();
     Trainer.StartAsyncTraining(Memory, _ct.Token);
 }
Example #4
0
        /// <summary>
        /// Replays one batch of stored transitions and fits the online model
        /// against Double-DQN style targets: the online model selects the best
        /// next action, the target model supplies its value estimate.
        /// Batch samples are tuples of (state, action, reward, nextState),
        /// where a null nextState marks a terminal transition.
        /// </summary>
        /// <param name="batchSize">Number of transitions to replay.</param>
        /// <param name="optimizer">Optimizer used for the fit step.</param>
        /// <param name="lossMetric">Loss metric used for the fit step.</param>
        /// <param name="shuffle">Draw a shuffled batch instead of a random one.</param>
        protected override void AgentReplay(
            int batchSize,
            OptimizerBase optimizer,
            MetricFunction lossMetric,
            bool shuffle)
        {
            var samples = shuffle ? ReplayMemory.ToShuffledBatch(batchSize) : ReplayMemory.ToRandomBatch(batchSize);

            // Gather current and next states into two frames so each network
            // only has to run a single batched prediction.
            var currentStates = new DataFrame <float>(StateShape);
            var nextStates    = new DataFrame <float>(StateShape);

            foreach (var sample in samples)
            {
                currentStates.Add(sample.Item1);
                // Terminal transitions carry no next state; pad with zeros.
                nextStates.Add(sample.Item4 ?? new float[StateShape.TotalSize]);
            }

            var qCurrent    = Model.Predict(currentStates);
            var qNextOnline = Model.Predict(nextStates);
            var qNextTarget = TargetModel.Predict(nextStates);

            var trainingData = new DataFrameList <float>(StateShape, ActionShape);

            for (var i = 0; i < samples.Length; i++)
            {
                var sample = samples[i];
                var target = qCurrent[i];

                if (sample.Item4 == null)
                {
                    // Terminal step: the target is just the observed reward.
                    target[sample.Item2] = sample.Item3;
                }
                else
                {
                    // Argmax over the online net's next-state values
                    // (ties resolve to the first maximum).
                    var bestAction = 0;
                    for (var j = 1; j < qNextOnline[i].Length; j++)
                    {
                        if (qNextOnline[i][j] > qNextOnline[i][bestAction])
                        {
                            bestAction = j;
                        }
                    }

                    // Double-DQN decoupling: the TARGET net evaluates the
                    // action the ONLINE net selected.
                    target[sample.Item2] = (float)(sample.Item3 + DiscountFactor * qNextTarget[i][bestAction]);
                }

                trainingData.AddFrame(sample.Item1, target);
            }

            Model.Fit(trainingData, 1, samples.Length, optimizer, lossMetric);
        }
    /// <summary>
    /// Appends a new replay entry, evicting the oldest one first so the
    /// buffer never grows beyond <c>mCapacity</c> entries.
    /// </summary>
    /// <param name="states">Observed state vector for this step.</param>
    /// <param name="reward">Reward received for this step.</param>
    private void AddMemory(List <double> states, float reward)
    {
        ReplayMemory lastMemory = new ReplayMemory(states, reward);

        // Evict before inserting. The original check used '>' which let the
        // buffer settle at mCapacity + 1 entries; '>=' holds it at capacity.
        if (replayMemory.Count >= mCapacity)
        {
            replayMemory.RemoveAt(0);
        }

        replayMemory.Add(lastMemory);
    }
        /// <summary>
        /// Runs an interactive grid-game session on the console: the player
        /// steers with the arrow keys until the game completes, each visited
        /// state is recorded, and the recorded episode is replayed afterwards.
        /// </summary>
        public ConsoleGridGameRunner()
        {
            game = new MarkovGridGame(10, 10, 0);
            game.buildRandomGame();
            ReplayMemory memory = new ReplayMemory();

            // Interactive loop: render, record the state, apply the key press.
            while (!game.isCompleted)
            {
                Console.Clear();
                Console.WriteLine(game);
                memory.Add(game.toQNetState());

                ConsoleKeyInfo keyInfo = Console.ReadKey();
                if (keyInfo.Key == ConsoleKey.LeftArrow)
                {
                    game.update(MarkovGridGame.Action.left);
                }
                else if (keyInfo.Key == ConsoleKey.RightArrow)
                {
                    game.update(MarkovGridGame.Action.right);
                }
                else if (keyInfo.Key == ConsoleKey.UpArrow)
                {
                    game.update(MarkovGridGame.Action.up);
                }
                else if (keyInfo.Key == ConsoleKey.DownArrow)
                {
                    game.update(MarkovGridGame.Action.down);
                }
                // Any other key is ignored, matching the original switch.
            }

            // Replay the recorded episode frame by frame (~10 fps).
            foreach (QNetState state in memory)
            {
                Console.Clear();
                Console.Write(state);
                System.Threading.Thread.Sleep(100);
            }
        }
Example #7
0
        /// <summary>
        /// Unity lifecycle hook: validates configuration, builds the policy
        /// and target networks, and initializes the trainer, replay memory,
        /// and exploration rate.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the environment is missing or fewer than two actions
        /// are configured. More specific than the bare Exception originally
        /// thrown, and still caught by any existing catch (Exception).
        /// </exception>
        private void Awake()
        {
            if (env == null)
            {
                throw new InvalidOperationException("Environment is null!");
            }

            if (ActionsSize < 2)
            {
                throw new InvalidOperationException("Agent must have 2 or more actions");
            }

            // The policy network comes from the attached behaviour; the
            // target network is then refreshed from it.
            policyNet = GetComponent <SequentialNetBehaviour>().GetSequentialNet();
            policyNet.Init();
            UpdateTargetNet();

            policyNetTrainer = new Trainers.BackPropagation(policyNet, Errors.MeanSquaredError, LearningRate, false);

            replayMemory = new ReplayMemory(replayMemoryCapacity, memorySampleSize);

            // Exploration starts at its maximum; presumably decays elsewhere
            // during training — confirm against the update loop.
            explorationRate = MaxExplorationRate;
        }
 /// <summary>
 /// Play session used for evaluation only: it forwards the collaborators to
 /// the base session and starts no training of its own.
 /// </summary>
 public TestingPlaySession(
     TGameConfiguration game,
     Trainer <TGameConfiguration, TData> trainer,
     ReplayMemory <TData> memory,
     DataBuilder <TGameConfiguration, TData> dataBuilder)
     : base(game, trainer, memory, dataBuilder)
 {
     // Intentionally empty — all behavior lives in the base class.
 }