Example 1
protected DQN(Shape inputShape, int numberOfActions, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(inputShape, numberOfActions, learningRate, discountFactor)
{
    BatchSize = batchSize;
    Memory    = memory;

    // Chart tracking the absolute Q-prediction error per episode,
    // plus a running average over the last ErrorAvg.N values.
    ErrorChart = new ChartGenerator("dqn_error", "Q prediction error", "Episode");
    ErrorChart.AddSeries(0, "Abs error", System.Drawing.Color.LightGray);
    ErrorChart.AddSeries(1, $"Avg({ErrorAvg.N}) abs error", System.Drawing.Color.Firebrick);

    // One importance-sampling weight per action per sample in the batch,
    // consumed by CustomHuberLoss in the derived constructors below.
    ImportanceSamplingWeights = new Tensor(new Shape(1, numberOfActions, 1, batchSize));
}
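
This constructor is protected, so it is only reachable through constructor chaining, as Example 3 does via `this(...)`. A minimal sketch of a subclass that chains to it, assuming the Neuro types shown above; `MyDQN` and the argument values are illustrative, not part of the library:

    // Hypothetical subclass; the name and hyperparameter values are illustrative.
    public class MyDQN : DQN
    {
        public MyDQN(Shape inputShape, int numberOfActions, BaseExperienceReplay memory)
            : base(inputShape, numberOfActions, 0.001f, 0.99f, 32, memory)
        {
        }
    }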
Example 2
public DQNConv(int[] inputSize, int numberOfActions, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(null, numberOfActions, learningRate, discountFactor, batchSize, memory)
{
    Tensor.SetOpMode(Tensor.OpMode.GPU);

    InputSize = inputSize;
    // Stack TemporalDataSize consecutive frames along the depth axis.
    Shape inputShape = new Shape(inputSize[0], inputSize[1], TemporalDataSize);

    Net = new NeuralNetwork("DQNConv");
    var model = new Sequential();

    // Convolutional feature extractor followed by a dense layer;
    // the final layer produces one output per action.
    model.AddLayer(new Convolution(inputShape, 8, 32, 2, Activation.ELU));
    model.AddLayer(new Convolution(model.LastLayer, 4, 64, 2, Activation.ELU));
    model.AddLayer(new Convolution(model.LastLayer, 4, 128, 2, Activation.ELU));
    model.AddLayer(new Flatten(model.LastLayer));
    model.AddLayer(new Dense(model.LastLayer, 512, Activation.ELU));
    model.AddLayer(new Dense(model.LastLayer, numberOfActions, Activation.Softmax));
    Net.Model = model;
    Net.Optimize(new Adam(learningRate), new CustomHuberLoss(ImportanceSamplingWeights));
}
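
A hedged instantiation sketch for the convolutional variant. The 84x84 input resolution, the hyperparameter values, and the `ExperienceReplay` type are assumptions for illustration, not values taken from the source:

    // Illustrative values only.
    BaseExperienceReplay memory = new ExperienceReplay(100000); // assumed concrete replay type
    var agent = new DQNConv(new[] { 84, 84 }, numberOfActions: 4,
                            learningRate: 0.00025f, discountFactor: 0.99f,
                            batchSize: 32, memory: memory);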
Example 3
public DQN(Shape inputShape, int numberOfActions, int[] hiddenLayersNeurons, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : this(inputShape, numberOfActions, learningRate, discountFactor, batchSize, memory)
{
    Net = new NeuralNetwork("dqn");
    var model = new Sequential();

    // Fully connected network: flatten the input, add one ReLU layer per
    // entry in hiddenLayersNeurons, and end with a linear Q-value head.
    model.AddLayer(new Flatten(inputShape));
    for (int i = 0; i < hiddenLayersNeurons.Length; ++i)
    {
        model.AddLayer(new Dense(model.LastLayer, hiddenLayersNeurons[i], Activation.ReLU));
    }
    model.AddLayer(new Dense(model.LastLayer, numberOfActions, Activation.Linear));
    Net.Model = model;
    Net.Optimize(new Adam(learningRate), new CustomHuberLoss(ImportanceSamplingWeights));
}
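
Since this public constructor chains to the protected one from Example 1, a caller only supplies the hidden layer sizes along with the usual hyperparameters. A sketch, with the Shape arguments, hyperparameter values, and replay type all assumed for illustration:

    // Illustrative values only.
    BaseExperienceReplay memory = new ExperienceReplay(50000); // assumed concrete replay type
    var agent = new DQN(new Shape(1, 4), numberOfActions: 2,
                        hiddenLayersNeurons: new[] { 64, 64 },
                        learningRate: 0.001f, discountFactor: 0.99f,
                        batchSize: 32, memory: memory);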
Example 4
public DuelingDQN(Shape inputShape, int numberOfActions, int[] hiddenLayersNeurons, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(inputShape, numberOfActions, hiddenLayersNeurons, learningRate, discountFactor, batchSize, memory)
{
    Net = new NeuralNetwork("DuelingDQN");
    var input = new Flatten(inputShape);
    LayerBase lastLayer = input;

    for (int i = 0; i < hiddenLayersNeurons.Length; ++i)
    {
        lastLayer = new Dense(lastLayer, hiddenLayersNeurons[i], Activation.ReLU);
    }

    // State-value stream: a single scalar V(s).
    LayerBase stateValue = new Dense(lastLayer, 1);

    // Lambda reshaping the scalar state value to the action dimension; as written,
    // its forward pass simply zeroes the output and its backward pass is empty.
    stateValue = new Lambda(new[] { stateValue }, new Shape(1, numberOfActions), (inps, outp) => { outp.Zero(); }, (outpG, inpsG) => { });

    // Advantage stream: one A(s, a) per action.
    var actionAdvantage = new Dense(lastLayer, numberOfActions);

    // Q(s, a) = V(s) + A(s, a), summed element-wise by the Merge layer.
    var output = new Merge(new[] { stateValue, actionAdvantage }, Merge.Mode.Sum, Activation.Linear);

    Net.Model = new Flow(new[] { input }, new[] { output });
}
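
The dueling head implements the decomposition Q(s, a) = V(s) + A(s, a): a scalar state-value stream and a per-action advantage stream are summed by the Merge layer. (The standard dueling formulation additionally subtracts the mean advantage to make the decomposition identifiable; the code above sums the two streams directly.) A construction sketch, with all values and the replay type assumed for illustration:

    // Illustrative values only.
    BaseExperienceReplay memory = new ExperienceReplay(100000); // assumed concrete replay type
    var agent = new DuelingDQN(new Shape(1, 8), numberOfActions: 4,
                               hiddenLayersNeurons: new[] { 128, 128 },
                               learningRate: 0.0005f, discountFactor: 0.99f,
                               batchSize: 64, memory: memory);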