/// <summary>
/// Base constructor for deep Q-network agents: stores training hyper-parameters,
/// sets up the prediction-error chart, and pre-allocates the importance-sampling
/// weight tensor consumed by <c>CustomHuberLoss</c> in derived classes.
/// </summary>
/// <param name="inputShape">Shape of a single observation fed to the network.</param>
/// <param name="numberOfActions">Number of discrete actions (network outputs).</param>
/// <param name="learningRate">Optimizer learning rate.</param>
/// <param name="discountFactor">Reward discount factor (gamma).</param>
/// <param name="batchSize">Number of transitions sampled per training step.</param>
/// <param name="memory">Experience replay buffer supplying training transitions.</param>
protected DQN(Shape inputShape, int numberOfActions, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(inputShape, numberOfActions, learningRate, discountFactor)
{
    BatchSize = batchSize;
    Memory = memory;

    // Plain literal — the original used an interpolated string ($"dqn_error")
    // with no placeholders, which is redundant.
    ErrorChart = new ChartGenerator("dqn_error", "Q prediction error", "Episode");
    ErrorChart.AddSeries(0, "Abs error", System.Drawing.Color.LightGray);
    ErrorChart.AddSeries(1, $"Avg({ErrorAvg.N}) abs error", System.Drawing.Color.Firebrick);

    // One weight per (action, batch-entry); presumably used for prioritized-replay
    // importance sampling inside the loss — TODO confirm against CustomHuberLoss.
    ImportanceSamplingWeights = new Tensor(new Shape(1, numberOfActions, 1, batchSize));
}
/// <summary>
/// Convolutional DQN for image-like observations: three ELU convolution layers,
/// a dense head, and one linear output per action, trained with Adam and a
/// weighted Huber loss.
/// </summary>
/// <param name="inputSize">Width/height of a single observation frame (inputSize[0], inputSize[1]).</param>
/// <param name="numberOfActions">Number of discrete actions (network outputs).</param>
/// <param name="learningRate">Adam learning rate.</param>
/// <param name="discountFactor">Reward discount factor (gamma).</param>
/// <param name="batchSize">Number of transitions sampled per training step.</param>
/// <param name="memory">Experience replay buffer supplying training transitions.</param>
public DQNConv(int[] inputSize, int numberOfActions, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(null, numberOfActions, learningRate, discountFactor, batchSize, memory) // NOTE(review): null inputShape handed to base — verify base tolerates it
{
    // NOTE(review): process-wide side effect — switches all tensor ops to GPU,
    // not just for this agent. Confirm this is intended in a constructor.
    Tensor.SetOpMode(Tensor.OpMode.GPU);

    InputSize = inputSize;
    // Depth dimension carries a short frame history (TemporalDataSize) rather
    // than color channels — TODO confirm against the observation pipeline.
    Shape inputShape = new Shape(inputSize[0], inputSize[1], TemporalDataSize);

    Net = new NeuralNetwork("DQNConv");
    var Model = new Sequential();
    Model.AddLayer(new Convolution(inputShape, 8, 32, 2, Activation.ELU));
    Model.AddLayer(new Convolution(Model.LastLayer, 4, 64, 2, Activation.ELU));
    Model.AddLayer(new Convolution(Model.LastLayer, 4, 128, 2, Activation.ELU));
    Model.AddLayer(new Flatten(Model.LastLayer));
    Model.AddLayer(new Dense(Model.LastLayer, 512, Activation.ELU));
    // FIX: was Activation.Softmax. Q-values are unbounded regression targets,
    // not a probability distribution; Softmax breaks the TD targets and the
    // Huber loss. Linear matches the dense DQN constructor's output layer.
    Model.AddLayer(new Dense(Model.LastLayer, numberOfActions, Activation.Linear));
    Net.Model = Model;
    Net.Optimize(new Adam(learningRate), new CustomHuberLoss(ImportanceSamplingWeights));
}
/// <summary>
/// Builds a fully-connected deep Q-network: flattened input, a ReLU hidden layer
/// per entry in <paramref name="hiddenLayersNeurons"/>, and one linear output per
/// action, optimized with Adam and a weighted Huber loss.
/// </summary>
/// <param name="inputShape">Shape of a single observation fed to the network.</param>
/// <param name="numberOfActions">Number of discrete actions (network outputs).</param>
/// <param name="hiddenLayersNeurons">Neuron count for each hidden dense layer, in order.</param>
/// <param name="learningRate">Adam learning rate.</param>
/// <param name="discountFactor">Reward discount factor (gamma).</param>
/// <param name="batchSize">Number of transitions sampled per training step.</param>
/// <param name="memory">Experience replay buffer supplying training transitions.</param>
public DQN(Shape inputShape, int numberOfActions, int[] hiddenLayersNeurons, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : this(inputShape, numberOfActions, learningRate, discountFactor, batchSize, memory)
{
    Net = new NeuralNetwork("dqn");

    var model = new Sequential();
    model.AddLayer(new Flatten(inputShape));
    foreach (int neurons in hiddenLayersNeurons)
    {
        model.AddLayer(new Dense(model.LastLayer, neurons, Activation.ReLU));
    }
    // Linear output: one unbounded Q-value per action.
    model.AddLayer(new Dense(model.LastLayer, numberOfActions, Activation.Linear));

    Net.Model = model;
    Net.Optimize(new Adam(learningRate), new CustomHuberLoss(ImportanceSamplingWeights));
}
/// <summary>
/// Dueling DQN: splits the shared hidden representation into a scalar state-value
/// stream and a per-action advantage stream, merged by element-wise sum into
/// per-action Q-values.
/// </summary>
/// <param name="inputShape">Shape of a single observation fed to the network.</param>
/// <param name="numberOfActions">Number of discrete actions (network outputs).</param>
/// <param name="hiddenLayersNeurons">Neuron count for each shared hidden dense layer, in order.</param>
/// <param name="learningRate">Optimizer learning rate (used by the base constructor).</param>
/// <param name="discountFactor">Reward discount factor (gamma).</param>
/// <param name="batchSize">Number of transitions sampled per training step.</param>
/// <param name="memory">Experience replay buffer supplying training transitions.</param>
// NOTE(review): the chained base constructor already builds a Sequential model,
// assigns it to Net, and calls Net.Optimize; that network is discarded when Net
// is reassigned below, and no Optimize call is made on the replacement — confirm
// the optimizer/loss are attached elsewhere before training this agent.
public DuelingDQN(Shape inputShape, int numberOfActions, int[] hiddenLayersNeurons, float learningRate, float discountFactor, int batchSize, BaseExperienceReplay memory)
    : base(inputShape, numberOfActions, hiddenLayersNeurons, learningRate, discountFactor, batchSize, memory)
{
    Net = new NeuralNetwork("DuelingDQN");

    var input = new Flatten(inputShape);

    // Shared trunk: one ReLU dense layer per requested hidden size.
    LayerBase lastLayer = input;
    for (int i = 0; i < hiddenLayersNeurons.Length; ++i)
    {
        lastLayer = new Dense(lastLayer, hiddenLayersNeurons[i], Activation.ReLU);
    }

    // State-value stream: a single scalar V(s).
    LayerBase stateValue = new Dense(lastLayer, 1);
    // NOTE(review): this Lambda zeroes its output on the forward pass and its
    // backward pass is empty, so the state-value stream contributes nothing to
    // the merged Q-values and receives no gradient. A standard dueling head
    // broadcasts V(s) across all actions (and mean-centers the advantages) —
    // this looks like a placeholder; confirm before relying on the architecture.
    stateValue = new Lambda(new [] { stateValue }, new Shape(1, numberOfActions), (inps, outp) => { outp.Zero(); }, (outpG, inpsG) => { });

    // Advantage stream: one A(s,a) value per action.
    var actionAdvantage = new Dense(lastLayer, numberOfActions);

    // Q(s,a) = V(s) + A(s,a) via element-wise sum (linear activation).
    var output = new Merge(new [] { stateValue, actionAdvantage }, Merge.Mode.Sum, Activation.Linear);

    Net.Model = new Flow(new [] { input }, new [] { output });
}