public QNetworkSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Create the shared feature-extraction part of the network.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(hiddenSize, null, OutputLayerDense.LossFunction.None);
    outputA.HasBias = false;
    outputA.InitialWeightScale = initialWeightScale;
    SequentialNetworkDense qNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, false, NormalizationMethod.None, 0, initialWeightScale, new ReluDef()), outputA, device);

    // Split the hidden output into separate advantage and value streams
    // (the dueling architecture), which is reported to improve learning.
    // The first half of the hidden vector feeds the advantage stream, the
    // second half the value stream, so hiddenSize should be even.
    var midStream = outputA.GetOutputVariable();
    var advantageStream = CNTKLib.Slice(midStream, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(0, 1), IntVector.Repeat(hiddenSize / 2, 1));
    var valueStream = CNTKLib.Slice(midStream, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(hiddenSize / 2, 1), IntVector.Repeat(hiddenSize, 1));
    var adv = Layers.Dense(advantageStream, actionSize, device, false, "QNetworkAdvantage", initialWeightScale);
    var value = Layers.Dense(valueStream, 1, device, false, "QNetworkValue", initialWeightScale);

    InputState = inputA.InputVariable;
    // Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean over actions of A(s, a)).
    OutputQs = value.Output + CNTKLib.Minus(adv, CNTKLib.ReduceMean(adv, Axis.AllStaticAxes())).Output;
}
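// Usage sketch (illustrative, not part of the original source): constructing the dueling
// Q-network above. Only the QNetworkSimple constructor signature comes from the code
// above; the state/action sizes chosen here are assumptions for demonstration.
public static QNetworkSimple BuildExampleQNetwork()
{
    // CNTK selects the GPU if one is available, otherwise the CPU.
    var device = DeviceDescriptor.UseDefaultDevice();
    // 8-dimensional state, 4 discrete actions, 2 hidden layers of width 128.
    // hiddenSize must be even, since the last hidden layer is sliced in half
    // into the advantage and value streams.
    return new QNetworkSimple(stateSize: 8, actionSize: 4, numLayers: 2, hiddenSize: 128, device);
}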
public PPONetworkContinuousSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Create the actor (policy) network part.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    policyNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);

    InputState = inputA.InputVariable;
    OutputMean = outputA.GetOutputVariable();
    OutputProbabilities = null; // Probabilities are used for discrete actions only.

    // The variance output uses a separate, state-independent parameter,
    // as in Unity's implementation.
    var log_sigma_sq = new Parameter(new int[] { actionSize }, DataType.Float, CNTKLib.ConstantInitializer(0), device, "PPO.log_sigma_square");
    OutputVariance = CNTKLib.Exp(log_sigma_sq);

    PolicyFunction = Function.Combine(new Variable[] { OutputMean, OutputVariance });

    // Create the critic (value) network, sharing the same input variable.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);

    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();
    //PolicyParameters.Add(log_sigma_sq);
}
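// Illustrative sketch (an assumption, not part of the original source): sampling a
// continuous action from the Gaussian policy defined by OutputMean and OutputVariance.
// Because log_sigma_sq is a free Parameter rather than a network output, the variance
// is shared across all states; sigma_i = sqrt(exp(log_sigma_sq_i)). The Box-Muller
// sampling below is plain C#; how the mean/variance arrays are fetched from the CNTK
// functions is assumed to happen elsewhere.
public static float[] SampleGaussianAction(float[] mean, float[] variance, Random rng)
{
    var action = new float[mean.Length];
    for (int i = 0; i < mean.Length; i++)
    {
        // Box-Muller transform: turn two uniform draws into one standard normal sample.
        double u1 = 1.0 - rng.NextDouble(); // in (0, 1], so Log(u1) is finite
        double u2 = rng.NextDouble();
        double standardNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        action[i] = mean[i] + (float)(Math.Sqrt(variance[i]) * standardNormal);
    }
    return action;
}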
public PPONetworkDiscreteSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Create the actor (policy) network part with a softmax output over actions.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, new SoftmaxDef(), OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    policyNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);

    InputState = inputA.InputVariable;
    OutputMean = null;     // Mean and variance are used for continuous actions only.
    OutputVariance = null;
    OutputProbabilities = outputA.GetOutputVariable(); // Discrete action probabilities.
    PolicyFunction = OutputProbabilities.ToFunction();

    // Create the critic (value) network, sharing the same input variable.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);

    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();
}
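// Illustrative sketch (an assumption, not part of the original source): drawing a discrete
// action from the softmax probabilities that PolicyFunction produces, via inverse-CDF
// sampling: walk the cumulative distribution until the uniform draw falls inside an
// action's slot.
public static int SampleDiscreteAction(float[] probabilities, Random rng)
{
    double u = rng.NextDouble();
    double cumulative = 0.0;
    for (int a = 0; a < probabilities.Length; a++)
    {
        cumulative += probabilities[a];
        if (u < cumulative)
            return a;
    }
    // Guard against floating-point round-off leaving a tiny tail above the final sum.
    return probabilities.Length - 1;
}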
public void CreateResnode()
{
    // Build a small regression network: 2-dimensional input, 1 scalar output,
    // trained with a squared loss. lr and dataPlane are fields of the enclosing class.
    var input = new InputLayerDense(2);
    //outputLayer = new OutputLayerDenseBayesian(1);
    outputLayer = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.Square);
    network = new SequentialNetworkDense(input, LayerDefineHelper.DenseLayers(10, 5, true, NormalizationMethod.None), outputLayer, DeviceDescriptor.CPUDevice);
    //network = new SequentialNetworkDense(input, LayerDefineHelper.ResNodeLayers(10, 5), outputLayer, DeviceDescriptor.CPUDevice);
    trainer = new TrainerSimpleNN(network, LearnerDefs.AdamLearner(lr), DeviceDescriptor.CPUDevice);
    dataPlane.network = this;
}
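// Illustrative sketch (an assumption, not part of the original source): generating a toy
// supervised batch matching the network above, which maps a 2-dimensional input to a single
// output under a squared loss. The target function is arbitrary; how the batch is fed to
// TrainerSimpleNN is not shown, since that API is outside this excerpt.
public static void MakeToyBatch(int batchSize, Random rng, out float[] inputs, out float[] targets)
{
    inputs = new float[batchSize * 2];
    targets = new float[batchSize];
    for (int i = 0; i < batchSize; i++)
    {
        float x = (float)(rng.NextDouble() * 2.0 - 1.0);
        float y = (float)(rng.NextDouble() * 2.0 - 1.0);
        inputs[i * 2] = x;
        inputs[i * 2 + 1] = y;
        // Target: a smooth nonlinear function a 10-layer, width-5 dense network can fit.
        targets[i] = (float)Math.Sin(3.0 * x) * y;
    }
}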