/// <summary>
/// Builds a dueling Q-network: a shared dense trunk whose output is split in half,
/// one half feeding an advantage head and the other a state-value head.
/// </summary>
public QNetworkSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Shared feature trunk: state input -> hidden ReLU layers -> hiddenSize-wide output.
    var stateInput = new InputLayerDense(stateSize);
    var trunkOutput = new OutputLayerDense(hiddenSize, null, OutputLayerDense.LossFunction.None);
    trunkOutput.HasBias = false;
    trunkOutput.InitialWeightScale = initialWeightScale;
    var trunk = new SequentialNetworkDense(
        stateInput,
        LayerDefineHelper.DenseLayers(numLayers, hiddenSize, false, NormalizationMethod.None, 0, initialWeightScale, new ReluDef()),
        trunkOutput,
        device);

    // Separate advantage and value streams (dueling architecture) — said to learn better.
    // First half of the trunk output -> advantage head; second half -> value head.
    var features = trunkOutput.GetOutputVariable();
    var advantageHalf = CNTKLib.Slice(features, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(0, 1), IntVector.Repeat(hiddenSize / 2, 1));
    var valueHalf = CNTKLib.Slice(features, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(hiddenSize / 2, 1), IntVector.Repeat(hiddenSize, 1));
    var advantage = Layers.Dense(advantageHalf, actionSize, device, false, "QNetworkAdvantage", initialWeightScale);
    var stateValue = Layers.Dense(valueHalf, 1, device, false, "QNetworkValue", initialWeightScale);

    InputState = stateInput.InputVariable;
    // Q(s,a) = V(s) + (A(s,a) - mean_a A(s,a))
    OutputQs = stateValue.Output + CNTKLib.Minus(advantage, CNTKLib.ReduceMean(advantage, Axis.AllStaticAxes())).Output;
}
/// <summary>
/// Builds a simple PPO actor/critic pair for continuous action spaces.
/// The actor outputs one mean per action dimension; the variance comes from a
/// separate state-independent trainable parameter (as in Unity's implementation).
/// </summary>
/// <param name="stateSize">Dimension of the observation vector.</param>
/// <param name="actionSize">Number of continuous action dimensions.</param>
/// <param name="numLayers">Hidden-layer count for both actor and critic.</param>
/// <param name="hiddenSize">Width of each hidden layer.</param>
/// <param name="device">CNTK device the graphs are built on.</param>
/// <param name="initialWeightScale">Scale of the initial dense-layer weights.</param>
public PPONetworkContinuousSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Actor (policy) network: state -> action means.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    // BUG FIX: the actor network used to be stored in the valueNetwork field (and
    // the critic in policyNetwork); the two field assignments were swapped.
    policyNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);
    InputState = inputA.InputVariable;
    OutputMean = outputA.GetOutputVariable();
    OutputProbabilities = null; // used for discrete actions only; unused here.

    // Variance = exp(log sigma^2), a trainable parameter independent of the state,
    // exponentiated so the variance is always positive.
    var log_sigma_sq = new Parameter(new int[] { actionSize }, DataType.Float, CNTKLib.ConstantInitializer(0), device, "PPO.log_sigma_square");
    OutputVariance = CNTKLib.Exp(log_sigma_sq);
    PolicyFunction = Function.Combine(new Variable[] { OutputMean, OutputVariance });

    // Critic (value) network: shares the actor's input variable, outputs a scalar value.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();
}
/// <summary>
/// Builds a simple PPO actor/critic pair for discrete action spaces.
/// The actor outputs a softmax probability per action; the critic outputs a scalar value.
/// </summary>
/// <param name="stateSize">Dimension of the observation vector.</param>
/// <param name="actionSize">Number of discrete actions.</param>
/// <param name="numLayers">Hidden-layer count for both actor and critic.</param>
/// <param name="hiddenSize">Width of each hidden layer.</param>
/// <param name="device">CNTK device the graphs are built on.</param>
/// <param name="initialWeightScale">Scale of the initial dense-layer weights.</param>
public PPONetworkDiscreteSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Actor (policy) network: state -> softmax action probabilities.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, new SoftmaxDef(), OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    // BUG FIX: the actor network used to be stored in the valueNetwork field (and
    // the critic in policyNetwork); the two field assignments were swapped.
    policyNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);
    InputState = inputA.InputVariable;
    OutputMean = null;      // used for continuous actions only; unused here.
    OutputVariance = null;  // used for continuous actions only; unused here.
    OutputProbabilities = outputA.GetOutputVariable();
    PolicyFunction = OutputProbabilities.ToFunction();

    // Critic (value) network: shares the actor's input variable, outputs a scalar value.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();
}
/// <summary>
/// Creates the test network (2 inputs -> 1 output, square loss) and its Adam trainer
/// on the CPU, then registers this instance with the data plane.
/// </summary>
/// <param name="numLayers">Number of hidden dense layers. Default preserves the original hard-coded 10.</param>
/// <param name="hiddenSize">Width of each hidden layer. Default preserves the original hard-coded 5.</param>
public void CreateResnode(int numLayers = 10, int hiddenSize = 5)
{
    var input = new InputLayerDense(2);
    outputLayer = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.Square);
    // Layer count/width were hard-coded; they are now parameters with the old
    // values as defaults, so existing no-argument callers are unaffected.
    network = new SequentialNetworkDense(input, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None), outputLayer, DeviceDescriptor.CPUDevice);
    trainer = new TrainerSimpleNN(network, LearnerDefs.AdamLearner(lr), DeviceDescriptor.CPUDevice);
    dataPlane.network = this;
}
/// <summary>
/// Builds a convolutional Q-network: a stack of SELU conv layers (with optional
/// 2x2 max-pooling after each), flattened into a dense ReLU head that outputs Q-values.
/// </summary>
/// <param name="inputWidth">Width of the input image.</param>
/// <param name="inputHeight">Height of the input image.</param>
/// <param name="inputDepth">Channel count of the input image.</param>
/// <param name="actionSize">Number of discrete actions (Q outputs).</param>
/// <param name="filterSizes">Square kernel size per conv layer.</param>
/// <param name="filterDepths">Output channel count per conv layer.</param>
/// <param name="strides">Stride per conv layer.</param>
/// <param name="pooling">Whether to apply 2x2 max-pooling after each conv layer.</param>
/// <param name="densehiddenLayers">Hidden-layer count of the dense head.</param>
/// <param name="densehiddenSize">Width of each dense hidden layer.</param>
/// <param name="denseUseBias">Whether dense hidden layers use a bias.</param>
/// <param name="device">CNTK device the graph is built on.</param>
/// <param name="denseInitialWeightScale">Scale of the initial dense-layer weights.</param>
public QNetworkConvSimple(int inputWidth, int inputHeight, int inputDepth, int actionSize, int[] filterSizes, int[] filterDepths, int[] strides, bool[] pooling, int densehiddenLayers, int densehiddenSize, bool denseUseBias, DeviceDescriptor device, float denseInitialWeightScale = 0.01f)
{
    Device = device;
    StateSize = inputWidth * inputHeight * inputDepth;
    ActionSize = actionSize;
    InputDimension = new int[3] { inputWidth, inputHeight, inputDepth };

    InputState = CNTKLib.InputVariable(InputDimension, DataType.Float);
    // BUG FIX: pooling.Length was never validated even though pooling[i] is read in
    // the loop below; a shorter pooling array would throw IndexOutOfRangeException.
    Debug.Assert(filterSizes.Length == strides.Length && filterDepths.Length == filterSizes.Length && pooling.Length == filterSizes.Length,
        "Length of filterSizes, strides, filterDepths and pooling are not the same");

    var lastLayer = InputState;
    for (int i = 0; i < filterSizes.Length; ++i)
    {
        // Conv layers use SELU activation with SELU-style (1/fan-in) initialization.
        lastLayer = Layers.Convolution2D(lastLayer, filterDepths[i], filterSizes[i], filterSizes[i], device, strides[i], true, true, "QConv_" + i, Mathf.Sqrt((1.0f / (filterSizes[i] * filterSizes[i]))));
        lastLayer = new SELUDef().BuildNew(lastLayer, device, "");
        if (pooling[i])
        {
            lastLayer = CNTKLib.Pooling(lastLayer, PoolingType.Max, new int[] { 2, 2 }, new int[] { 2, 2 }, BoolVector.Repeat(true, 2), false, true, "pool2");
        }
    }
    lastLayer = CNTKLib.Flatten(lastLayer, new Axis(3), "Flatten");

    // Dense head mapping flattened conv features to one Q-value per action.
    var inputA = new InputLayerCNTKVar(lastLayer);
    var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);
    outputA.HasBias = false;
    outputA.InitialWeightScale = denseInitialWeightScale;
    SequentialNetworkDense qNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(densehiddenLayers, densehiddenSize, denseUseBias, NormalizationMethod.None, 0, denseInitialWeightScale, new ReluDef()), outputA, device);

    OutputQs = outputA.GetOutputVariable();
}