/// <summary>
/// Builds a simple PPO network for a discrete action space: an actor head that
/// emits softmax action probabilities, and a value head that shares the same
/// state input variable but uses its own dense stack.
/// </summary>
/// <param name="stateSize">Dimension of the flat state input vector.</param>
/// <param name="actionSize">Number of discrete actions (size of the softmax output).</param>
/// <param name="numLayers">Number of hidden dense layers in each head.</param>
/// <param name="hiddenSize">Width of each hidden dense layer.</param>
/// <param name="device">CNTK device the parameters are created on.</param>
/// <param name="initialWeightScale">Initial weight scale for all dense layers.</param>
public PPONetworkDiscreteSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Create the actor (policy) network part.
    // NOTE(review): this actor network is assigned to the field named `valueNetwork`,
    // while the value head below is assigned to `policyNetwork` — the field names
    // appear swapped. Confirm against how the rest of the class consumes these
    // fields (e.g. when collecting trainable parameters) before renaming.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, new SoftmaxDef(), OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);

    InputState = inputA.InputVariable;
    OutputMean = null;       // continuous-action outputs are unused for discrete actions
    OutputVariance = null;
    OutputProbabilities = outputA.GetOutputVariable(); // this is for discrete action only
    PolicyFunction = OutputProbabilities.ToFunction();

    // Create the value network, reusing the same state input variable so both
    // heads are fed by a single InputState.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    policyNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();
}
/// <summary>
/// Helper function to create the generator network. Supports noise-only input
/// (plain GAN), condition-only input, or both spliced into one input vector
/// (conditional GAN).
/// </summary>
/// <param name="inputNoiseSize">Size of the noise input; pass 0 or less for no noise input.</param>
/// <param name="inputConditionSize">Size of the condition input; pass 0 or less for no condition input.</param>
/// <param name="outputSize">Size of the generated output vector.</param>
/// <param name="generatorLayerSize">Width of each hidden dense layer.</param>
/// <param name="generatorLayerCount">Number of hidden dense layers.</param>
/// <param name="device">CNTK device the parameters are created on.</param>
protected void CreateGenerator(int inputNoiseSize, int inputConditionSize, int outputSize, int generatorLayerSize, int generatorLayerCount, DeviceDescriptor device)
{
    // Guard: at least one of the two inputs must exist; otherwise the final
    // else-branch below would create an input variable of non-positive size.
    if (inputNoiseSize <= 0 && inputConditionSize <= 0)
    {
        throw new System.ArgumentException("At least one of inputNoiseSize or inputConditionSize must be positive.");
    }

    // Build the generator input depending on which inputs are present.
    Variable concatenatedInput;
    if (inputNoiseSize > 0 && inputConditionSize > 0)
    {
        // Conditional GAN: splice noise and condition into one input vector.
        InputNoiseGenerator = CNTKLib.InputVariable(new int[] { inputNoiseSize }, DataType.Float);
        InputConditionGenerator = CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float);
        var vsgenerator = new VariableVector();
        vsgenerator.Add(InputNoiseGenerator);
        vsgenerator.Add(InputConditionGenerator);
        concatenatedInput = CNTKLib.Splice(vsgenerator, new Axis(0));
    }
    else if (inputNoiseSize > 0)
    {
        // Noise only.
        InputNoiseGenerator = CNTKLib.InputVariable(new int[] { inputNoiseSize }, DataType.Float);
        InputConditionGenerator = null;
        concatenatedInput = InputNoiseGenerator;
    }
    else
    {
        // Condition only.
        InputNoiseGenerator = null;
        InputConditionGenerator = CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float);
        concatenatedInput = InputConditionGenerator;
    }

    // Dense generator body with a square-loss output layer.
    var inputG = new InputLayerCNTKVar(concatenatedInput);
    var outputLayerG = new OutputLayerDense(outputSize, null, OutputLayerDense.LossFunction.Square);
    GeneratorSequentialModel = new SequentialNetworkDense(inputG, LayerDefineHelper.DenseLayers(generatorLayerCount, generatorLayerSize, true, NormalizationMethod.None), outputLayerG, device);
    GeneratorOutput = GeneratorSequentialModel.OutputLayer.GetOutputVariable();
    InputTargetGenerator = GeneratorSequentialModel.OutputLayer.GetTargetInputVariable();
}
/// <summary>
/// Builds a simple PPO network for a continuous action space: an actor head
/// that outputs the action mean, a state-independent trainable log-variance
/// parameter, and a value head sharing the same state input variable.
/// </summary>
/// <param name="stateSize">Dimension of the flat state input vector.</param>
/// <param name="actionSize">Number of continuous action dimensions.</param>
/// <param name="numLayers">Number of hidden dense layers in each head.</param>
/// <param name="hiddenSize">Width of each hidden dense layer.</param>
/// <param name="device">CNTK device the parameters are created on.</param>
/// <param name="initialWeightScale">Initial weight scale for all dense layers.</param>
public PPONetworkContinuousSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
{
    Device = device;
    StateSize = stateSize;
    ActionSize = actionSize;

    // Create the actor (policy) network part.
    // NOTE(review): this actor network is assigned to the field named `valueNetwork`,
    // while the value head below is assigned to `policyNetwork` — the field names
    // appear swapped. Confirm against the rest of the class before renaming.
    var inputA = new InputLayerDense(stateSize);
    var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);
    outputA.InitialWeightScale = initialWeightScale;
    valueNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);

    InputState = inputA.InputVariable;
    OutputMean = outputA.GetOutputVariable();
    OutputProbabilities = null; // this is for discrete action only

    // The variance output uses a separate free-standing parameter (independent
    // of the state), as in Unity ML-Agents' implementation: variance = exp(log_sigma_sq),
    // initialized so that variance starts at 1.
    var log_sigma_sq = new Parameter(new int[] { actionSize }, DataType.Float, CNTKLib.ConstantInitializer(0), device, "PPO.log_sigma_square");
    //test
    OutputVariance = CNTKLib.Exp(log_sigma_sq);

    // Policy evaluation returns both mean and variance.
    PolicyFunction = Function.Combine(new Variable[] { OutputMean, OutputVariance });

    // Create the value network, reusing the same state input variable.
    var inputC = new InputLayerCNTKVar(InputState);
    var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);
    outputC.InitialWeightScale = initialWeightScale;
    policyNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
    OutputValue = outputC.GetOutputVariable();
    ValueFunction = OutputValue.ToFunction();

    // NOTE(review): log_sigma_sq is never registered with PolicyParameters here
    // (the line below is commented out) — verify the variance parameter is still
    // picked up for training elsewhere, otherwise it stays frozen at its init value.
    //PolicyParameters.Add(log_sigma_sq);
}
/// <summary>
/// Helper function to create the discriminators. Builds one discriminator over
/// real data (optionally spliced with a condition input), then clones it with
/// shared parameters to evaluate the generator's fake output, and merges both
/// outputs into a single combined function.
/// </summary>
/// <param name="fakeDataFromGenerator">Generator output variable fed to the fake-side discriminator clone.</param>
/// <param name="inputConditionSize">Size of the condition input; pass 0 or less for an unconditional GAN.</param>
/// <param name="outputSize">Size of the (real or generated) data vector the discriminator judges.</param>
/// <param name="discriminatorLayerSize">Width of each hidden dense layer.</param>
/// <param name="discriminatorLayerCount">Number of hidden dense layers.</param>
/// <param name="device">CNTK device the parameters are created on.</param>
protected void CreateDiscriminators(Variable fakeDataFromGenerator, int inputConditionSize, int outputSize, int discriminatorLayerSize, int discriminatorLayerCount, DeviceDescriptor device)
{
    // Create the discriminator input; layout depends on whether this is a conditional GAN.
    Variable concatenatedInput = null;
    if (inputConditionSize > 0)
    {
        // Conditional: the real-side input is data spliced with its condition.
        // A separate condition variable is created for the fake side so it can be
        // substituted into the shared-parameter clone below.
        InputDataDiscriminatorReal = CNTKLib.InputVariable(new int[] { outputSize }, DataType.Float);
        InputConditionDiscriminatorReal = CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float);
        InputConditionDiscriminatorFake = CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float);
        var vsDiscriminator = new VariableVector();
        vsDiscriminator.Add(InputDataDiscriminatorReal);
        vsDiscriminator.Add(InputConditionDiscriminatorReal);
        concatenatedInput = CNTKLib.Splice(vsDiscriminator, new Axis(0));
    }
    else
    {
        // Unconditional: the discriminator sees only the data itself.
        InputDataDiscriminatorReal = CNTKLib.InputVariable(new int[] { outputSize }, DataType.Float);
        InputConditionDiscriminatorReal = null;
        InputConditionDiscriminatorFake = null;
        concatenatedInput = InputDataDiscriminatorReal;
    }

    // Single sigmoid output with square loss: 1 = real, 0 = fake.
    var inputD = new InputLayerCNTKVar(concatenatedInput);
    var outputLayerD = new OutputLayerDense(1, new SigmoidDef(), OutputLayerDense.LossFunction.Square);

    // Create the discriminator sequential model.
    DiscriminatorSequentialModel = new SequentialNetworkDense(inputD, LayerDefineHelper.DenseLayers(discriminatorLayerCount, discriminatorLayerSize, true, NormalizationMethod.None), outputLayerD, device);

    // Real-side discriminator output.
    DiscriminatorRealOutput = DiscriminatorSequentialModel.OutputLayer.GetOutputVariable();

    // Clone the discriminator with shared parameters, rewiring its inputs so the
    // clone judges the generator's fake data (and the fake-side condition, if any).
    // ParameterCloningMethod.Share means both real and fake paths train the same weights.
    if (inputConditionSize > 0)
    {
        DiscriminatorFakeOutput = ((Function)DiscriminatorRealOutput).Clone(ParameterCloningMethod.Share, new Dictionary<Variable, Variable>() { { InputDataDiscriminatorReal, fakeDataFromGenerator }, { InputConditionDiscriminatorReal, InputConditionDiscriminatorFake } });
    }
    else
    {
        DiscriminatorFakeOutput = ((Function)DiscriminatorRealOutput).Clone(ParameterCloningMethod.Share, new Dictionary<Variable, Variable>() { { InputDataDiscriminatorReal, fakeDataFromGenerator } });
    }

    // Combine both outputs so one evaluation yields real and fake scores together.
    DiscriminatorMerged = Function.Combine(new List<Variable>() { DiscriminatorRealOutput, DiscriminatorFakeOutput });
}
/// <summary>
/// Builds a Q-network with a convolutional front end (SELU activations, optional
/// 2x2 max pooling per conv layer) followed by a dense stack that outputs one
/// Q-value per action.
/// </summary>
/// <param name="inputWidth">Width of the input image.</param>
/// <param name="inputHeight">Height of the input image.</param>
/// <param name="inputDepth">Channel count of the input image.</param>
/// <param name="actionSize">Number of actions (size of the Q-value output).</param>
/// <param name="filterSizes">Square filter size per conv layer.</param>
/// <param name="filterDepths">Output channel count per conv layer.</param>
/// <param name="strides">Stride per conv layer.</param>
/// <param name="pooling">Whether to apply 2x2 max pooling after each conv layer.</param>
/// <param name="densehiddenLayers">Number of hidden dense layers after flattening.</param>
/// <param name="densehiddenSize">Width of each hidden dense layer.</param>
/// <param name="denseUseBias">Whether the hidden dense layers use bias.</param>
/// <param name="device">CNTK device the parameters are created on.</param>
/// <param name="denseInitialWeightScale">Initial weight scale for the dense layers.</param>
public QNetworkConvSimple(int inputWidth, int inputHeight, int inputDepth, int actionSize, int[] filterSizes, int[] filterDepths, int[] strides, bool[] pooling, int densehiddenLayers, int densehiddenSize, bool denseUseBias, DeviceDescriptor device, float denseInitialWeightScale = 0.01f)
{
    Device = device;
    StateSize = inputWidth * inputHeight * inputDepth;
    ActionSize = actionSize;
    InputDimension = new int[3] { inputWidth, inputHeight, inputDepth };

    // Create the conv network part.
    InputState = CNTKLib.InputVariable(InputDimension, DataType.Float);

    // Fix: `pooling` is indexed in the same loop as the other arrays, so its
    // length must be validated too (the original assert omitted it).
    Debug.Assert(filterSizes.Length == strides.Length && filterDepths.Length == filterSizes.Length && pooling.Length == filterSizes.Length,
        "Length of filterSizes, filterDepths, strides and pooling are not the same");

    var lastLayer = InputState;
    for (int i = 0; i < filterSizes.Length; ++i)
    {
        // Conv layers use SELU activation with matching SELU-style initialization
        // (scale ~ 1/fan-in of the filter window).
        lastLayer = Layers.Convolution2D(lastLayer, filterDepths[i], filterSizes[i], filterSizes[i], device, strides[i], true, true, "QConv_" + i, Mathf.Sqrt((1.0f / (filterSizes[i] * filterSizes[i]))));
        lastLayer = new SELUDef().BuildNew(lastLayer, device, "");

        // Optional 2x2 max pooling.
        // NOTE(review): the literal name "pool2" is reused for every pooling layer —
        // confirm CNTK uniquifies node names, or make it "pool_" + i if unique
        // names are needed for model inspection/serialization.
        if (pooling[i])
        {
            lastLayer = CNTKLib.Pooling(lastLayer, PoolingType.Max, new int[] { 2, 2 }, new int[] { 2, 2 }, BoolVector.Repeat(true, 2), false, true, "pool2");
        }
    }

    // Flatten the conv output before the dense stack.
    lastLayer = CNTKLib.Flatten(lastLayer, new Axis(3), "Flatten");

    // Dense layers producing one Q-value per action (no bias on the output head).
    var inputA = new InputLayerCNTKVar(lastLayer);
    var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);
    outputA.HasBias = false;
    outputA.InitialWeightScale = denseInitialWeightScale;
    SequentialNetworkDense qNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(densehiddenLayers, densehiddenSize, denseUseBias, NormalizationMethod.None, 0, denseInitialWeightScale, new ReluDef()), outputA, device);

    OutputQs = outputA.GetOutputVariable();
}