Example #1
0
        /// <summary>
        /// Builds a PPO network for discrete actions: a softmax policy head and a single-output value head.
        /// Each head gets its own stack of dense layers, and both read the same state input variable.
        /// </summary>
        /// <param name="stateSize">Size given to the dense input layer; also stored in <c>StateSize</c>.</param>
        /// <param name="actionSize">Output count of the softmax policy head; also stored in <c>ActionSize</c>.</param>
        /// <param name="numLayers">Layer count forwarded to <c>LayerDefineHelper.DenseLayers</c> for both heads.</param>
        /// <param name="hiddenSize">Layer size forwarded to <c>LayerDefineHelper.DenseLayers</c> for both heads.</param>
        /// <param name="device">Device both sequential networks are created on; also stored in <c>Device</c>.</param>
        /// <param name="initialWeightScale">Initial weight scale applied to the hidden layers and to both output layers.</param>
        public PPONetworkDiscreteSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
        {
            Device     = device;
            StateSize  = stateSize;
            ActionSize = actionSize;

            // Policy (actor) head: state input -> dense layers built with TanhDef -> softmax over the actions.
            var stateInput = new InputLayerDense(stateSize);
            var policyHead = new OutputLayerDense(actionSize, new SoftmaxDef(), OutputLayerDense.LossFunction.None)
            {
                InitialWeightScale = initialWeightScale
            };
            var policyHidden = LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef());

            // NOTE(review): the actor network is stored in valueNetwork and the critic (below) in policyNetwork.
            // The field names look swapped relative to their contents - confirm against the code that reads them.
            valueNetwork = new SequentialNetworkDense(stateInput, policyHidden, policyHead, device);

            InputState = stateInput.InputVariable;

            // Mean/variance outputs are left unset here; only action probabilities are produced.
            OutputMean          = null;
            OutputVariance      = null;
            OutputProbabilities = policyHead.GetOutputVariable();
            PolicyFunction      = OutputProbabilities.ToFunction();

            // Value (critic) head: wraps the same state variable, has its own dense layers and one output
            // with no activation definition.
            var sharedStateInput = new InputLayerCNTKVar(InputState);
            var valueHead = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None)
            {
                InitialWeightScale = initialWeightScale
            };
            var valueHidden = LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef());

            policyNetwork = new SequentialNetworkDense(sharedStateInput, valueHidden, valueHead, device);
            OutputValue   = valueHead.GetOutputVariable();
            ValueFunction = OutputValue.ToFunction();
        }
Example #2
0
        /// <summary>
        /// Helper function that creates the generator network and its input variables.
        /// A noise input is created when <paramref name="inputNoiseSize"/> is positive. A condition input is
        /// created when <paramref name="inputConditionSize"/> is positive, or when there is no noise input.
        /// When both exist they are spliced along axis 0 before entering the dense layers.
        /// </summary>
        /// <param name="inputNoiseSize">Dimension of the noise input; a non-positive value means no noise input.</param>
        /// <param name="inputConditionSize">Dimension of the condition input.</param>
        /// <param name="outputSize">Output count of the generator's output layer.</param>
        /// <param name="generatorLayerSize">Layer size forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="generatorLayerCount">Layer count forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="device">Device the sequential model is created on.</param>
        protected void CreateGenerator(int inputNoiseSize, int inputConditionSize, int outputSize, int generatorLayerSize, int generatorLayerCount, DeviceDescriptor device)
        {
            bool useNoise = inputNoiseSize > 0;
            // NOTE(review): when neither size is positive, the condition input is still created with the
            // given (non-positive) inputConditionSize - confirm whether callers can ever reach that case.
            bool useCondition = inputConditionSize > 0 || !useNoise;

            InputNoiseGenerator = useNoise
                ? CNTKLib.InputVariable(new int[] { inputNoiseSize }, DataType.Float)
                : null;
            InputConditionGenerator = useCondition
                ? CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float)
                : null;

            // Pick what feeds the network: both inputs spliced together, or whichever single input exists.
            Variable generatorInput;
            if (useNoise && useCondition)
            {
                var parts = new VariableVector();
                parts.Add(InputNoiseGenerator);
                parts.Add(InputConditionGenerator);
                generatorInput = CNTKLib.Splice(parts, new Axis(0));
            }
            else
            {
                generatorInput = useNoise ? InputNoiseGenerator : InputConditionGenerator;
            }

            // Output layer has no activation definition and uses the Square loss function.
            var inputLayer   = new InputLayerCNTKVar(generatorInput);
            var outputLayer  = new OutputLayerDense(outputSize, null, OutputLayerDense.LossFunction.Square);
            var hiddenLayers = LayerDefineHelper.DenseLayers(generatorLayerCount, generatorLayerSize, true, NormalizationMethod.None);

            GeneratorSequentialModel = new SequentialNetworkDense(inputLayer, hiddenLayers, outputLayer, device);

            // Expose the model's output and the target input variable of its output layer.
            GeneratorOutput      = GeneratorSequentialModel.OutputLayer.GetOutputVariable();
            InputTargetGenerator = GeneratorSequentialModel.OutputLayer.GetTargetInputVariable();
        }
Example #3
0
        /// <summary>
        /// Builds a PPO network for continuous actions. The policy outputs a mean per action from a dense
        /// network plus a variance computed as exp() of a standalone parameter. A separate dense network on
        /// the same state input outputs a single value.
        /// </summary>
        /// <param name="stateSize">Size given to the dense input layer; also stored in <c>StateSize</c>.</param>
        /// <param name="actionSize">Output count of the mean head and shape of the log-variance parameter; also stored in <c>ActionSize</c>.</param>
        /// <param name="numLayers">Layer count forwarded to <c>LayerDefineHelper.DenseLayers</c> for both heads.</param>
        /// <param name="hiddenSize">Layer size forwarded to <c>LayerDefineHelper.DenseLayers</c> for both heads.</param>
        /// <param name="device">Device the networks and the log-variance parameter are created on; also stored in <c>Device</c>.</param>
        /// <param name="initialWeightScale">Initial weight scale applied to the hidden layers and to both output layers.</param>
        public PPONetworkContinuousSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
        {
            Device     = device;
            StateSize  = stateSize;
            ActionSize = actionSize;

            // Policy (actor) mean head: state input -> dense layers built with TanhDef -> output with no
            // activation definition.
            var stateInput = new InputLayerDense(stateSize);
            var meanHead = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None)
            {
                InitialWeightScale = initialWeightScale
            };
            var meanHidden = LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef());

            // NOTE(review): the actor network is stored in valueNetwork and the critic (below) in policyNetwork.
            // The field names look swapped relative to their contents - confirm against the code that reads them.
            valueNetwork = new SequentialNetworkDense(stateInput, meanHidden, meanHead, device);

            InputState          = stateInput.InputVariable;
            OutputMean          = meanHead.GetOutputVariable();
            OutputProbabilities = null; // only used by the discrete-action variant

            // The variance does not come from the network: it is exp() of a separate parameter (one entry per
            // action) initialized to constant 0, following Unity's implementation per the original author.
            // The parameter is only reachable through OutputVariance / PolicyFunction; it is not registered in
            // any separate parameter list here.
            var logVariance = new Parameter(new int[] { actionSize }, DataType.Float, CNTKLib.ConstantInitializer(0), device, "PPO.log_sigma_square");
            OutputVariance = CNTKLib.Exp(logVariance);

            // The policy function exposes both outputs: mean first, variance second.
            PolicyFunction = Function.Combine(new Variable[] { OutputMean, OutputVariance });

            // Value (critic) head: wraps the same state variable, has its own dense layers and one output.
            var sharedStateInput = new InputLayerCNTKVar(InputState);
            var valueHead = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None)
            {
                InitialWeightScale = initialWeightScale
            };
            var valueHidden = LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef());

            policyNetwork = new SequentialNetworkDense(sharedStateInput, valueHidden, valueHead, device);
            OutputValue   = valueHead.GetOutputVariable();
            ValueFunction = OutputValue.ToFunction();
        }
Example #4
0
        /// <summary>
        /// Helper function that creates the discriminator. It builds the network on the "real" data input,
        /// then clones it with shared parameters so the clone reads the generator's output instead.
        /// A positive <paramref name="inputConditionSize"/> makes it conditional: a condition input is
        /// spliced onto the data along axis 0, with separate condition variables for the real and fake paths.
        /// </summary>
        /// <param name="fakeDataFromGenerator">Variable substituted for the real data input in the cloned (fake) discriminator.</param>
        /// <param name="inputConditionSize">Dimension of the condition inputs; a non-positive value means no condition inputs.</param>
        /// <param name="outputSize">Dimension of the real data input variable.</param>
        /// <param name="discriminatorLayerSize">Layer size forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="discriminatorLayerCount">Layer count forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="device">Device the sequential model is created on.</param>
        protected void CreateDiscriminators(Variable fakeDataFromGenerator, int inputConditionSize, int outputSize, int discriminatorLayerSize, int discriminatorLayerCount, DeviceDescriptor device)
        {
            bool isConditional = inputConditionSize > 0;

            // Input variables: the real data input always exists, the two condition inputs only when conditional.
            InputDataDiscriminatorReal = CNTKLib.InputVariable(new int[] { outputSize }, DataType.Float);
            InputConditionDiscriminatorReal = isConditional
                ? CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float)
                : null;
            InputConditionDiscriminatorFake = isConditional
                ? CNTKLib.InputVariable(new int[] { inputConditionSize }, DataType.Float)
                : null;

            // What the real discriminator reads: data alone, or data spliced with its condition.
            Variable discriminatorInput;
            if (isConditional)
            {
                var parts = new VariableVector();
                parts.Add(InputDataDiscriminatorReal);
                parts.Add(InputConditionDiscriminatorReal);
                discriminatorInput = CNTKLib.Splice(parts, new Axis(0));
            }
            else
            {
                discriminatorInput = InputDataDiscriminatorReal;
            }

            // Single output built with SigmoidDef and the Square loss function.
            var inputLayer   = new InputLayerCNTKVar(discriminatorInput);
            var outputLayer  = new OutputLayerDense(1, new SigmoidDef(), OutputLayerDense.LossFunction.Square);
            var hiddenLayers = LayerDefineHelper.DenseLayers(discriminatorLayerCount, discriminatorLayerSize, true, NormalizationMethod.None);

            DiscriminatorSequentialModel = new SequentialNetworkDense(inputLayer, hiddenLayers, outputLayer, device);
            DiscriminatorRealOutput      = DiscriminatorSequentialModel.OutputLayer.GetOutputVariable();

            // Clone with shared parameters, swapping the real inputs for their fake counterparts.
            var replacements = new Dictionary<Variable, Variable>
            {
                { InputDataDiscriminatorReal, fakeDataFromGenerator }
            };
            if (isConditional)
            {
                replacements.Add(InputConditionDiscriminatorReal, InputConditionDiscriminatorFake);
            }
            DiscriminatorFakeOutput = ((Function)DiscriminatorRealOutput).Clone(ParameterCloningMethod.Share, replacements);

            // Combined function exposing both outputs: real first, fake second.
            DiscriminatorMerged = Function.Combine(new List<Variable> { DiscriminatorRealOutput, DiscriminatorFakeOutput });
        }
Example #5
0
        /// <summary>
        /// Builds a Q-network: a stack of 2D convolutions (each followed by SELU and an optional 2x2 max
        /// pooling), flattened and fed into dense layers that end in a bias-free output with one entry per action.
        /// </summary>
        /// <param name="inputWidth">First entry of the input dimension.</param>
        /// <param name="inputHeight">Second entry of the input dimension.</param>
        /// <param name="inputDepth">Third entry of the input dimension.</param>
        /// <param name="actionSize">Output count of the final layer; also stored in <c>ActionSize</c>.</param>
        /// <param name="filterSizes">Per convolution layer, the kernel size (used for both kernel dimensions). Its length sets the number of convolution layers.</param>
        /// <param name="filterDepths">Per convolution layer, the depth argument given to <c>Layers.Convolution2D</c>. Must have the same length as <paramref name="filterSizes"/>.</param>
        /// <param name="strides">Per convolution layer, the stride argument given to <c>Layers.Convolution2D</c>. Must have the same length as <paramref name="filterSizes"/>.</param>
        /// <param name="pooling">Per convolution layer, whether a 2x2 max pooling with stride 2x2 follows it. Needs at least one entry per convolution layer.</param>
        /// <param name="densehiddenLayers">Layer count forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="densehiddenSize">Layer size forwarded to <c>LayerDefineHelper.DenseLayers</c>.</param>
        /// <param name="denseUseBias">Third argument forwarded to <c>LayerDefineHelper.DenseLayers</c> (presumably whether dense layers have a bias - confirm).</param>
        /// <param name="device">Device the layers are created on; also stored in <c>Device</c>.</param>
        /// <param name="denseInitialWeightScale">Initial weight scale for the dense layers and the output layer.</param>
        public QNetworkConvSimple(int inputWidth, int inputHeight, int inputDepth, int actionSize,
                                  int[] filterSizes, int[] filterDepths, int[] strides, bool[] pooling,
                                  int densehiddenLayers, int densehiddenSize, bool denseUseBias, DeviceDescriptor device, float denseInitialWeightScale = 0.01f)
        {
            Device         = device;
            StateSize      = inputWidth * inputHeight * inputDepth;
            ActionSize     = actionSize;
            InputDimension = new int[] { inputWidth, inputHeight, inputDepth };

            InputState = CNTKLib.InputVariable(InputDimension, DataType.Float);

            Debug.Assert(filterSizes.Length == strides.Length && filterDepths.Length == filterSizes.Length, "Length of filterSizes,strides and filterDepth are not the same");
            // pooling is indexed once per convolution layer below, so a shorter array would throw
            // IndexOutOfRangeException halfway through construction. Longer arrays keep working as before.
            Debug.Assert(pooling.Length >= filterSizes.Length, "Length of pooling is smaller than the number of convolution layers");

            var lastLayer = InputState;

            for (int i = 0; i < filterSizes.Length; ++i)
            {
                // Convolution with a square kernel; the initial weight scale is sqrt(1 / (k * k)),
                // which the original author describes as SELU initialization. Followed by SELU.
                int   kernelSize = filterSizes[i];
                float convScale  = Mathf.Sqrt(1.0f / (kernelSize * kernelSize));

                lastLayer = Layers.Convolution2D(lastLayer, filterDepths[i],
                                                 kernelSize, kernelSize, device, strides[i], true, true, "QConv_" + i, convScale);
                lastLayer = new SELUDef().BuildNew(lastLayer, device, "");

                // Optional 2x2 max pooling with stride 2x2. Every pooling node gets the same name "pool2".
                if (pooling[i])
                {
                    lastLayer = CNTKLib.Pooling(lastLayer, PoolingType.Max, new int[] { 2, 2 }, new int[] { 2, 2 }, BoolVector.Repeat(true, 2), false, true, "pool2");
                }
            }

            lastLayer = CNTKLib.Flatten(lastLayer, new Axis(3), "Flatten");

            // Dense part: built with ReluDef, ending in an output layer without bias or activation definition.
            var inputA  = new InputLayerCNTKVar(lastLayer);
            var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None)
            {
                HasBias            = false,
                InitialWeightScale = denseInitialWeightScale
            };

            // The network object is not kept. The construction must still run before the output variable is
            // read below - presumably it is what connects outputA to the graph (confirm in SequentialNetworkDense).
            new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(densehiddenLayers, densehiddenSize, denseUseBias, NormalizationMethod.None, 0, denseInitialWeightScale, new ReluDef()), outputA, device);

            OutputQs = outputA.GetOutputVariable();
        }