Example #1
        public QNetworkSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
        {
            Device     = device;
            StateSize  = stateSize;
            ActionSize = actionSize;

            //create the Q network body
            var inputA  = new InputLayerDense(stateSize);
            var outputA = new OutputLayerDense(hiddenSize, null, OutputLayerDense.LossFunction.None);

            outputA.HasBias            = false;
            outputA.InitialWeightScale = initialWeightScale;
            SequentialNetworkDense qNetwork = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, false, NormalizationMethod.None, 0, initialWeightScale, new ReluDef()), outputA, device);

            //separate the advantage and value streams (dueling architecture); this is reported to work better
            var midStream       = outputA.GetOutputVariable();
            var advantageStream = CNTKLib.Slice(midStream, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(0, 1), IntVector.Repeat(hiddenSize / 2, 1));
            var valueStream     = CNTKLib.Slice(midStream, AxisVector.Repeat(new Axis(0), 1), IntVector.Repeat(hiddenSize / 2, 1), IntVector.Repeat(hiddenSize, 1));
            var adv             = Layers.Dense(advantageStream, actionSize, device, false, "QNetworkAdvantage", initialWeightScale);
            var value           = Layers.Dense(valueStream, 1, device, false, "QNetworkValue", initialWeightScale);

            InputState = inputA.InputVariable;
            //OutputQs = outputA.GetOutputVariable();
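            //dueling aggregation: Q(s,a) = V(s) + (A(s,a) - mean over actions of A(s,a))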
            OutputQs = value.Output + CNTKLib.Minus(adv, CNTKLib.ReduceMean(adv, Axis.AllStaticAxes())).Output;
        }
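A minimal construction sketch for the dueling Q-network above; the observation/action/layer sizes are illustrative placeholders, not values taken from the original project.

        // Hypothetical usage (sizes are placeholders; hiddenSize should be even so the advantage/value split is clean)
        var device   = DeviceDescriptor.CPUDevice;
        var qNetwork = new QNetworkSimple(stateSize: 8, actionSize: 4, numLayers: 2, hiddenSize: 64, device: device);
        // qNetwork.InputState takes the observation; qNetwork.OutputQs yields the aggregated Q-values.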
Example #2
        public PPONetworkContinuousSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
        {
            Device     = device;
            StateSize  = stateSize;
            ActionSize = actionSize;

            //create actor network part
            var inputA  = new InputLayerDense(stateSize);
            var outputA = new OutputLayerDense(actionSize, null, OutputLayerDense.LossFunction.None);

            outputA.InitialWeightScale = initialWeightScale;
            valueNetwork        = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);
            InputState          = inputA.InputVariable;
            OutputMean          = outputA.GetOutputVariable();
            OutputProbabilities = null; //this is for discrete action only.

            //the variance output will use a separate parameter, as in Unity's implementation
            var log_sigma_sq = new Parameter(new int[] { actionSize }, DataType.Float, CNTKLib.ConstantInitializer(0), device, "PPO.log_sigma_square");

            //variance = exp(log(sigma^2))
            OutputVariance = CNTKLib.Exp(log_sigma_sq);

            PolicyFunction = Function.Combine(new Variable[] { OutputMean, OutputVariance });

            //create value network
            var inputC  = new InputLayerCNTKVar(InputState);
            var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);

            outputC.InitialWeightScale = initialWeightScale;
            policyNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
            OutputValue   = outputC.GetOutputVariable();
            ValueFunction = OutputValue.ToFunction();

            //PolicyParameters.Add(log_sigma_sq);
        }
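A similar hedged sketch for the continuous-action PPO network; the sizes below are placeholders chosen only for illustration.

        // Hypothetical usage: a 24-dimensional observation and 4 continuous actions
        var device = DeviceDescriptor.CPUDevice;
        var ppoNet = new PPONetworkContinuousSimple(stateSize: 24, actionSize: 4, numLayers: 2, hiddenSize: 128, device: device);
        // ppoNet.PolicyFunction combines OutputMean and OutputVariance; ppoNet.ValueFunction wraps OutputValue.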
Example #3
        public PPONetworkDiscreteSimple(int stateSize, int actionSize, int numLayers, int hiddenSize, DeviceDescriptor device, float initialWeightScale = 0.01f)
        {
            Device     = device;
            StateSize  = stateSize;
            ActionSize = actionSize;

            //create actor network part
            var inputA  = new InputLayerDense(stateSize);
            var outputA = new OutputLayerDense(actionSize, new SoftmaxDef(), OutputLayerDense.LossFunction.None);

            outputA.InitialWeightScale = initialWeightScale;
            valueNetwork        = new SequentialNetworkDense(inputA, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputA, device);
            InputState          = inputA.InputVariable;
            OutputMean          = null;
            OutputVariance      = null;
            OutputProbabilities = outputA.GetOutputVariable(); //this is for discrete action only.

            PolicyFunction = OutputProbabilities.ToFunction();

            //create value network
            var inputC  = new InputLayerCNTKVar(InputState);
            var outputC = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.None);

            outputC.InitialWeightScale = initialWeightScale;
            policyNetwork = new SequentialNetworkDense(inputC, LayerDefineHelper.DenseLayers(numLayers, hiddenSize, true, NormalizationMethod.None, 0, initialWeightScale, new TanhDef()), outputC, device);
            OutputValue   = outputC.GetOutputVariable();
            ValueFunction = OutputValue.ToFunction();
        }
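And the discrete-action counterpart, again with illustrative sizes only.

        // Hypothetical usage: a 16-dimensional observation and 6 discrete actions
        var device  = DeviceDescriptor.CPUDevice;
        var ppoDisc = new PPONetworkDiscreteSimple(stateSize: 16, actionSize: 6, numLayers: 2, hiddenSize: 128, device: device);
        // ppoDisc.PolicyFunction evaluates to the softmax action probabilities; ppoDisc.ValueFunction gives the state value.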
Example #4
    public void CreateResnode()
    {
        var input = new InputLayerDense(2);

        //outputLayer = new OutputLayerDenseBayesian(1);
        outputLayer = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.Square);

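        //plain dense body: 10 hidden layers of width 5, no normalization (ResNode variant commented out below)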
        network = new SequentialNetworkDense(input, LayerDefineHelper.DenseLayers(10, 5, true, NormalizationMethod.None), outputLayer, DeviceDescriptor.CPUDevice);
        //network = new SequentialNetworkDense(input, LayerDefineHelper.ResNodeLayers(10, 5), outputLayer, DeviceDescriptor.CPUDevice);

        trainer = new TrainerSimpleNN(network, LearnerDefs.AdamLearner(lr), DeviceDescriptor.CPUDevice);

        dataPlane.network = this;
    }
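The commented-out line above hints at a residual-node variant of the same wiring. Below is a hedged sketch of that swap, assuming LayerDefineHelper.ResNodeLayers(10, 5) takes the same (layer count, width) arguments shown in the comment; the method name is hypothetical.

    // Sketch of the ResNode variant suggested by the commented line; lr is the same learning-rate field used above.
    public void CreateResnodeVariant()
    {
        var input  = new InputLayerDense(2);
        var output = new OutputLayerDense(1, null, OutputLayerDense.LossFunction.Square);

        network = new SequentialNetworkDense(input, LayerDefineHelper.ResNodeLayers(10, 5), output, DeviceDescriptor.CPUDevice);
        trainer = new TrainerSimpleNN(network, LearnerDefs.AdamLearner(lr), DeviceDescriptor.CPUDevice);

        dataPlane.network = this;
    }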