        /// <summary>
        /// Returns the output tensor and the list of weights of a new Dense layer applied to the input.
        /// </summary>
        /// <param name="x">input tensor</param>
        /// <returns>(output tensor, list of weights)</returns>
        public ValueTuple <Tensor, List <Tensor> > Call(Tensor x)
        {
            var layer  = new Dense(size, Activation.GetActivationFunction(activationFunction), useBias, kernel_initializer: new VarianceScaling(scale: initialScale));
            var output = layer.Call(x)[0];

            return(ValueTuple.Create(output, layer.weights));
        }
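A typical call site, sketched under the assumption that the caller keeps its own List<Tensor> of trainable weights (the names here are illustrative, not from the original source):

    // hypothetical usage: apply the layer to an input tensor and collect its weights
    var (output, layerWeights) = Call(inputTensor);
    allWeights.AddRange(layerWeights);   // allWeights: a List<Tensor> maintained by the caller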
Example #2
    protected void CreateCommonLayers(Tensor inVectorObs, List <Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, out Tensor outValue, out Tensor encodedAllActor, bool shareEncoder = false)
    {
        actorWeights  = new List <Tensor>();
        criticWeights = new List <Tensor>();

        ValueTuple <Tensor, List <Tensor> > actorEncoded, criticEncoded;

        if (!shareEncoder)
        {
            actorEncoded  = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "Actor");
            criticEncoded = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "Critic");
        }
        else
        {
            actorEncoded  = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "ActorCritic");
            criticEncoded = actorEncoded;
        }

        actorWeights.AddRange(actorEncoded.Item2);
        criticWeights.AddRange(criticEncoded.Item2);

        var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));

        outValue = criticOutput.Call(criticEncoded.Item1)[0];
        criticWeights.AddRange(criticOutput.weights);

        encodedAllActor = actorEncoded.Item1;
    }
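When shareEncoder is true, one observation stream feeds both heads, and because criticEncoded aliases actorEncoded the encoder weights end up in both actorWeights and criticWeights; callers that merge the two lists should expect that duplication. A hypothetical invocation (argument names are illustrative):

    // hypothetical call; with a shared encoder its weights appear in both weight lists
    Tensor value, actorEncoding;
    CreateCommonLayers(vectorObs, visualObs, null, null, out value, out actorEncoding, shareEncoder: true);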
Example #3
    public override List <Tensor> BuildNetworkForDiscreteActionSpace(Tensor inVectorObs, List <Tensor> inVisualObs, Tensor inMemery, int[] outActionSizes)
    {
        Tensor encodedAllActor = CreateCommonLayers(inVectorObs, inVisualObs, inMemery, null);

        List <Tensor> policy_branches = new List <Tensor>();

        foreach (var size in outActionSizes)
        {
            var tempOutput = new Dense(units: size, activation: null, use_bias: outputLayerBias, kernel_initializer: new VarianceScaling(scale: outputLayerInitialScale));
            policy_branches.Add(tempOutput.Call(encodedAllActor)[0]);
            weights.AddRange(tempOutput.weights);
        }
        return(policy_branches);
    }
    public override void BuildNetworkForContinuousActionSapce(Tensor inVectorObs, List <Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int outActionSize,
                                                              out Tensor outActionMean, out Tensor outValue, out Tensor outActionLogVariance)
    {
        Debug.Assert(inMemery == null, "Currently recurrent input is not supported by RLNetworkSimpleAC");
        Debug.Assert(inPrevAction == null, "Currently previous action input is not supported by RLNetworkSimpleAC");
        Debug.Assert(!(inVectorObs == null && inVisualObs == null), "Network needs at least one vector observation or visual observation");
        criticWeights   = new List <Tensor>();
        actorWeights    = new List <Tensor>();
        actorVarWeights = new List <Tensor>();


        var actorMeanEncoded = CreateObservationStream(inVectorObs, actorHiddenLayers, inVisualObs, inMemery, inPrevAction, "ActorMean");
        var actorVarEncoded  = CreateObservationStream(inVectorObs, actorHiddenLayers, inVisualObs, inMemery, inPrevAction, "ActorVar");
        var criticEncoded    = CreateObservationStream(inVectorObs, criticHiddenLayers, inVisualObs, inMemery, inPrevAction, "Critic");

        actorWeights.AddRange(actorMeanEncoded.Item2);
        actorVarWeights.AddRange(actorVarEncoded.Item2);
        criticWeights.AddRange(criticEncoded.Item2);

        //encoded outputs of each observation stream
        Tensor encodedAllActorMean = actorMeanEncoded.Item1;
        Tensor encodedAllActorVar  = actorVarEncoded.Item1;
        Tensor encodedAllCritic    = criticEncoded.Item1;


        //outputs
        //mean
        var actorOutputMean = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));

        outActionMean = actorOutputMean.Call(encodedAllActorMean)[0];
        if (useSoftclipForMean)
        {
            outActionMean = SoftClip(outActionMean, minMean, maxMean);
        }
        actorWeights.AddRange(actorOutputMean.weights);

        //var
        var actorOutputVar = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));

        outActionLogVariance = actorOutputVar.Call(encodedAllActorVar)[0];
        //outActionLogVariance = Current.K.exp(outActionLogVariance);
        actorVarWeights.AddRange(actorOutputVar.weights);

        //critic
        var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));

        outValue = criticOutput.Call(encodedAllCritic)[0];
        criticWeights.AddRange(criticOutput.weights);
    }
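The two actor heads parameterize a diagonal Gaussian policy: outActionMean is the mean and outActionLogVariance is log(sigma^2) per action dimension. A scalar sketch of how a trainer might sample an action from these outputs (illustrative only; real trainers operate on whole tensors):

    // sample a ~ N(mean, exp(logVar)) for one action dimension
    static double SampleAction(double mean, double logVar, System.Random rng)
    {
        // Box-Muller transform for a standard normal draw
        double u1 = 1.0 - rng.NextDouble();
        double u2 = rng.NextDouble();
        double z  = System.Math.Sqrt(-2.0 * System.Math.Log(u1)) * System.Math.Sin(2.0 * System.Math.PI * u2);
        return mean + System.Math.Exp(0.5 * logVar) * z;   // sigma = exp(logVar / 2)
    }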
Example #5
    public override void BuildNetworkForDiscreteActionSpace(Tensor inVectorObs, List <Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int[] outActionSizes, out Tensor[] outActionLogits, out Tensor outValue)
    {
        Tensor encodedAllActor = null;

        CreateCommonLayers(inVectorObs, inVisualObs, inMemery, inPrevAction, out outValue, out encodedAllActor, shareEncoder);

        List <Tensor> policy_branches = new List <Tensor>();

        foreach (var size in outActionSizes)
        {
            var tempOutput = new Dense(units: size, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
            policy_branches.Add(tempOutput.Call(encodedAllActor)[0]);
            actorWeights.AddRange(tempOutput.weights);
        }
        outActionLogits = policy_branches.ToArray();
    }
Example #6
    protected void CreateDiscriminators(Tensor inputExternal, Tensor inputFromGenerator, Tensor inputCondition, out Tensor discriminatorOutputExternal, out Tensor discriminatorOutputFromGenerator)
    {
        Tensor        inputAllReal           = null;
        List <Tensor> inputListExternal      = null;
        List <Tensor> inputListFromGenerator = null;

        if (inputCondition != null)
        {
            inputListExternal = new List <Tensor>()
            {
                inputCondition, inputExternal
            };
            inputListFromGenerator = new List <Tensor>()
            {
                inputCondition, inputFromGenerator
            };
            inputAllReal = new Concat(1).Call(inputListExternal)[0];
        }
        else
        {
            inputListFromGenerator = new List <Tensor>()
            {
                inputFromGenerator
            };
            inputListExternal = new List <Tensor>()
            {
                inputExternal
            };
            inputAllReal = inputExternal;
        }

        var beforeOutput = BuildSequentialLayers(discriminatorHiddenLayers, inputAllReal);

        var outputLayer = new Dense(1, new Sigmoid(), discriminatorOutputLayerBias, kernel_initializer: new GlorotUniform(scale: discriminatorOutputLayerInitialScale));

        discriminatorOutputExternal = outputLayer.Call(beforeOutput.Item1)[0];

        Model model = new Model(inputListExternal, new List <Tensor>()
        {
            discriminatorOutputExternal
        });

        discriminatorOutputFromGenerator = model.Call(inputListFromGenerator)[0];

        discriminatorWeights = model.weights;
    }
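Note the weight sharing: the discriminator layers are built once on the real (external) input, wrapped in a Model, and that same Model is then called on the generator's output, so both discriminator scores come from one set of weights. Any further reuse inside this method would share them too (anotherInputList is hypothetical and must match the structure of inputListExternal):

    // hypothetical: scoring a third input reuses the same discriminator weights
    var extraScore = model.Call(anotherInputList)[0];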
Example #7
    public override void BuildNetworkForContinuousActionSapce(Tensor inVectorObs, List <Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int outActionSize,
                                                              out Tensor outActionMean, out Tensor outValue, out Tensor outActionLogVariance)
    {
        Tensor encodedAllActor = null;

        CreateCommonLayers(inVectorObs, inVisualObs, inMemery, inPrevAction, out outValue, out encodedAllActor, shareEncoder);

        //outputs
        var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));

        outActionMean = actorOutput.Call(encodedAllActor)[0];
        actorWeights.AddRange(actorOutput.weights);

        var logSigmaSq = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");

        outActionLogVariance = logSigmaSq;
        actorWeights.Add(logSigmaSq);
    }
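Unlike example #3, the log variance here is a single free variable rather than a network head, so exploration noise does not depend on the state. If a caller needs the variance itself, the conversion mirrors example #11 below (a sketch, not part of the original method):

    // variance = exp(log sigma^2), using the same backend op as example #11
    var variance = Current.K.exp(outActionLogVariance);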
Example #8
    protected Tensor CreateGenerator(Tensor inputCondition, Tensor inputNoise, int outputSize)
    {
        Debug.Assert(inputCondition != null || inputNoise != null, "GAN needs at least one of an input condition or input noise");

        Tensor        inputAll  = null;
        List <Tensor> inputList = null;

        if (inputNoise != null && inputCondition != null)
        {
            inputList = new List <Tensor>()
            {
                inputCondition, inputNoise
            };
            inputAll = new Concat(1).Call(inputList)[0];
        }
        else if (inputNoise != null)
        {
            inputAll  = inputNoise;
            inputList = new List <Tensor>()
            {
                inputNoise
            };
        }
        else
        {
            inputAll  = inputCondition;
            inputList = new List <Tensor>()
            {
                inputCondition
            };
        }

        var beforeOutput = BuildSequentialLayers(generatorHiddenLayers, inputAll);

        var outputLayer = new Dense(outputSize, null, generatorOutputLayerBias, kernel_initializer: new GlorotUniform(scale: generatorOutputLayerInitialScale));
        var output      = outputLayer.Call(beforeOutput.Item1)[0];

        generatorWeights = new List <Tensor>();
        generatorWeights.AddRange(beforeOutput.Item2);
        generatorWeights.AddRange(outputLayer.weights);

        return(output);
    }
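A conditional-GAN wiring sketch, assuming UnityTFUtils.Input (as used in example #9 below) supplies the input tensors; all names and sizes here are illustrative:

    // hypothetical wiring: condition + noise -> generator; real and fake -> discriminators
    var condition = UnityTFUtils.Input(shape: new int?[] { conditionSize })[0];
    var noise     = UnityTFUtils.Input(shape: new int?[] { noiseSize })[0];
    var realData  = UnityTFUtils.Input(shape: new int?[] { dataSize })[0];

    Tensor generated = CreateGenerator(condition, noise, dataSize);

    Tensor scoreReal, scoreFake;
    CreateDiscriminators(realData, generated, condition, out scoreReal, out scoreFake);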
Example #9
    public void TestLayer()
    {
        var inputLayer = UnityTFUtils.Input(shape: new int?[] { 3 });

        var dense1 = new Dense(10, new ReLU(), true);
        var dense2 = new Dense(1, new ReLU(), true);

        var target = UnityTFUtils.Input(shape: new int?[] { 1 });

        var o = dense1.Call(inputLayer[0]);

        o = dense2.Call(o[0]);

        var lossM = new MeanSquareError();

        lossM.Call(target[0], o[0]);



        ((UnityTFBackend)K).ExportGraphDef("SavedGraph/testLayer.pb");
    }
Example #10
    public override ValueTuple <Tensor, Tensor> BuildNetworkForContinuousActionSapce(Tensor inVectorObservation, List <Tensor> inVisualObservation, Tensor inMemery, int outActionSize)
    {
        var encodedActor = CreateCommonLayers(inVectorObservation, inVisualObservation, inMemery, null);


        //outputs
        var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
        var outAction   = actorOutput.Call(encodedActor)[0];

        weights.AddRange(actorOutput.weights);

        Tensor outVar = null;

        if (useVarianceForContinuousAction)
        {
            var logSigmaSq = new Dense(units: 1, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
            outVar = Current.K.exp(logSigmaSq.Call(encodedActor)[0]) + minStd * minStd;
            weights.AddRange(logSigmaSq.weights);
        }

        return(ValueTuple.Create(outAction, outVar));
    }
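Here the variance is state-dependent (a Dense head on the encoding) rather than a free variable, and the minStd term floors it: since exp(.) is positive, the variance always exceeds minStd squared, so the policy's standard deviation never collapses below minStd. In scalar form (illustrative):

    // scalar view of the variance head: exp(h) + minStd^2 > minStd^2 for any head output h
    double Variance(double headOutput, double minStd) => System.Math.Exp(headOutput) + minStd * minStd;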
Example #11
    public override void BuildNetwork(Tensor inVectorstate, List <Tensor> inVisualState, Tensor inMemery, Tensor inPrevAction, int outActionSize, SpaceType actionSpace,
                                      out Tensor outAction, out Tensor outValue, out Tensor outVariance)
    {
        Debug.Assert(inMemery == null, "Currently recurrent input is not supported by RLNetworkSimpleAC");
        Debug.Assert(inPrevAction == null, "Currently previous action input is not supported by RLNetworkSimpleAC");
        Debug.Assert(!(inVectorstate == null && inVisualState == null), "Network needs at least one vector observation or visual observation");
        //Debug.Assert(actionSpace == SpaceType.continuous, "Only continuous action space is supported by RLNetworkSimpleAC");
        criticWeights = new List <Tensor>();
        actorWeights  = new List <Tensor>();

        //visual encoders
        Tensor encodedVisualActor  = null;
        Tensor encodedVisualCritic = null;

        if (inVisualState != null)
        {
            List <Tensor> visualEncodedActor  = new List <Tensor>();
            List <Tensor> visualEncodedCritic = new List <Tensor>();
            foreach (var v in inVisualState)
            {
                var ha = CreateVisualEncoder(v, actorHiddenLayers, "ActorVisualEncoder");
                var hc = CreateVisualEncoder(v, criticHiddenLayers, "CriticVisualEncoder");

                actorWeights.AddRange(ha.Item2);
                visualEncodedActor.Add(ha.Item1);

                criticWeights.AddRange(hc.Item2);
                visualEncodedCritic.Add(hc.Item1);
            }
            if (inVisualState.Count > 1)
            {
                //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
                encodedVisualActor  = Current.K.stack(visualEncodedActor, 1);
                encodedVisualActor  = Current.K.batch_flatten(encodedVisualActor);
                encodedVisualCritic = Current.K.stack(visualEncodedCritic, 1);
                encodedVisualCritic = Current.K.batch_flatten(encodedVisualCritic);
            }
            else
            {
                encodedVisualActor  = visualEncodedActor[0];
                encodedVisualCritic = visualEncodedCritic[0];
            }
        }



        //vector states encode
        Tensor encodedVectorStateActor  = null;
        Tensor encodedVectorStateCritic = null;

        if (inVectorstate != null)
        {
            var output = BuildSequentialLayers(actorHiddenLayers, inVectorstate, "ActorStateEncoder");
            encodedVectorStateActor = output.Item1;
            actorWeights.AddRange(output.Item2);
            output = BuildSequentialLayers(criticHiddenLayers, inVectorstate, "CriticStateEncoder");
            encodedVectorStateCritic = output.Item1;
            criticWeights.AddRange(output.Item2);
        }

        //concat all inputs
        Tensor encodedAllActor  = null;
        Tensor encodedAllCritic = null;

        if (inVisualState == null && inVectorstate != null)
        {
            encodedAllActor  = encodedVectorStateActor;
            encodedAllCritic = encodedVectorStateCritic;
        }
        else if (inVisualState != null && inVectorstate == null)
        {
            encodedAllActor  = encodedVisualActor;
            encodedAllCritic = encodedVisualCritic;
        }
        else if (inVisualState != null && inVectorstate != null)
        {
            //Debug.LogWarning("Tensorflow does not have gradient for concat operation in C yet. Please only use one type of observation if you need training.");
            encodedAllActor = Current.K.concat(new List <Tensor>()
            {
                encodedVectorStateActor, encodedVisualActor
            }, 1);
            encodedAllCritic = Current.K.concat(new List <Tensor>()
            {
                encodedVectorStateCritic, encodedVisualCritic
            }, 1);
        }


        //outputs
        var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));

        outAction = actorOutput.Call(encodedAllActor)[0];
        if (actionSpace == SpaceType.discrete)
        {
            outAction = Current.K.softmax(outAction);
        }
        actorWeights.AddRange(actorOutput.weights);

        var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));

        outValue = criticOutput.Call(encodedAllCritic)[0];
        criticWeights.AddRange(criticOutput.weights);

        //output variance; for this simple network it does not depend on the inputs
        if (actionSpace == SpaceType.continuous)
        {
            var logSigmaSq = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");
            outVariance = Current.K.exp(logSigmaSq);
            actorWeights.Add(logSigmaSq);
        }
        else
        {
            outVariance = null;
        }
    }
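The stack-then-flatten pattern above sidesteps the missing concat gradient: stacking n visual encodings of width w along axis 1 yields shape (batch, n, w), and batch_flatten restores (batch, n*w), the same contents a concat along axis 1 would produce. Reduced to two encodings (a sketch using the same backend ops; e1 and e2 are assumed (batch, w) tensors):

    // assumed shapes: e1, e2 are (batch, w); result is (batch, 2*w)
    var stacked   = Current.K.stack(new List<Tensor>() { e1, e2 }, 1);
    var flattened = Current.K.batch_flatten(stacked);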
Example #12
    public override ValueTuple <Tensor, Tensor> BuildNetwork(Tensor inVectorstate, List <Tensor> inVisualState, Tensor inMemery, int outActionSize, SpaceType actionSpace)
    {
        Debug.Assert(inMemery == null, "Currently recurrent input is not supported by SupervisedLearningNetworkSimple");
        Debug.Assert(!(inVectorstate == null && inVisualState == null), "Network needs at least one vector observation or visual observation");


        weights = new List <Tensor>();

        //visual encoders
        Tensor encodedVisualActor = null;

        if (inVisualState != null)
        {
            List <Tensor> visualEncodedActor = new List <Tensor>();
            foreach (var v in inVisualState)
            {
                var ha = CreateVisualEncoder(v, hiddenLayers, "ActorVisualEncoder");
                visualEncodedActor.Add(ha);
            }
            if (inVisualState.Count > 1)
            {
                //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
                encodedVisualActor = Current.K.stack(visualEncodedActor, 1);
                encodedVisualActor = Current.K.batch_flatten(encodedVisualActor);
            }
            else
            {
                encodedVisualActor = visualEncodedActor[0];
            }
        }

        //vector states encode
        Tensor encodedVectorStateActor = null;

        if (inVectorstate != null)
        {
            var hiddens = BuildSequentialLayers(hiddenLayers, inVectorstate, "ActorStateEncoder");
            encodedVectorStateActor = hiddens.Item1;
            weights.AddRange(hiddens.Item2);
        }

        //concat all inputs
        Tensor encodedAllActor = null;

        if (inVisualState == null && inVectorstate != null)
        {
            encodedAllActor = encodedVectorStateActor;
        }
        else if (inVisualState != null && inVectorstate == null)
        {
            encodedAllActor = encodedVisualActor;
        }
        else if (inVisualState != null && inVectorstate != null)
        {
            //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
            encodedAllActor = Current.K.concat(new List <Tensor>()
            {
                encodedVectorStateActor, encodedVisualActor
            }, 1);
        }


        //outputs
        var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
        var outAction   = actorOutput.Call(encodedAllActor)[0];

        if (actionSpace == SpaceType.discrete)
        {
            outAction = Current.K.softmax(outAction);
        }

        weights.AddRange(actorOutput.weights);

        Tensor outVar = null;

        if (useVarianceForContinuousAction && actionSpace == SpaceType.continuous)
        {
            var logSigmaSq = new Dense(units: 1, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
            outVar = Current.K.exp(logSigmaSq.Call(encodedAllActor)[0]) + minStd * minStd;
            weights.AddRange(logSigmaSq.weights);
        }

        return(ValueTuple.Create(outAction, outVar));
    }
Example #13
    public void BuildNetwork(Tensor inVectorstateLowlevel, Tensor inVectorstateHighlevel, int outActionSize, SpaceType actionSpace,
                             out Tensor outAction, out Tensor outValue, out Tensor outVariance)
    {
        weightsLowlevel  = new List <Tensor>();
        weightsHighLevel = new List <Tensor>();


        //lowlevel encoder
        var    lowlevelEncoder = BuildSequentialLayers(inLowlevelLayers, inVectorstateLowlevel, "LowlevelEncoder");
        Tensor encodedLowlevel = lowlevelEncoder.Item1;

        weightsLowlevel.AddRange(lowlevelEncoder.Item2);



        //highlevel
        Tensor concatedStates = null;

        if (inVectorstateHighlevel != null)
        {
            concatedStates = Current.K.concat(new List <Tensor>()
            {
                encodedLowlevel, inVectorstateHighlevel
            }, 1);
        }
        else
        {
            concatedStates = encodedLowlevel;
        }

        var    highlevelEncoder = BuildSequentialLayers(actorHighlevelLayers, concatedStates, "ActorHighlevelEncoder");
        Tensor outputHighlevel  = highlevelEncoder.Item1;

        weightsHighLevel.AddRange(highlevelEncoder.Item2);

        //lowlevel actor output
        var    actorFinal      = BuildSequentialLayers(actorLowlevelLayers, outputHighlevel, "ActorLowlevelOut");
        Tensor encodedAllActor = actorFinal.Item1;

        weightsLowlevel.AddRange(actorFinal.Item2);

        //highlevel value output
        var    valueFinal       = BuildSequentialLayers(valueHighlevelLayers, concatedStates, "ValueHighlevelOut");
        Tensor encodedAllCritic = valueFinal.Item1;

        weightsHighLevel.AddRange(valueFinal.Item2);

        //outputs
        using (Current.K.name_scope("ActorOutput"))
        {
            var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
            outAction = actorOutput.Call(encodedAllActor)[0];
            if (actionSpace == SpaceType.discrete)
            {
                outAction = Current.K.softmax(outAction);
            }

            weightsLowlevel.AddRange(actorOutput.weights);
        }

        using (Current.K.name_scope("CriticOutput"))
        {
            var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));
            outValue = criticOutput.Call(encodedAllCritic)[0];
            weightsHighLevel.AddRange(criticOutput.weights);
        }
        //variance: actor network output, state-independent in this implementation
        if (actionSpace == SpaceType.continuous)
        {
            using (Current.K.name_scope("ActorVarianceOutput"))
            {
                logSigmaSq  = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");
                outVariance = Current.K.exp(logSigmaSq);
                weightsHighLevel.Add(logSigmaSq);
            }
        }
        else
        {
            outVariance = null;
        }
    }
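Keeping weightsLowlevel and weightsHighLevel separate lets a trainer update the two levels independently, for instance freezing the low-level controller while fine-tuning the high level. A minimal sketch of selecting what to optimize (hypothetical trainer-side code):

    // hypothetical: hand only the high-level weights to the optimizer; the low level stays frozen
    var trainable = new List<Tensor>();
    trainable.AddRange(weightsHighLevel);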