/// <summary>
/// Applies one fully-connected layer to the input.
/// The layer's size, activation, bias flag and initializer scale come from this object's fields.
/// </summary>
/// <param name="x">input tensor</param>
/// <returns>(output tensor, list of the layer's trainable weights)</returns>
public ValueTuple<Tensor, List<Tensor>> Call(Tensor x)
{
    var dense = new Dense(
        size,
        Activation.GetActivationFunction(activationFunction),
        useBias,
        kernel_initializer: new VarianceScaling(scale: initialScale));

    Tensor result = dense.Call(x)[0];
    return ValueTuple.Create(result, dense.weights);
}
/// <summary>
/// Builds the observation encoder(s) shared by actor and critic, plus the scalar value head.
/// Resets and repopulates <c>actorWeights</c>/<c>criticWeights</c> as a side effect.
/// </summary>
/// <param name="inVectorObs">vector observation input (may be null)</param>
/// <param name="inVisualObs">visual observation inputs (may be null)</param>
/// <param name="inMemery">recurrent memory input</param>
/// <param name="inPrevAction">previous action input</param>
/// <param name="outValue">critic's scalar value output</param>
/// <param name="encodedAllActor">encoded features feeding the actor heads</param>
/// <param name="shareEncoder">if true, actor and critic read from one shared encoder</param>
protected void CreateCommonLayers(Tensor inVectorObs, List<Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, out Tensor outValue, out Tensor encodedAllActor, bool shareEncoder = false)
{
    actorWeights = new List<Tensor>();
    criticWeights = new List<Tensor>();

    ValueTuple<Tensor, List<Tensor>> actorEncoded;
    ValueTuple<Tensor, List<Tensor>> criticEncoded;
    if (shareEncoder)
    {
        // One shared stream feeds both heads; note its weights end up in BOTH lists below,
        // which appears deliberate (each optimizer sees the shared encoder).
        actorEncoded = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "ActorCritic");
        criticEncoded = actorEncoded;
    }
    else
    {
        actorEncoded = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "Actor");
        criticEncoded = CreateObservationStream(inVectorObs, inVisualObs, inMemery, inPrevAction, "Critic");
    }
    actorWeights.AddRange(actorEncoded.Item2);
    criticWeights.AddRange(criticEncoded.Item2);

    // Scalar (units: 1) value head on top of the critic stream.
    var valueHead = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));
    outValue = valueHead.Call(criticEncoded.Item1)[0];
    criticWeights.AddRange(valueHead.weights);

    encodedAllActor = actorEncoded.Item1;
}
/// <summary>
/// Builds one logits branch per discrete action dimension on top of the common encoder.
/// Appends each branch's weights to <c>weights</c>.
/// </summary>
/// <param name="inVectorObs">vector observation input</param>
/// <param name="inVisualObs">visual observation inputs</param>
/// <param name="inMemery">recurrent memory input (passed through to the encoder)</param>
/// <param name="outActionSizes">number of options for each discrete action branch</param>
/// <returns>one raw (un-normalized) output tensor per branch</returns>
public override List<Tensor> BuildNetworkForDiscreteActionSpace(Tensor inVectorObs, List<Tensor> inVisualObs, Tensor inMemery, int[] outActionSizes)
{
    Tensor encodedAllActor = CreateCommonLayers(inVectorObs, inVisualObs, inMemery, null);

    var policyBranches = new List<Tensor>();
    foreach (var branchSize in outActionSizes)
    {
        var branchHead = new Dense(units: branchSize, activation: null, use_bias: outputLayerBias, kernel_initializer: new VarianceScaling(scale: outputLayerInitialScale));
        policyBranches.Add(branchHead.Call(encodedAllActor)[0]);
        weights.AddRange(branchHead.weights);
    }
    return policyBranches;
}
/// <summary>
/// Builds a continuous-action actor-critic network with THREE separate observation streams:
/// one for the action mean, one for the action log-variance, and one for the critic.
/// Resets and fills <c>actorWeights</c>, <c>actorVarWeights</c> and <c>criticWeights</c>.
/// (Note: "Sapce" is a typo in the inherited method name and cannot be changed here.)
/// </summary>
/// <param name="inVectorObs">vector observation input (may be null if visual obs given)</param>
/// <param name="inVisualObs">visual observation inputs (may be null if vector obs given)</param>
/// <param name="inMemery">recurrent memory input — must be null (unsupported)</param>
/// <param name="inPrevAction">previous action input — must be null (unsupported)</param>
/// <param name="outActionSize">dimensionality of the continuous action</param>
/// <param name="outActionMean">output: action mean tensor</param>
/// <param name="outValue">output: critic's scalar value</param>
/// <param name="outActionLogVariance">output: per-action log variance tensor</param>
public override void BuildNetworkForContinuousActionSapce(Tensor inVectorObs, List<Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int outActionSize, out Tensor outActionMean, out Tensor outValue, out Tensor outActionLogVariance)
{
    Debug.Assert(inMemery == null, "Currently recurrent input is not supported by RLNetworkSimpleAC");
    Debug.Assert(inPrevAction == null, "Currently previous action input is not supported by RLNetworkSimpleAC");
    Debug.Assert(!(inVectorObs == null && inVisualObs == null), "Network need at least one vector observation or visual observation");

    // Fresh weight lists — this method owns them for this build.
    criticWeights = new List<Tensor>();
    actorWeights = new List<Tensor>();
    actorVarWeights = new List<Tensor>();

    // Three independent encoders; mean and variance streams share the actor layer spec.
    var actorMeanEncoded = CreateObservationStream(inVectorObs, actorHiddenLayers, inVisualObs, inMemery, inPrevAction, "ActorMean");
    var actorVarEncoded = CreateObservationStream(inVectorObs, actorHiddenLayers, inVisualObs, inMemery, inPrevAction, "ActorVar");
    var criticEncoded = CreateObservationStream(inVectorObs, criticHiddenLayers, inVisualObs, inMemery, inPrevAction, "Critic");
    actorWeights.AddRange(actorMeanEncoded.Item2);
    actorVarWeights.AddRange(actorVarEncoded.Item2);
    criticWeights.AddRange(criticEncoded.Item2);

    //concat all inputs
    Tensor encodedAllActorMean = actorMeanEncoded.Item1;
    Tensor encodedAllActorVar = actorVarEncoded.Item1;
    Tensor encodedAllCritic = criticEncoded.Item1;

    //outputs
    //mean
    var actorOutputMean = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
    outActionMean = actorOutputMean.Call(encodedAllActorMean)[0];
    if (useSoftclipForMean)
    {
        // Optionally squash the mean into [minMean, maxMean].
        outActionMean = SoftClip(outActionMean, minMean, maxMean);
    }
    actorWeights.AddRange(actorOutputMean.weights);

    //var — raw head output is treated as LOG variance by the caller (the exp() stays commented out).
    var actorOutputVar = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
    outActionLogVariance = actorOutputVar.Call(encodedAllActorVar)[0];
    //outActionLogVariance = Current.K.exp(outActionLogVariance);
    actorVarWeights.AddRange(actorOutputVar.weights);

    //critic — scalar value head
    var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));
    outValue = criticOutput.Call(encodedAllCritic)[0];
    criticWeights.AddRange(criticOutput.weights);
}
/// <summary>
/// Builds the discrete-action actor-critic network: shared/split encoders plus one
/// logits head per action branch. Branch weights are appended to <c>actorWeights</c>
/// (the encoder and value-head weights are handled by <c>CreateCommonLayers</c>).
/// </summary>
/// <param name="inVectorObs">vector observation input</param>
/// <param name="inVisualObs">visual observation inputs</param>
/// <param name="inMemery">recurrent memory input</param>
/// <param name="inPrevAction">previous action input</param>
/// <param name="outActionSizes">number of options per discrete action branch</param>
/// <param name="outActionLogits">output: one logits tensor per branch</param>
/// <param name="outValue">output: critic's scalar value</param>
public override void BuildNetworkForDiscreteActionSpace(Tensor inVectorObs, List<Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int[] outActionSizes, out Tensor[] outActionLogits, out Tensor outValue)
{
    CreateCommonLayers(inVectorObs, inVisualObs, inMemery, inPrevAction, out outValue, out Tensor encodedAllActor, shareEncoder);

    var branches = new List<Tensor>();
    foreach (var branchSize in outActionSizes)
    {
        var branchHead = new Dense(units: branchSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
        branches.Add(branchHead.Call(encodedAllActor)[0]);
        actorWeights.AddRange(branchHead.weights);
    }
    outActionLogits = branches.ToArray();
}
/// <summary>
/// Builds the GAN discriminator once on the external ("real") input, then reuses the
/// same weights on the generator's output by wrapping the discriminator in a Model
/// and calling it with the generator-side inputs. Sets <c>discriminatorWeights</c>.
/// </summary>
/// <param name="inputExternal">real data input</param>
/// <param name="inputFromGenerator">generated data input</param>
/// <param name="inputCondition">optional conditioning input (null for unconditioned GAN)</param>
/// <param name="discriminatorOutputExternal">output: sigmoid score for the real input</param>
/// <param name="discriminatorOutputFromGenerator">output: sigmoid score for the generated input</param>
protected void CreateDiscriminators(Tensor inputExternal, Tensor inputFromGenerator, Tensor inputCondition, out Tensor discriminatorOutputExternal, out Tensor discriminatorOutputFromGenerator)
{
    List<Tensor> inputListExternal;
    List<Tensor> inputListFromGenerator;
    Tensor inputAllReal;
    if (inputCondition == null)
    {
        inputListExternal = new List<Tensor>() { inputExternal };
        inputListFromGenerator = new List<Tensor>() { inputFromGenerator };
        inputAllReal = inputExternal;
    }
    else
    {
        // Condition goes first, concatenated with the data along axis 1.
        inputListExternal = new List<Tensor>() { inputCondition, inputExternal };
        inputListFromGenerator = new List<Tensor>() { inputCondition, inputFromGenerator };
        inputAllReal = new Concat(1).Call(inputListExternal)[0];
    }

    // Hidden stack followed by a single sigmoid unit.
    var hidden = BuildSequentialLayers(discriminatorHiddenLayers, inputAllReal);
    var scoreLayer = new Dense(1, new Sigmoid(), discriminatorOutputLayerBias, kernel_initializer: new GlorotUniform(scale: discriminatorOutputLayerInitialScale));
    discriminatorOutputExternal = scoreLayer.Call(hidden.Item1)[0];

    // Re-apply the identical network (weight sharing) to the generator-side inputs.
    Model model = new Model(inputListExternal, new List<Tensor>() { discriminatorOutputExternal });
    discriminatorOutputFromGenerator = model.Call(inputListFromGenerator)[0];
    discriminatorWeights = model.weights;
}
/// <summary>
/// Builds the continuous-action actor-critic network: common encoder(s), an action-mean
/// head, and a state-independent trainable log-variance variable (PPO style).
/// Head and variance weights are appended to <c>actorWeights</c>.
/// </summary>
/// <param name="inVectorObs">vector observation input</param>
/// <param name="inVisualObs">visual observation inputs</param>
/// <param name="inMemery">recurrent memory input</param>
/// <param name="inPrevAction">previous action input</param>
/// <param name="outActionSize">dimensionality of the continuous action</param>
/// <param name="outActionMean">output: action mean tensor</param>
/// <param name="outValue">output: critic's scalar value</param>
/// <param name="outActionLogVariance">output: trainable log-variance variable</param>
public override void BuildNetworkForContinuousActionSapce(Tensor inVectorObs, List<Tensor> inVisualObs, Tensor inMemery, Tensor inPrevAction, int outActionSize, out Tensor outActionMean, out Tensor outValue, out Tensor outActionLogVariance)
{
    CreateCommonLayers(inVectorObs, inVisualObs, inMemery, inPrevAction, out outValue, out Tensor encodedAllActor, shareEncoder);

    // Mean head on top of the actor encoding.
    var meanHead = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
    outActionMean = meanHead.Call(encodedAllActor)[0];
    actorWeights.AddRange(meanHead.weights);

    // Log variance does not depend on the observation: one trainable value per action
    // dimension, initialized to zero.
    var logSigmaSq = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");
    outActionLogVariance = logSigmaSq;
    actorWeights.Add(logSigmaSq);
}
/// <summary>
/// Builds the GAN generator: (condition ++ noise) -> hidden stack -> linear output layer.
/// Resets and fills <c>generatorWeights</c>.
/// </summary>
/// <param name="inputCondition">optional conditioning input</param>
/// <param name="inputNoise">optional noise input (at least one of the two must be non-null)</param>
/// <param name="outputSize">number of units in the generator's output</param>
/// <returns>the generator's output tensor</returns>
protected Tensor CreateGenerator(Tensor inputCondition, Tensor inputNoise, int outputSize)
{
    Debug.Assert(inputCondition != null || inputNoise != null, "GAN needs at least one of input condition or input noise ");

    List<Tensor> inputList;
    Tensor inputAll;
    if (inputNoise == null)
    {
        // Condition only (assert above guarantees it is non-null here).
        inputList = new List<Tensor>() { inputCondition };
        inputAll = inputCondition;
    }
    else if (inputCondition == null)
    {
        // Noise only.
        inputList = new List<Tensor>() { inputNoise };
        inputAll = inputNoise;
    }
    else
    {
        // Both: condition first, concatenated with the noise along axis 1.
        inputList = new List<Tensor>() { inputCondition, inputNoise };
        inputAll = new Concat(1).Call(inputList)[0];
    }

    var hidden = BuildSequentialLayers(generatorHiddenLayers, inputAll);
    var outputLayer = new Dense(outputSize, null, generatorOutputLayerBias, kernel_initializer: new GlorotUniform(scale: generatorOutputLayerInitialScale));
    Tensor output = outputLayer.Call(hidden.Item1)[0];

    generatorWeights = new List<Tensor>();
    generatorWeights.AddRange(hidden.Item2);
    generatorWeights.AddRange(outputLayer.weights);
    return output;
}
/// <summary>
/// Smoke test: wires up 3-feature input -> Dense(10, ReLU) -> Dense(1, ReLU), attaches a
/// mean-square-error loss against a 1-feature target, and exports the resulting graph.
/// No assertions — success means the graph builds and serializes.
/// </summary>
public void TestLayer()
{
    // Node creation order is kept the same as the original (inputs/layers before calls),
    // since graph node names may depend on it.
    var inputLayer = UnityTFUtils.Input(shape: new int?[] { 3 });
    var hiddenLayer = new Dense(10, new ReLU(), true);
    var outputLayer = new Dense(1, new ReLU(), true);
    var target = UnityTFUtils.Input(shape: new int?[] { 1 });

    var hidden = hiddenLayer.Call(inputLayer[0]);
    var prediction = outputLayer.Call(hidden[0]);

    var loss = new MeanSquareError();
    loss.Call(target[0], prediction[0]);

    ((UnityTFBackend)K).ExportGraphDef("SavedGraph/testLayer.pb");
}
/// <summary>
/// Builds a continuous-action head (and optionally a variance head) on top of the
/// common encoder. Appends all new layer weights to <c>weights</c>.
/// </summary>
/// <param name="inVectorObservation">vector observation input</param>
/// <param name="inVisualObservation">visual observation inputs</param>
/// <param name="inMemery">recurrent memory input (passed through to the encoder)</param>
/// <param name="outActionSize">dimensionality of the continuous action</param>
/// <returns>(action tensor, variance tensor or null when variance is disabled)</returns>
public override ValueTuple<Tensor, Tensor> BuildNetworkForContinuousActionSapce(Tensor inVectorObservation, List<Tensor> inVisualObservation, Tensor inMemery, int outActionSize)
{
    var encodedActor = CreateCommonLayers(inVectorObservation, inVisualObservation, inMemery, null);

    // Linear action head.
    var actionHead = new Dense(units: outActionSize, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
    var outAction = actionHead.Call(encodedActor)[0];
    weights.AddRange(actionHead.weights);

    // Optional single-unit variance head: exp(log sigma^2) keeps it positive,
    // + minStd^2 floors it away from zero.
    Tensor outVar = null;
    if (useVarianceForContinuousAction)
    {
        var logSigmaSqHead = new Dense(units: 1, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
        outVar = Current.K.exp(logSigmaSqHead.Call(encodedActor)[0]) + minStd * minStd;
        weights.AddRange(logSigmaSqHead.weights);
    }
    return ValueTuple.Create(outAction, outVar);
}
/// <summary>
/// Builds the full actor-critic network from vector and/or visual observations:
/// separate actor and critic encoders, an action head (softmaxed for discrete spaces),
/// a scalar value head, and — for continuous spaces — a state-independent trainable
/// log-variance. Resets and fills <c>actorWeights</c> and <c>criticWeights</c>.
/// </summary>
/// <param name="inVectorstate">vector observation input (may be null if visual given)</param>
/// <param name="inVisualState">visual observation inputs (may be null if vector given)</param>
/// <param name="inMemery">recurrent memory input — must be null (unsupported)</param>
/// <param name="inPrevAction">previous action input — must be null (unsupported)</param>
/// <param name="outActionSize">action dimensionality (continuous) or option count (discrete)</param>
/// <param name="actionSpace">continuous or discrete action space</param>
/// <param name="outAction">output: action tensor (softmax probabilities when discrete)</param>
/// <param name="outValue">output: critic's scalar value</param>
/// <param name="outVariance">output: action variance (null for discrete spaces)</param>
public override void BuildNetwork(Tensor inVectorstate, List<Tensor> inVisualState, Tensor inMemery, Tensor inPrevAction, int outActionSize, SpaceType actionSpace, out Tensor outAction, out Tensor outValue, out Tensor outVariance)
{
    Debug.Assert(inMemery == null, "Currently recurrent input is not supported by RLNetworkSimpleAC");
    Debug.Assert(inPrevAction == null, "Currently previous action input is not supported by RLNetworkSimpleAC");
    Debug.Assert(!(inVectorstate == null && inVisualState == null), "Network need at least one vector observation or visual observation");
    //Debug.Assert(actionSpace == SpaceType.continuous, "Only continuous action space is supported by RLNetworkSimpleAC");

    criticWeights = new List<Tensor>();
    actorWeights = new List<Tensor>();

    //visual encoders — one actor and one critic encoder per visual observation
    Tensor encodedVisualActor = null;
    Tensor encodedVisualCritic = null;
    if (inVisualState != null)
    {
        List<Tensor> visualEncodedActor = new List<Tensor>();
        List<Tensor> visualEncodedCritic = new List<Tensor>();
        foreach (var v in inVisualState)
        {
            var ha = CreateVisualEncoder(v, actorHiddenLayers, "ActorVisualEncoder");
            var hc = CreateVisualEncoder(v, criticHiddenLayers, "CriticVisualEncoder");
            actorWeights.AddRange(ha.Item2);
            visualEncodedActor.Add(ha.Item1);
            criticWeights.AddRange(hc.Item2);
            visualEncodedCritic.Add(hc.Item1);
        }
        if (inVisualState.Count > 1)
        {
            // Multiple visual encodings: stack then flatten (stack+flatten is used instead of
            // concat — see the commented warning about concat gradients below).
            //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
            encodedVisualActor = Current.K.stack(visualEncodedActor, 1);
            encodedVisualActor = Current.K.batch_flatten(encodedVisualActor);
            encodedVisualCritic = Current.K.stack(visualEncodedCritic, 1);
            encodedVisualCritic = Current.K.batch_flatten(encodedVisualCritic);
        }
        else
        {
            encodedVisualActor = visualEncodedActor[0];
            encodedVisualCritic = visualEncodedCritic[0];
        }
    }

    //vector states encode
    Tensor encodedVectorStateActor = null;
    Tensor encodedVectorStateCritic = null;
    if (inVectorstate != null)
    {
        var output = BuildSequentialLayers(actorHiddenLayers, inVectorstate, "ActorStateEncoder");
        encodedVectorStateActor = output.Item1;
        actorWeights.AddRange(output.Item2);
        output = BuildSequentialLayers(criticHiddenLayers, inVectorstate, "CriticStateEncoder");
        encodedVectorStateCritic = output.Item1;
        criticWeights.AddRange(output.Item2);
    }

    //concat all inputs — pick/merge whichever encodings exist
    Tensor encodedAllActor = null;
    Tensor encodedAllCritic = null;
    if (inVisualState == null && inVectorstate != null)
    {
        encodedAllActor = encodedVectorStateActor;
        encodedAllCritic = encodedVectorStateCritic;
    }
    else if (inVisualState != null && inVectorstate == null)
    {
        encodedAllActor = encodedVisualActor;
        encodedAllCritic = encodedVisualCritic;
    }
    else if (inVisualState != null && inVectorstate != null)
    {
        //Debug.LogWarning("Tensorflow does not have gradient for concat operation in C yet. Please only use one type of observation if you need training.");
        encodedAllActor = Current.K.concat(new List<Tensor>() { encodedVectorStateActor, encodedVisualActor }, 1);
        encodedAllCritic = Current.K.concat(new List<Tensor>() { encodedVectorStateCritic, encodedVisualCritic }, 1);
    }

    //outputs — actor head (softmax when discrete), scalar critic head
    var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
    outAction = actorOutput.Call(encodedAllActor)[0];
    if (actionSpace == SpaceType.discrete)
    {
        outAction = Current.K.softmax(outAction);
    }
    actorWeights.AddRange(actorOutput.weights);

    var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));
    outValue = criticOutput.Call(encodedAllCritic)[0];
    criticWeights.AddRange(criticOutput.weights);

    //output variance. Currently not depending on the inputs for this simple network implementation
    if (actionSpace == SpaceType.continuous)
    {
        // Trainable per-dimension log variance, initialized to zero; exposed as exp(.).
        var logSigmaSq = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");
        outVariance = Current.K.exp(logSigmaSq);
        actorWeights.Add(logSigmaSq);
    }
    else
    {
        outVariance = null;
    }
}
/// <summary>
/// Builds the supervised-learning network from vector and/or visual observations:
/// a single shared encoder stack, an action head (softmaxed for discrete spaces), and an
/// optional scalar variance head for continuous actions. Resets and fills <c>weights</c>.
/// </summary>
/// <param name="inVectorstate">vector observation input (may be null if visual given)</param>
/// <param name="inVisualState">visual observation inputs (may be null if vector given)</param>
/// <param name="inMemery">recurrent memory input — must be null (unsupported)</param>
/// <param name="outActionSize">action dimensionality (continuous) or option count (discrete)</param>
/// <param name="actionSpace">continuous or discrete action space</param>
/// <returns>(action tensor, variance tensor or null)</returns>
public override ValueTuple<Tensor, Tensor> BuildNetwork(Tensor inVectorstate, List<Tensor> inVisualState, Tensor inMemery, int outActionSize, SpaceType actionSpace)
{
    Debug.Assert(inMemery == null, "Currently recurrent input is not supported by SupervisedLearningNetworkSimple");
    Debug.Assert(!(inVectorstate == null && inVisualState == null), "Network need at least one vector observation or visual observation");
    weights = new List<Tensor>();

    //visual encoders — one per visual observation
    // NOTE(review): unlike the actor-critic variant, CreateVisualEncoder here returns the
    // tensor directly (no weight list is collected for visual encoders) — verify the
    // overload; visual-encoder weights may be intentionally excluded from `weights`.
    Tensor encodedVisualActor = null;
    if (inVisualState != null)
    {
        List<Tensor> visualEncodedActor = new List<Tensor>();
        foreach (var v in inVisualState)
        {
            var ha = CreateVisualEncoder(v, hiddenLayers, "ActorVisualEncoder");
            visualEncodedActor.Add(ha);
        }
        if (inVisualState.Count > 1)
        {
            // Multiple visual encodings: stack then flatten instead of concat.
            //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
            encodedVisualActor = Current.K.stack(visualEncodedActor, 1);
            encodedVisualActor = Current.K.batch_flatten(encodedVisualActor);
        }
        else
        {
            encodedVisualActor = visualEncodedActor[0];
        }
    }

    //vector states encode
    Tensor encodedVectorStateActor = null;
    if (inVectorstate != null)
    {
        var hiddens = BuildSequentialLayers(hiddenLayers, inVectorstate, "ActorStateEncoder");
        encodedVectorStateActor = hiddens.Item1;
        weights.AddRange(hiddens.Item2);
    }

    //concat all inputs — pick/merge whichever encodings exist
    Tensor encodedAllActor = null;
    if (inVisualState == null && inVectorstate != null)
    {
        encodedAllActor = encodedVectorStateActor;
    }
    else if (inVisualState != null && inVectorstate == null)
    {
        encodedAllActor = encodedVisualActor;
    }
    else if (inVisualState != null && inVectorstate != null)
    {
        //Debug.LogError("Tensorflow does not have gradient for concat operation in C yet. Please only use one observation.");
        encodedAllActor = Current.K.concat(new List<Tensor>() { encodedVectorStateActor, encodedVisualActor }, 1);
    }

    //outputs — action head; softmax probabilities when discrete
    var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
    var outAction = actorOutput.Call(encodedAllActor)[0];
    if (actionSpace == SpaceType.discrete)
    {
        outAction = Current.K.softmax(outAction);
    }
    weights.AddRange(actorOutput.weights);

    // Optional single-unit variance head: exp(.) keeps it positive, + minStd^2 floors it.
    Tensor outVar = null;
    if (useVarianceForContinuousAction && actionSpace == SpaceType.continuous)
    {
        var logSigmaSq = new Dense(units: 1, activation: null, use_bias: outputLayerBias, kernel_initializer: new GlorotUniform(scale: outputLayerInitialScale));
        outVar = Current.K.exp(logSigmaSq.Call(encodedAllActor)[0]) + minStd * minStd;
        weights.AddRange(logSigmaSq.weights);
    }
    return ValueTuple.Create(outAction, outVar);
}
/// <summary>
/// Builds a hierarchical actor-critic network from two vector observations: a low-level
/// encoder feeds a high-level actor stack and a high-level value stack; the actor stack
/// then feeds a low-level actor output stack. Weights are partitioned into
/// <c>weightsLowlevel</c> and <c>weightsHighLevel</c> (both reset here), presumably so the
/// two levels can be trained separately — confirm against the trainer.
/// </summary>
/// <param name="inVectorstateLowlevel">low-level vector observation input</param>
/// <param name="inVectorstateHighlevel">high-level vector observation input (may be null)</param>
/// <param name="outActionSize">action dimensionality (continuous) or option count (discrete)</param>
/// <param name="actionSpace">continuous or discrete action space</param>
/// <param name="outAction">output: action tensor (softmax probabilities when discrete)</param>
/// <param name="outValue">output: critic's scalar value</param>
/// <param name="outVariance">output: action variance (null for discrete spaces)</param>
public void BuildNetwork(Tensor inVectorstateLowlevel, Tensor inVectorstateHighlevel, int outActionSize, SpaceType actionSpace, out Tensor outAction, out Tensor outValue, out Tensor outVariance)
{
    weightsLowlevel = new List<Tensor>();
    weightsHighLevel = new List<Tensor>();

    //lowlevel encoder
    var lowlevelEncoder = BuildSequentialLayers(inLowlevelLayers, inVectorstateLowlevel, "LowlevelEncoder");
    Tensor encodedLowlevel = lowlevelEncoder.Item1;
    weightsLowlevel.AddRange(lowlevelEncoder.Item2);

    //highlevel — append the high-level observation (if any) to the low-level encoding
    Tensor concatedStates = null;
    if (inVectorstateHighlevel != null)
    {
        concatedStates = Current.K.concat(new List<Tensor>() { encodedLowlevel, inVectorstateHighlevel }, 1);
    }
    else
    {
        concatedStates = encodedLowlevel;
    }

    var highlevelEncoder = BuildSequentialLayers(actorHighlevelLayers, concatedStates, "ActorHighevelEncoder");
    Tensor outputHighlevel = highlevelEncoder.Item1;
    weightsHighLevel.AddRange(highlevelEncoder.Item2);

    //lowlevel actor output
    // NOTE(review): these final actor layers are counted as LOW-level weights even though
    // they sit on top of the high-level encoder — looks intentional for the split
    // optimization scheme, but verify.
    var actorFinal = BuildSequentialLayers(actorLowlevelLayers, outputHighlevel, "ActorLowlevelOut");
    Tensor encodedAllActor = actorFinal.Item1;
    weightsLowlevel.AddRange(actorFinal.Item2);

    //highlevel value output — value stack reads the concatenated states directly
    var valueFinal = BuildSequentialLayers(valueHighlevelLayers, concatedStates, "ValueHighlevelOut");
    Tensor encodedAllCritic = valueFinal.Item1;
    weightsHighLevel.AddRange(valueFinal.Item2);

    //outputs
    using (Current.K.name_scope("ActorOutput"))
    {
        var actorOutput = new Dense(units: outActionSize, activation: null, use_bias: actorOutputLayerBias, kernel_initializer: new VarianceScaling(scale: actorOutputLayerInitialScale));
        outAction = actorOutput.Call(encodedAllActor)[0];
        if (actionSpace == SpaceType.discrete)
        {
            outAction = Current.K.softmax(outAction);
        }
        weightsLowlevel.AddRange(actorOutput.weights);
    }
    using (Current.K.name_scope("CriticOutput"))
    {
        var criticOutput = new Dense(units: 1, activation: null, use_bias: criticOutputLayerBias, kernel_initializer: new GlorotUniform(scale: criticOutputLayerInitialScale));
        outValue = criticOutput.Call(encodedAllCritic)[0];
        weightsHighLevel.AddRange(criticOutput.weights);
    }

    //variance
    //actor network output variance
    if (actionSpace == SpaceType.continuous)
    {
        using (Current.K.name_scope("ActorVarianceOutput"))
        {
            // logSigmaSq has no `var` — it is a field on this class, retained after the build.
            logSigmaSq = Current.K.variable((new Constant(0)).Call(new int[] { outActionSize }, DataType.Float), name: "PPO.log_sigma_square");
            outVariance = Current.K.exp(logSigmaSq);
            weightsHighLevel.Add(logSigmaSq);
        }
    }
    else
    {
        outVariance = null;
    }
}