Esempio n. 1
0
    /// <summary>
    /// Builds the PPO training loss (clipped surrogate policy loss + weighted clipped
    /// value loss - weighted entropy bonus) and the update function that runs one
    /// optimizer step over it.
    /// </summary>
    /// <param name="trainingParams">PPO hyperparameters (clip epsilons, loss weights).</param>
    /// <param name="entropy">Entropy tensor of the current policy.</param>
    /// <param name="actionLogProb">Log-probabilities of the taken actions under the current policy.</param>
    /// <param name="outputValueFromNetwork">Value-head output tensor of the network.</param>
    /// <param name="extraInputTensors">Additional placeholders needed to evaluate the graph (observations, masks, actions, ...).</param>
    /// <param name="weightsToUpdate">Trainable weights the optimizer should update.</param>
    protected void CreatePPOOptimizer(TrainerParamsPPO trainingParams, Tensor entropy, Tensor actionLogProb, Tensor outputValueFromNetwork, List <Tensor> extraInputTensors, List <Tensor> weightsToUpdate)
    {
        ClipEpsilon       = trainingParams.clipEpsilon;
        ValueLossWeight   = trainingParams.valueLossWeight;
        EntropyLossWeight = trainingParams.entropyLossWeight;
        ClipValueLoss     = trainingParams.clipValueLoss;


        // Rollout-data placeholders. For continuous actions there is one old log-prob
        // per action dimension; for (branched) discrete actions, one per branch.
        var inputOldLogProb  = UnityTFUtils.Input(new int?[] { ActionSpace == SpaceType.continuous ? ActionSizes[0] : ActionSizes.Length }, name: "InputOldLogProb")[0];
        var inputAdvantage   = UnityTFUtils.Input(new int?[] { 1 }, name: "InputAdvantage")[0];
        var inputTargetValue = UnityTFUtils.Input(new int?[] { 1 }, name: "InputTargetValue")[0];
        var inputOldValue    = UnityTFUtils.Input(new int?[] { 1 }, name: "InputOldValue")[0];

        // Scalar hyperparameter placeholders so they can be changed per update call.
        var inputClipEpsilon       = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "ClipEpsilon", dtype: DataType.Float)[0];
        var inputClipValueLoss     = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "ClipValueLoss", dtype: DataType.Float)[0];
        var inputValuelossWeight   = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "ValueLossWeight", dtype: DataType.Float)[0];
        var inputEntropyLossWeight = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "EntropyLossWeight", dtype: DataType.Float)[0];


        // Value loss: PPO-style clipped value objective. The value estimate is clipped
        // to stay within inputClipValueLoss of the old value, and the pessimistic
        // (larger) of the clipped/unclipped MSE is used.
        Tensor outputValueLoss = null;

        using (K.name_scope("ValueLoss"))
        {
            var clippedValueEstimate = inputOldValue + K.clip(outputValueFromNetwork - inputOldValue, 0.0f - inputClipValueLoss, inputClipValueLoss);
            var valueLoss1           = new MeanSquareError().Call(outputValueFromNetwork, inputTargetValue);
            var valueLoss2           = new MeanSquareError().Call(clippedValueEstimate, inputTargetValue);
            outputValueLoss = K.mean(K.maximum(valueLoss1, valueLoss2));
        }

        // Clipped surrogate policy loss (PPO objective):
        // L = -E[min(r * A, clip(r, 1-eps, 1+eps) * A)], r = exp(logp - logp_old).
        Tensor outputPolicyLoss;

        using (K.name_scope("ClippedCurreogateLoss"))
        {
            var probRatio = K.exp(actionLogProb - inputOldLogProb);
            var p_opt_a   = probRatio * inputAdvantage;
            var p_opt_b   = K.clip(probRatio, 1.0f - inputClipEpsilon, 1.0f + inputClipEpsilon) * inputAdvantage;

            // Fixed: was K.minimun (typo), which does not exist on the backend;
            // the PPO objective takes the elementwise minimum of the two surrogates.
            // (The misspelled scope/op name "ClippedCurreogateLoss" is a runtime graph
            // name and is preserved deliberately.)
            outputPolicyLoss = (-1f) * K.mean(K.mean(K.minimum(p_opt_a, p_opt_b)), name: "ClippedCurreogateLoss");
        }
        // Final weighted loss: policy loss + weighted value loss - weighted entropy bonus.
        var outputLoss = outputPolicyLoss + inputValuelossWeight * outputValueLoss;

        outputLoss = outputLoss - inputEntropyLossWeight * entropy;
        outputLoss = K.identity(outputLoss, "OutputLoss");

        // Collect every placeholder the update function must be fed.
        List <Tensor> allInputs = new List <Tensor>();

        allInputs.Add(inputOldLogProb);
        allInputs.Add(inputTargetValue);
        allInputs.Add(inputOldValue);
        allInputs.Add(inputAdvantage);
        allInputs.Add(inputClipEpsilon);
        allInputs.Add(inputClipValueLoss);
        allInputs.Add(inputValuelossWeight);
        allInputs.Add(inputEntropyLossWeight);

        allInputs.AddRange(extraInputTensors);

        // Create the optimizer ops and the callable update function. The function also
        // returns the individual loss components and entropy for logging.
        var updates = AddOptimizer(weightsToUpdate, outputLoss, optimizer);

        UpdatePPOFunction = K.function(allInputs, new List <Tensor> {
            outputLoss, outputValueLoss, outputPolicyLoss, entropy
        }, updates, "UpdateFunction");
    }
    /// <summary>
    /// Builds the inference and (optionally) training graph for PPO with a discrete
    /// (possibly multi-branch) action space: value function, masked action sampling,
    /// and — when <paramref name="trainerParams"/> is a <c>TrainerParamsPPO</c> —
    /// the PPO optimizer over action log-probabilities and entropy.
    /// </summary>
    /// <param name="vectorObs">Raw vector observation placeholder (may be unused if none).</param>
    /// <param name="normalizedVectorObs">Normalized vector observation fed to the network.</param>
    /// <param name="visualObs">Visual observation placeholders.</param>
    /// <param name="trainerParams">Trainer parameters; training graph is built only for PPO params.</param>
    protected void InitializePPOStructureDiscreteAction(Tensor vectorObs, Tensor normalizedVectorObs, List <Tensor> visualObs, TrainerParams trainerParams)
    {
        // Gather all observation placeholders that the functions must be fed.
        List <Tensor> allObservationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            allObservationInputs.Add(vectorObs);
        }
        if (HasVisualObservation)
        {
            allObservationInputs.AddRange(visualObs);
        }

        Tensor[] outputActionsLogits = null; Tensor outputValue = null;
        network.BuildNetworkForDiscreteActionSpace(normalizedVectorObs, visualObs, null, null, ActionSizes, out outputActionsLogits, out outputValue);

        ValueFunction = K.function(allObservationInputs, new List <Tensor> {
            outputValue
        }, null, "ValueFunction");

        // Action-mask placeholders, one per discrete branch.
        // NOTE(review): tensor name "AcionMask" is misspelled; preserved because graph
        // node names may be referenced externally.
        List <Tensor> actionMasksInputs = new List <Tensor>();

        for (int i = 0; i < ActionSizes.Length; ++i)
        {
            actionMasksInputs.Add(UnityTFUtils.Input(new int?[] { ActionSizes[i] }, name: "AcionMask" + i)[0]);
        }

        Tensor[] outputActions, outputNormalizedLogits;
        CreateDiscreteActionMaskingLayer(outputActionsLogits, actionMasksInputs.ToArray(), out outputActions, out outputNormalizedLogits);

        // Outputs: the selected action(s) (concatenated across branches, cast to float)
        // followed by the normalized logits of every branch.
        var outputDiscreteActions = new List <Tensor>();

        outputDiscreteActions.Add(K.identity(K.cast(ActionSizes.Length == 1 ? outputActions[0] : K.concat(outputActions.ToList(), 1), DataType.Float), "OutputAction"));
        outputDiscreteActions.AddRange(outputNormalizedLogits);
        var actionFunctionInputs = new List <Tensor>();

        actionFunctionInputs.AddRange(allObservationInputs); actionFunctionInputs.AddRange(actionMasksInputs);
        PolicyFunction = K.function(actionFunctionInputs, outputDiscreteActions, null, "ActionFunction");


        TrainerParamsPPO trainingParams = trainerParams as TrainerParamsPPO;

        if (trainingParams != null)
        {
            // Build action log-probability and entropy from the taken actions.
            Tensor        outputEntropy;
            List <Tensor> inputActionsDiscreteSeperated = null, onehotInputActions = null;    //for discrete action space

            Tensor inputAction   = UnityTFUtils.Input(new int?[] { ActionSizes.Length }, name: "InputActions", dtype: DataType.Int32)[0];

            // Split the input action tensor into one column per discrete branch.
            var splits           = new int[ActionSizes.Length];
            for (int i = 0; i < splits.Length; ++i)
            {
                splits[i] = 1;
            }
            inputActionsDiscreteSeperated = K.split(inputAction, K.constant(splits, dtype: DataType.Int32), K.constant(1, dtype: DataType.Int32), ActionSizes.Length);

            Tensor actionLogProb = null;
            using (K.name_scope("ActionProbAndEntropy"))
            {
                onehotInputActions = inputActionsDiscreteSeperated.Select((x, i) => K.reshape(K.one_hot(x, K.constant <int>(ActionSizes[i], dtype: DataType.Int32), K.constant(1.0f), K.constant(0.0f)), new int[] { -1, ActionSizes[i] })).ToList();

                // Entropy: sum over branches of mean(-sum(p * log(p))); the small epsilon
                // guards log(0) for masked-out actions.
                var entropies = outputActionsLogits.Select((t) => { return(K.mean((-1.0f) * K.sum(K.softmax(t) * K.log(K.softmax(t) + 0.00000001f), axis: 1), 0)); });
                outputEntropy = entropies.Aggregate((x, y) => { return(x + y); });

                // Per-branch log-prob of the taken action, selected via the one-hot mask.
                var actionProbsArray = ActionSizes.Select((x, i) => { return(K.sum(outputNormalizedLogits[i] * onehotInputActions[i], 1, true)); }).ToList();
                actionLogProb = ActionSizes.Length == 1 ? actionProbsArray[0] : K.concat(actionProbsArray, 1);
            }

            // Extra placeholders the optimizer's update function needs beyond the
            // standard PPO inputs: observations, action masks, and the taken actions.
            List <Tensor> extraPolicyInputs = new List <Tensor>();
            extraPolicyInputs.AddRange(actionFunctionInputs);
            extraPolicyInputs.Add(inputAction);

            // Fixed: the original call passed 8 arguments, but CreatePPOOptimizer takes
            // 6 (trainingParams, entropy, actionLogProb, value, extraInputs, weights).
            // Critic and actor weights are trained jointly, so they are combined here.
            List <Tensor> trainableWeights = new List <Tensor>();
            trainableWeights.AddRange(network.GetCriticWeights());
            trainableWeights.AddRange(network.GetActorWeights());
            CreatePPOOptimizer(trainingParams, outputEntropy, actionLogProb, outputValue, extraPolicyInputs, trainableWeights);
        }
    }
Esempio n. 3
0
        /// <summary>
        /// Adds a layer instance on top of the layer stack.
        /// </summary>
        ///
        /// <param name="layer">The layer to append. The first layer added must carry a
        /// <c>batch_input_shape</c> (directly or via <c>input_shape</c>) so an input
        /// layer can be created for it.</param>
        ///
        public void Add(Layer layer)
        {
            if (outputs.Count == 0)
            {
                // first layer in model: check that it is an input layer
                if (layer.inbound_nodes.Count == 0)
                {
                    // create an input layer
                    if (layer.batch_input_shape == null)
                    {
                        throw new Exception("The first layer in a Sequential model must get an 'input_shape' or 'batch_input_shape' argument.");
                    }

                    // Instantiate the input layer.
                    var x = UnityTFUtils.Input(batch_shape: layer.batch_input_shape, dtype: layer.dtype, name: $"{layer.name}_input");

                    // _keras_history is (layer, node_index, tensor_index); Item1 is the
                    // producing layer, which must be the InputLayer just created.
                    //Debug.Assert(x[0]._keras_history.Value.layer.GetType() == typeof(InputLayer));
                    Debug.Assert(x[0]._keras_history.Value.Item1.GetType() == typeof(InputLayer));

                    // This will build the current layer and create the node connecting
                    // the current layer to the input layer we just created.
                    layer.Call(x);

                    //Debug.Assert(x[0]._keras_history.Value.layer.GetType() == typeof(InputLayer));
                    Debug.Assert(x[0]._keras_history.Value.Item1.GetType() == typeof(InputLayer));
                }

                // Whether pre-connected or connected above, the layer must now have
                // exactly one inbound node with a single output tensor.
                if (layer.inbound_nodes.Count != 1)
                {
                    throw new Exception($"A layer added to a Sequential model must not already be connected somewhere else. Model received layer '{layer.name}' which has {layer.inbound_nodes.Count} pre-existing inbound connections.");
                }

                if (layer.inbound_nodes[0].output_tensors.Count != 1)
                {
                    throw new Exception("All layers in a Sequential model should have a single output tensor. For multi-output layers, use the functional API.");
                }

                this.outputs = new List <Tensor> {
                    layer.inbound_nodes[0].output_tensors[0]
                };
                this.inputs = base.get_source_inputs(this.outputs[0]);

                // We create an input node, which we will keep updated
                // as we add more layers
                var node = new Node(outbound_layer: this,
                                    inbound_layers: new List <Layer>(),
                                    node_indices: new List <int?>(),
                                    tensor_indices: new List <int?>(),
                                    input_tensors: this.inputs,
                                    output_tensors: this.outputs,
                                    // no model-level masking for now
                                    input_masks: this.inputs.Select(x => (Tensor)null).ToList(),
                                    output_masks: new List <Tensor>()
                {
                    null
                },
                                    input_shapes: this.inputs.Select(x => x._keras_shape).ToList(),
                                    output_shapes: this.outputs.Select(x => x._keras_shape).ToList()
                                    );
            }
            else
            {
                // Subsequent layers: chain the layer onto the current model output.
                List <Tensor> output_tensor = layer.Call(this.outputs);
                if (output_tensor.Count > 1)
                {
                    throw new Exception("All layers in a Sequential model should have a single output tensor. For multi-output layers, use the functional API.");
                }

                this.outputs = output_tensor;

                // update this.inbound_nodes
                this.inbound_nodes[0].output_tensors = this.outputs;
                this.inbound_nodes[0].output_shapes  = new List <int?[]> {
                    this.outputs[0]._keras_shape
                };
            }

            this.layers.Add(layer);
            // Mark the model as needing a rebuild before it can be used again.
            this.built = false;
        }
    /// <summary>
    /// Initialize the GAN model based on the current value fields, without considering the MLAgent stuff.
    /// Builds generator/discriminator graphs, their losses, and — when
    /// <paramref name="enableTraining"/> is true — their optimizer update functions.
    /// </summary>
    /// <param name="enableTraining">Whether to build the training functions in addition to generation.</param>
    public void Initialize(bool enableTraining = true)
    {
        Debug.Assert(Initialized == false, "model already initialized");

        HasNoiseInput      = inputNoiseShape != null && inputNoiseShape.Length > 0;
        HasConditionInput  = inputConditionShape != null && inputConditionShape.Length > 0;
        HasGeneratorL2Loss = hasGeneratorL2Loss;


        //create generator input tensors
        Tensor inputCondition = null;

        if (HasConditionInput)
        {
            // NOTE(review): tensor name "InputConditoin" is misspelled; preserved
            // because graph node names may be referenced externally.
            inputCondition = UnityTFUtils.Input(inputConditionShape.Select((t) => (int?)t).ToArray(), name: "InputConditoin")[0];
        }
        Tensor inputNoise = null;

        if (HasNoiseInput)
        {
            inputNoise = UnityTFUtils.Input(inputNoiseShape.Select((t) => (int?)t).ToArray(), name: "InputNoise")[0];
        }

        Debug.Assert(HasNoiseInput || HasConditionInput, "GAN needs at least one of noise or condition input");

        // Placeholder for the sample (real or generated) fed to the discriminator.
        Tensor inputTargetToJudge = UnityTFUtils.Input(outputShape.Select((t) => (int?)t).ToArray(), name: "InputTargetToJudge")[0];

        //build the network
        Tensor generatorOutput, disOutForGenerator, dicOutTarget;

        network.BuildNetwork(inputCondition, inputNoise, inputTargetToJudge, outputShape, out generatorOutput, out dicOutTarget, out disOutForGenerator);

        //build the loss
        // Generator GAN loss: negated binary crossentropy of the discriminator's
        // output for generated samples against a constant-0 target.
        // NOTE(review): target 0 with a negated BCE is unusual for a generator loss
        // (commonly BCE against target 1); behavior depends on
        // K.binary_crossentropy's argument convention — confirm against its docs.
        Tensor genGANLoss = K.constant(0.0f, new int[] { }, DataType.Float) - K.mean(K.binary_crossentropy(disOutForGenerator, K.constant(0.0f, new int[] { }, DataType.Float), false), new int[] { 0, 1 });
        Tensor genLoss    = genGANLoss;
        //generator l2Loss if use it
        Tensor l2Loss = null;
        Tensor inputGeneratorTarget = null;
        Tensor inputL2LossWeight    = null;

        if (hasGeneratorL2Loss)
        {
            // Optional supervised L2 term pulling generator output toward a target.
            inputL2LossWeight    = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "l2LossWeight", dtype: DataType.Float)[0];
            inputGeneratorTarget = UnityTFUtils.Input(outputShape.Select((t) => (int?)t).ToArray(), name: "GeneratorTarget")[0];

            // Reduce over every output dimension (0 .. rank-1).
            int[] reduceDim = new int[outputShape.Length];
            for (int i = 0; i < reduceDim.Length; ++i)
            {
                reduceDim[i] = i;
            }
            l2Loss  = K.mul(inputL2LossWeight, K.mean(new MeanSquareError().Call(inputGeneratorTarget, generatorOutput), reduceDim));
            genLoss = genGANLoss + l2Loss;
        }

        // Discriminator loss: BCE of its output against the fed real/fake label.
        inputCorrectLabel = UnityTFUtils.Input(new int?[] { 1 }, name: "InputCorrectLabel")[0];
        Tensor discLoss = K.mean(K.binary_crossentropy(dicOutTarget, inputCorrectLabel, false), new int[] { 0, 1 });



        // Assemble the input lists for each callable function.
        List <Tensor> generatorTrainInputs     = new List <Tensor>();
        List <Tensor> generateInputs           = new List <Tensor>();
        List <Tensor> discriminatorTrainInputs = new List <Tensor>();

        discriminatorTrainInputs.Add(inputTargetToJudge);
        discriminatorTrainInputs.Add(inputCorrectLabel);
        if (HasConditionInput)
        {
            generatorTrainInputs.Add(inputCondition);
            generateInputs.Add(inputCondition);
            discriminatorTrainInputs.Add(inputCondition);
        }
        if (HasNoiseInput)
        {
            generatorTrainInputs.Add(inputNoise);
            generateInputs.Add(inputNoise);
        }
        if (hasGeneratorL2Loss)
        {
            generatorTrainInputs.Add(inputGeneratorTarget);
            generatorTrainInputs.Add(inputL2LossWeight);
        }

        //create optimizers
        if (enableTraining)
        {
            // Separate optimizers so generator and discriminator update independently.
            var generatorUpdate = AddOptimizer(network.GetGeneratorWeights(), genLoss, generatorOptimizer);
            trainGeneratorFunction = K.function(generatorTrainInputs, new List <Tensor> {
                genLoss
            }, generatorUpdate, "GeneratorUpdateFunction");

            var discriminatorUpdate = AddOptimizer(network.GetDiscriminatorWeights(), discLoss, discriminatorOptimizer);
            trainDiscriminatorFunction = K.function(discriminatorTrainInputs, new List <Tensor> {
                discLoss
            }, discriminatorUpdate, "DiscriminatorUpdateFunction");
        }
        generateFunction = K.function(generateInputs, new List <Tensor> {
            generatorOutput
        }, null, "GenerateFunction");

        //create functoin for training with prediction method
        CreateTrainWithPredictionFunctions();

        Initialized     = true;
        TrainingEnabled = enableTraining;
    }
Esempio n. 5
0
    /// <summary>
    /// Initialize the model for PPO-CMA with a continuous Gaussian policy:
    /// builds the value function, the action sampler (mean + variance heads),
    /// and — for PPO trainer params — separate training functions for the value,
    /// mean, and variance networks plus a pretraining function.
    /// </summary>
    /// <param name="trainerParams">Trainer parameters; training graph built only for PPO params.</param>
    /// <param name="stateTensor">Vector observation placeholder.</param>
    /// <param name="inputVisualTensors">Visual observation placeholders.</param>
    /// <param name="outputValueFromNetwork">Value-head output tensor.</param>
    /// <param name="outputActionMeanFromNetwork">Gaussian mean output of the actor.</param>
    /// <param name="outActionLogVarianceFromNetwork">Gaussian log-variance output of the actor.</param>
    /// <param name="valueWeights">Weights trained by the value loss.</param>
    /// <param name="meanWeights">Weights trained by the mean loss.</param>
    /// <param name="varweights">Weights trained by the variance loss.</param>
    protected void InitializePPOCMAStructures(TrainerParams trainerParams, Tensor stateTensor, List <Tensor> inputVisualTensors, Tensor outputValueFromNetwork, Tensor outputActionMeanFromNetwork, Tensor outActionLogVarianceFromNetwork, List <Tensor> valueWeights, List <Tensor> meanWeights, List <Tensor> varweights)
    {
        List <Tensor> allobservationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            allobservationInputs.Add(stateTensor);
        }
        if (HasVisualObservation)
        {
            allobservationInputs.AddRange(inputVisualTensors);
        }

        ValueFunction = K.function(allobservationInputs, new List <Tensor> {
            outputValueFromNetwork
        }, null, "ValueFunction");

        Tensor outputActualAction = null;
        Tensor outputVariance     = K.exp(outActionLogVarianceFromNetwork);

        // Sample an action via the reparameterization form: mean + std * N(0, 1).
        using (K.name_scope("SampleAction"))
        {
            outputActualAction = K.standard_normal(K.shape(outputActionMeanFromNetwork), DataType.Float) * K.sqrt(outputVariance) + outputActionMeanFromNetwork;
        }

        ActionFunction = K.function(allobservationInputs, new List <Tensor> {
            outputActualAction, outputActionMeanFromNetwork, outputVariance
        }, null, "ActionFunction");

        TrainerParamsPPO trainingParams = trainerParams as TrainerParamsPPO;

        if (trainingParams != null)
        {
            //training needed inputs
            var inputOldAction   = UnityTFUtils.Input(new int?[] { ActionSizes[0] }, name: "InputOldAction")[0];
            var inputAdvantage   = UnityTFUtils.Input(new int?[] { 1 }, name: "InputAdvantage")[0];
            var inputTargetValue = UnityTFUtils.Input(new int?[] { 1 }, name: "InputTargetValue")[0];
            var inputOldValue    = UnityTFUtils.Input(new int?[] { 1 }, name: "InputOldValue")[0];

            var inputClipEpsilonValue = UnityTFUtils.Input(batch_shape: new int?[] { }, name: "ClipEpsilonValue", dtype: DataType.Float)[0];
            // value loss
            Tensor outputValueLoss = null;
            using (K.name_scope("ValueLoss"))
            {
                // NOTE(review): the original computed the clipped maximum of
                // valueLoss1/valueLoss2 and then immediately overwrote it with the
                // plain MSE, effectively disabling value clipping. The dead
                // K.mean(K.maximum(...)) computation was removed; the unclipped loss
                // is kept to preserve the original effective behavior. valueLoss2 is
                // still built so the ClipEpsilonValue placeholder remains in the
                // graph — confirm whether clipping was meant to be enabled.
                var clippedValueEstimate = inputOldValue + K.clip(outputValueFromNetwork - inputOldValue, 0.0f - inputClipEpsilonValue, inputClipEpsilonValue);
                var valueLoss1           = new MeanSquareError().Call(outputValueFromNetwork, inputTargetValue);
                var valueLoss2           = new MeanSquareError().Call(clippedValueEstimate, inputTargetValue);
                outputValueLoss = K.mean(valueLoss1);
            }

            var           valueUpdates = AddOptimizer(valueWeights, outputValueLoss, optimizerValue);
            List <Tensor> valueInputs  = new List <Tensor>();
            if (HasVectorObservation)
            {
                valueInputs.Add(stateTensor);
            }
            if (HasVisualObservation)
            {
                valueInputs.AddRange(inputVisualTensors);
            }
            valueInputs.Add(inputOldValue);
            valueInputs.Add(inputTargetValue);
            valueInputs.Add(inputClipEpsilonValue);
            TrainValueFunction = K.function(valueInputs, new List <Tensor> {
                outputValueLoss
            }, valueUpdates, "TrainValueFunction");

            // Actor losses: PPO-CMA trains mean and variance separately, each with
            // the gradient of the Gaussian log-likelihood stopped through the other.
            Tensor meanLoss, varLoss;
            using (K.name_scope("ActorLosses"))
            {
                Tensor posAdvantage;
                if (usePositiveAdvOnly)
                {
                    // Only positive advantages drive updates (PPO-CMA style).
                    posAdvantage = K.identity(K.relu(K.mean(inputAdvantage)), "ClipedPositiveAdv");
                }
                else
                {
                    posAdvantage = K.identity(K.mean(inputAdvantage), "Adv");
                }
                var meanNoGrad   = K.stop_gradient(outputActionMeanFromNetwork, "MeanNoGrad");
                var varNoGrad    = K.stop_gradient(outputVariance, "VarNoGrad");
                var logVar       = outActionLogVarianceFromNetwork;
                var logVarNoGrad = K.stop_gradient(logVar, "LogVarNoGrad");
                using (K.name_scope("VarLoss"))
                {
                    // Gaussian log-prob with the mean's gradient stopped.
                    var logpNoMeanGrad = -1.0f * K.sum(0.5f * K.square(inputOldAction - meanNoGrad) / outputVariance + 0.5f * logVar, 1);
                    varLoss = K.identity(-1.0f * K.mean(posAdvantage * logpNoMeanGrad), "VarLoss");
                }
                using (K.name_scope("MeanLoss"))
                {
                    // Gaussian log-prob with the variance's gradient stopped.
                    var logpNoVarGrad = -1.0f * K.sum(0.5f * K.square(inputOldAction - outputActionMeanFromNetwork) / varNoGrad + 0.5f * logVarNoGrad, 1);
                    meanLoss = K.identity(-1.0f * K.mean(posAdvantage * logpNoVarGrad), "MeanLoss");
                }
            }

            //add inputs, outputs and parameters to the list
            List <Tensor> allInputs = new List <Tensor>();
            if (HasVectorObservation)
            {
                allInputs.Add(stateTensor);
            }
            if (HasVisualObservation)
            {
                allInputs.AddRange(inputVisualTensors);
            }
            allInputs.Add(inputOldAction);
            allInputs.Add(inputAdvantage);


            //create optimizer and create necessary functions
            var updatesMean = AddOptimizer(meanWeights, meanLoss, optimizerMean);
            var updatesVar  = AddOptimizer(varweights, varLoss, optimizerVariance);

            TrainMeanFunction = K.function(allInputs, new List <Tensor> {
                meanLoss
            }, updatesMean, "UpdateMeanFunction");
            // Fixed: the variance function was named "UpdateMeanFunction" (copy-paste).
            TrainVarianceFunction = K.function(allInputs, new List <Tensor> {
                varLoss
            }, updatesVar, "UpdateVarianceFunction");

            // Pretraining: regress the policy mean/std toward supplied initial values.
            var inputInitialStd  = UnityTFUtils.Input(new int?[] { ActionSizes[0] }, name: "InputInitialStd")[0];
            var inputInitialMean = UnityTFUtils.Input(new int?[] { ActionSizes[0] }, name: "InputInitialMean")[0];
            var policyInitLoss   = K.mean(K.mean(K.square(inputInitialMean - outputActionMeanFromNetwork)));
            policyInitLoss += K.mean(K.mean(K.square(inputInitialStd - K.sqrt(outputVariance))));

            var updatesPretrain = AddOptimizer(network.GetActorWeights(), policyInitLoss, optimizerPretrain);
            var pretrainInputs  = new List <Tensor>();
            pretrainInputs.Add(stateTensor);
            pretrainInputs.Add(inputInitialMean);
            pretrainInputs.Add(inputInitialStd);
            PretrainFunction = K.function(pretrainInputs, new List <Tensor> {
                policyInitLoss
            }, updatesPretrain, "PretrainFunction");
        }
    }
Esempio n. 6
0
    /// <summary>
    /// Builds the supervised-learning (behavioral cloning) graph: the action function
    /// for inference and — for TrainerParamsMimic — a loss against labeled actions
    /// with a single optimizer update function.
    /// </summary>
    /// <param name="brainParameters">Brain parameters (used for the action space type).</param>
    /// <param name="inputStateTensor">Vector observation placeholder.</param>
    /// <param name="inputVisualTensors">Visual observation placeholders.</param>
    /// <param name="trainerParams">Trainer parameters; must be TrainerParamsMimic to enable training.</param>
    public override void InitializeInner(BrainParameters brainParameters, Tensor inputStateTensor, List <Tensor> inputVisualTensors, TrainerParams trainerParams)
    {
        //build the network
        Debug.Assert(ActionSizes.Length <= 1, "Action branching is not supported yet");
        var    networkOutputs = network.BuildNetwork(inputStateTensor, inputVisualTensors, null, ActionSizes[0], ActionSpace);
        Tensor outputAction   = networkOutputs.Item1;
        Tensor outputVar      = networkOutputs.Item2;

        // A variance head is only meaningful for a continuous action space.
        hasVariance = outputVar != null && brainParameters.vectorActionSpaceType == SpaceType.continuous;

        List <Tensor> observationInputs = new List <Tensor>();

        if (HasVectorObservation)
        {
            observationInputs.Add(inputStateTensor);
        }
        if (HasVisualObservation)
        {
            observationInputs.AddRange(inputVisualTensors);
        }
        if (hasVariance)
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction, outputVar
            }, null, "ActionFunction");
        }
        else
        {
            ActionFunction = K.function(observationInputs, new List <Tensor> {
                outputAction
            }, null, "ActionFunction");
        }

        //build the parts for training
        TrainerParamsMimic trainingParams = trainerParams as TrainerParamsMimic;

        if (trainerParams != null && trainingParams == null)
        {
            Debug.LogError("Trainer params for Supervised learning mode needs to be a TrainerParamsMimic type");
        }
        if (trainingParams != null)
        {
            // Label placeholder: continuous → one float per action dimension,
            // discrete → a single int action index.
            var inputActionLabel = UnityTFUtils.Input(new int?[] { ActionSpace == SpaceType.continuous ? ActionSizes[0] : 1 }, name: "InputAction", dtype: ActionSpace == SpaceType.continuous ? DataType.Float : DataType.Int32)[0];
            // Build the imitation loss.
            Tensor loss = null;
            if (ActionSpace == SpaceType.discrete)
            {
                // Cross entropy between the one-hot label and the network's action output.
                Tensor actionOnehot   = K.one_hot(inputActionLabel, K.constant(ActionSizes, dtype: DataType.Int32), K.constant(1.0f), K.constant(0.0f));
                Tensor reshapedOnehot = K.reshape(actionOnehot, new int[] { -1, ActionSizes[0] });
                loss = K.mean(K.categorical_crossentropy(reshapedOnehot, outputAction, false));
            }
            else
            {
                if (hasVariance)
                {
                    // Gaussian negative log-likelihood (up to a constant).
                    loss = K.mean(K.mean(0.5 * K.square(inputActionLabel - outputAction) / outputVar + 0.5 * K.log(outputVar)));
                }
                else
                {
                    loss = K.mean(new MeanSquareError().Call(inputActionLabel, outputAction));
                }
            }
            //add inputs, outputs and parameters to the list
            List <Tensor> updateParameters = network.GetWeights();
            List <Tensor> allInputs        = new List <Tensor>();


            // Fixed: the original also re-added the observation tensors into
            // observationInputs here, duplicating the entries added above; those
            // adds were dead (observationInputs is not used after ActionFunction
            // is created) and have been removed.
            if (HasVectorObservation)
            {
                allInputs.Add(inputStateTensor);
            }
            if (HasVisualObservation)
            {
                allInputs.AddRange(inputVisualTensors);
            }
            allInputs.Add(inputActionLabel);

            //create optimizer and create necessary functions
            var updates = AddOptimizer(updateParameters, loss, optimizer);
            UpdateFunction = K.function(allInputs, new List <Tensor> {
                loss
            }, updates, "UpdateFunction");
        }
    }
Esempio n. 7
0
        /// <summary>
        /// Executes the compiled function: feeds <paramref name="inputs"/> to the
        /// function's input placeholders, runs the fetch outputs and update ops in a
        /// single session run, and returns the fetched outputs as tensors.
        /// </summary>
        /// <param name="inputs">Flat arrays of feed data, positionally matched to this.inputs.</param>
        /// <returns>The fetched output tensors, or null if the session returned nothing.</returns>
        public override List <Tensor> Call(List <Array> inputs)
        {
            var feed_dict = new Dictionary <Tensor, Array>();

            // Pair each input placeholder with its feed array positionally.
            if (this.inputs != null && this.inputs.Count > 0)
            {
                foreach (var tuple in Enumerable.Zip(this.inputs, inputs, (a, b) => Tuple.Create(a, b)))
                {
                    // if (is_sparse(tensor))
                    // {
                    //     sparse_coo = value.tocoo()
                    //     indices = np.concatenate((np.expand_dims(sparse_coo.row, 1),
                    //                               np.expand_dims(sparse_coo.col, 1)), 1)
                    //     value = (indices, sparse_coo.data, sparse_coo.shape)
                    // }
                    feed_dict[tuple.Item1] = tuple.Item2;
                }
            }
            var session = backend.Session;

            // Lazily run any global-variable initializers the first time the graph
            // still has uninitialized variables.
            var init = graph.GetGlobalVariablesInitializer();

            if (init.Length > 0)
            {
                Debug.Log("Initializing variables in function" + name + " call.");
                foreach (var op in init)
                {
                    //Debug.Log(" - " + op.Name);
                    session.Run(new TFOutput[0], new TFTensor[0], new TFOutput[0], new[] { op });
                }
                Debug.Log("Initializing variables in function" + name + " done.");
                //Debug.Log("Operations:");
                //foreach (var op in graph.GetEnumerator())
                //   Debug.Log(" - " + op.Name);
            }

            //Console.WriteLine("Before:");
            //PrintVariables(feed_dict, session);
            // Console.ReadKey();

            var runner = session.GetRunner();

            // Fetch every declared output tensor.
            if (this.outputs != null)
            {
                foreach (var o in this.outputs)
                {
                    runner.Fetch(backend.In(o).Output);
                }
            }

            // Target the update ops (e.g. optimizer steps) so they execute in this run.
            if (this.updates_op != null)
            {
                foreach (var op in this.updates_op)
                {
                    runner.AddTarget(op);
                }
            }


            // Convert each feed array to a TFTensor, inferring the batch dimension
            // (marked -1 in the placeholder's shape) from the data length.
            List <TFTensor> tensors = new List <TFTensor>();

            foreach (KeyValuePair <Tensor, Array> pair in feed_dict)
            {
                UnityTFTensor t = backend.In(pair.Key);

                //get the shape based on the tensor and input data length
                TFTensor data = null;
                if (t.TF_Shape == null || t.TF_Shape.Length == 0)
                {
                    // Scalar placeholder: the feed must contain exactly one element.
                    Debug.Assert(pair.Value.Length == 1, "Input tensor is a  scalar but feed data has more than 1 data");
                    data = UnityTFUtils.TFTensorFromT(pair.Value);
                }
                else
                {
                    long[] actualShape = t.TF_Shape.Copy();
                    // Product of the known dimensions (abs() strips the -1 batch marker).
                    int    totalLength = Mathf.Abs((int)actualShape.Aggregate((s, n) => n * s));

                    int indexOfBatch = actualShape.IndexOf(-1);
                    if (indexOfBatch >= 0)
                    {
                        actualShape[indexOfBatch] = pair.Value.Length / totalLength;
                    }
                    Debug.Assert(totalLength <= pair.Value.Length, "Feed array does not have enough data");

                    //Debug.Log("totalLength:"+totalLength + "  Shape:" + string.Join(",", actualShape));

                    //TFTensor data = TFTensor.FromBuffer(new TFShape(actualShape), (dynamic)pair.Value, 0, totalLength *(pair.Value.Length / totalLength));
                    data = UnityTFUtils.TFTensorFromArray(pair.Value, new TFShape(actualShape));
                }
                // Track the native tensor so it can be disposed after the run.
                tensors.Add(data);
                runner.AddInput(t.Output, data);
            }



            var updated = runner.Run();

            // Release the native feed tensors now that the run has completed.
            foreach (var d in tensors)
            {
                d.Dispose();
            }
            //Console.WriteLine();

            //foreach (var v in updated)
            //{
            //    object obj = v.GetValue();
            //    if (obj is float[,])
            //        Console.WriteLine((obj as float[,]).ToCSharp());
            //    else if (obj is float[])
            //        Console.WriteLine((obj as float[]).ToCSharp());
            //    else
            //        Console.WriteLine(obj);
            //}

            //Console.WriteLine();
            //Console.WriteLine();

            //Console.WriteLine("After:");
            //PrintVariables(feed_dict, session);
            // Only the first this.outputs.Count results are fetches; the rest (if any)
            // correspond to targets and are discarded.
            if (updated != null && updated.Length > 0)
            {
                return(updated.Get(0, this.outputs.Count).Select(t =>
                {
                    var result = new UnityTFTensor(backend);
                    result.TensorValue = t.GetValue();
                    result.TensorType = t.TensorType;
                    return (Tensor)result;
                }).ToList());
            }
            else
            {
                return(null);
            }
        }