Code example #1
    /// <summary>
    /// Runs one PPO training update on a batch of experience and returns the resulting losses.
    /// </summary>
    /// <param name="vectorObservations">Batched vector observations, or null if the model uses none.</param>
    /// <param name="visualObservations">Batched visual observations (one 4-D tensor per camera), or null if none.</param>
    /// <param name="actions">Actions taken; for discrete action space these are rounded to ints before feeding.</param>
    /// <param name="actionProbs">Probabilities of the taken actions under the old policy.</param>
    /// <param name="targetValues">Target values for the value-function loss.</param>
    /// <param name="oldValues">Value estimates from the old policy (for value clipping).</param>
    /// <param name="advantages">Advantage estimates for the policy-gradient loss.</param>
    /// <returns>Array of four losses evaluated from the update function (loss outputs 0-3).</returns>
    public virtual float[] TrainBatch(float[,] vectorObservations, List <float[, , , ]> visualObservations, float[,] actions, float[,] actionProbs, float[] targetValues, float[] oldValues, float[] advantages)
    {
        Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
        Debug.Assert(TrainingEnabled, "The model needs to be initialized with Training enabled to use TrainBatch()");

        // Inputs must be appended in the exact order the update function's
        // placeholders were declared: observations, actions, then scalars.
        List <Array> inputs = new List <Array>();

        if (vectorObservations != null)
        {
            inputs.Add(vectorObservations);
        }
        if (visualObservations != null)
        {
            inputs.AddRange(visualObservations);
        }
        if (ActionSpace == SpaceType.continuous)
        {
            inputs.Add(actions);
        }
        else if (ActionSpace == SpaceType.discrete)
        {
            // Discrete actions arrive as floats from the caller; the graph expects ints.
            int[,] actionsInt = actions.Convert(t => Mathf.RoundToInt(t));
            inputs.Add(actionsInt);
        }

        inputs.Add(actionProbs);
        inputs.Add(targetValues);
        inputs.Add(oldValues);
        inputs.Add(advantages);
        inputs.Add(new float[] { ClipEpsilon });
        inputs.Add(new float[] { ValueLossWeight });
        inputs.Add(new float[] { EntropyLossWeight });

        var loss   = UpdatePPOFunction.Call(inputs);
        // Evaluate the first four outputs of the update function as scalar losses.
        var result = new float[] { (float)loss[0].eval(), (float)loss[1].eval(), (float)loss[2].eval(), (float)loss[3].eval() };
        return result;
    }
Code example #2
    /// <summary>
    /// Runs one PPO training update on a batch of experience and returns the resulting losses.
    /// This overload also feeds per-branch action masks for discrete action spaces.
    /// </summary>
    /// <param name="vectorObservations">Batched vector observations, or null if the model uses none.</param>
    /// <param name="visualObservations">Batched visual observations (one 4-D tensor per camera), or null if none.</param>
    /// <param name="actions">Actions taken; for discrete action space these are rounded to ints before feeding.</param>
    /// <param name="actionProbs">Probabilities of the taken actions under the old policy.</param>
    /// <param name="targetValues">Target values for the value-function loss.</param>
    /// <param name="oldValues">Value estimates from the old policy (for value clipping).</param>
    /// <param name="advantages">Advantage estimates for the policy-gradient loss.</param>
    /// <param name="actionsMask">Action masks, one 2-D array per discrete branch. Required when
    /// the action space is discrete; ignored for continuous action spaces.</param>
    /// <returns>Array of four losses evaluated from the update function (loss outputs 0-3).</returns>
    public virtual float[] TrainBatch(float[,] vectorObservations, List <float[, , , ]> visualObservations, float[,] actions, float[,] actionProbs, float[] targetValues, float[] oldValues, float[] advantages, List <float[, ]> actionsMask = null)
    {
        Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
        Debug.Assert(TrainingEnabled, "The model needs to be initialized with Training enabled to use TrainBatch()");

        // Inputs must be appended in the exact order the update function's
        // placeholders were declared: scalars/targets first, then observations and actions.
        List <Array> inputs = new List <Array>();

        inputs.Add(actionProbs);
        inputs.Add(targetValues);
        inputs.Add(oldValues);
        inputs.Add(advantages);
        inputs.Add(new float[] { ClipEpsilon });
        inputs.Add(new float[] { ClipValueLoss });
        inputs.Add(new float[] { ValueLossWeight });
        inputs.Add(new float[] { EntropyLossWeight });

        if (vectorObservations != null)
        {
            inputs.Add(vectorObservations);
        }
        if (visualObservations != null)
        {
            inputs.AddRange(visualObservations);
        }
        if (ActionSpace == SpaceType.continuous)
        {
            inputs.Add(actions);
        }
        else if (ActionSpace == SpaceType.discrete)
        {
            // actionsMask defaults to null but is mandatory for discrete action
            // spaces; fail with a clear message instead of a NullReferenceException.
            if (actionsMask == null)
            {
                throw new ArgumentNullException(nameof(actionsMask), "actionsMask is required when the action space is discrete");
            }
            inputs.AddRange(actionsMask);
            // Discrete actions arrive as floats from the caller; the graph expects ints.
            int[,] actionsInt = actions.Convert(t => Mathf.RoundToInt(t));
            inputs.Add(actionsInt);
        }

        var loss   = UpdatePPOFunction.Call(inputs);
        // Evaluate the first four outputs of the update function as scalar losses.
        var result = new float[] { (float)loss[0].eval(), (float)loss[1].eval(), (float)loss[2].eval(), (float)loss[3].eval() };
        return result;
    }