/// <summary>
/// Runs one PPO update step by feeding the batch to <c>UpdatePPOFunction</c>.
/// </summary>
/// <remarks>
/// The inputs are passed in this order: vector observations (if not null), visual
/// observations (if not null), actions, action probabilities, target values, old values,
/// advantages, and then <c>ClipEpsilon</c>, <c>ValueLossWeight</c> and
/// <c>EntropyLossWeight</c>, each wrapped in a one-element array.
/// NOTE(review): this order presumably has to match the input list the update function
/// was built with - confirm against its construction.
/// </remarks>
/// <param name="vectorObservations">Vector observations for the batch; skipped when null.</param>
/// <param name="visualObservations">Visual observation arrays for the batch; skipped when null.</param>
/// <param name="actions">Actions for the batch. For a discrete action space each value is rounded to the nearest integer before being fed.</param>
/// <param name="actionProbs">Action probabilities fed to the update function.</param>
/// <param name="targetValues">Target values fed to the update function.</param>
/// <param name="oldValues">Old value estimates fed to the update function.</param>
/// <param name="advantages">Advantages fed to the update function.</param>
/// <returns>A four-element array holding the first four evaluated outputs of the update function, in order.</returns>
public virtual float[] TrainBatch(float[,] vectorObservations, List<float[,,,]> visualObservations, float[,] actions, float[,] actionProbs, float[] targetValues, float[] oldValues, float[] advantages)
{
    Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
    Debug.Assert(TrainingEnabled == true, "The model needs to initalized with Training enabled to use TrainBatch()");

    List<Array> feed = new List<Array>();

    // Observations come first; either kind may be absent.
    if (vectorObservations != null)
    {
        feed.Add(vectorObservations);
    }
    if (visualObservations != null)
    {
        feed.AddRange(visualObservations);
    }

    // Actions: passed through as floats for continuous spaces, rounded to ints for discrete ones.
    if (ActionSpace == SpaceType.continuous)
    {
        feed.Add(actions);
    }
    else if (ActionSpace == SpaceType.discrete)
    {
        int[,] roundedActions = actions.Convert(a => Mathf.RoundToInt(a));
        feed.Add(roundedActions);
    }

    // Per-sample training targets followed by the scalar hyperparameters.
    feed.AddRange(new Array[]
    {
        actionProbs,
        targetValues,
        oldValues,
        advantages,
        new float[] { ClipEpsilon },
        new float[] { ValueLossWeight },
        new float[] { EntropyLossWeight },
    });

    var outputs = UpdatePPOFunction.Call(feed);

    // Only the first four outputs are read back.
    const int resultCount = 4;
    float[] result = new float[resultCount];
    for (int i = 0; i < resultCount; i++)
    {
        result[i] = (float)outputs[i].eval();
    }
    return result;
}
/// <summary>
/// Runs one PPO update step by feeding the batch to <c>UpdatePPOFunction</c>.
/// </summary>
/// <remarks>
/// The inputs are passed in this order: action probabilities, target values, old values,
/// advantages, then <c>ClipEpsilon</c>, <c>ClipValueLoss</c>, <c>ValueLossWeight</c> and
/// <c>EntropyLossWeight</c> (each wrapped in a one-element array), vector observations
/// (if not null), visual observations (if not null), and finally the actions. For a
/// discrete action space the action masks are inserted before the rounded actions.
/// NOTE(review): this order presumably has to match the input list the update function
/// was built with - confirm against its construction.
/// </remarks>
/// <param name="vectorObservations">Vector observations for the batch; skipped when null.</param>
/// <param name="visualObservations">Visual observation arrays for the batch; skipped when null.</param>
/// <param name="actions">Actions for the batch. For a discrete action space each value is rounded to the nearest integer before being fed.</param>
/// <param name="actionProbs">Action probabilities fed to the update function.</param>
/// <param name="targetValues">Target values fed to the update function.</param>
/// <param name="oldValues">Old value estimates fed to the update function.</param>
/// <param name="advantages">Advantages fed to the update function.</param>
/// <param name="actionsMask">Action masks; required when the action space is discrete, not read otherwise.</param>
/// <returns>A four-element array holding the first four evaluated outputs of the update function, in order.</returns>
/// <exception cref="ArgumentNullException">
/// The action space is discrete and <paramref name="actionsMask"/> is null.
/// </exception>
public virtual float[] TrainBatch(float[,] vectorObservations, List<float[,,,]> visualObservations, float[,] actions, float[,] actionProbs, float[] targetValues, float[] oldValues, float[] advantages, List<float[,]> actionsMask = null)
{
    Debug.Assert(mode == Mode.PPO, "This method is for PPO mode only");
    Debug.Assert(TrainingEnabled == true, "The model needs to initalized with Training enabled to use TrainBatch()");

    List<Array> inputs = new List<Array>();

    // Per-sample training targets followed by the scalar hyperparameters.
    inputs.Add(actionProbs);
    inputs.Add(targetValues);
    inputs.Add(oldValues);
    inputs.Add(advantages);
    inputs.Add(new float[] { ClipEpsilon });
    inputs.Add(new float[] { ClipValueLoss });
    inputs.Add(new float[] { ValueLossWeight });
    inputs.Add(new float[] { EntropyLossWeight });

    // Observations; either kind may be absent.
    if (vectorObservations != null)
    {
        inputs.Add(vectorObservations);
    }
    if (visualObservations != null)
    {
        inputs.AddRange(visualObservations);
    }

    if (ActionSpace == SpaceType.continuous)
    {
        inputs.Add(actions);
    }
    else if (ActionSpace == SpaceType.discrete)
    {
        // The parameter defaults to null, but List<T>.AddRange rejects a null collection.
        // Fail with the same exception type and a message that names the real cause,
        // instead of an error that points at AddRange's internal "collection" parameter.
        if (actionsMask == null)
        {
            throw new ArgumentNullException(nameof(actionsMask), "Action masks are required by TrainBatch() when the action space is discrete");
        }

        inputs.AddRange(actionsMask);
        int[,] actionsInt = actions.Convert(t => Mathf.RoundToInt(t));
        inputs.Add(actionsInt);
    }

    var loss = UpdatePPOFunction.Call(inputs);

    // Only the first four outputs are read back.
    var result = new float[]
    {
        (float)loss[0].eval(),
        (float)loss[1].eval(),
        (float)loss[2].eval(),
        (float)loss[3].eval(),
    };
    return result;
}