/// <summary>
/// Step the environment for training. Batches every actor's current state,
/// queries the model for actions (continuous or discrete), records the
/// state/action/action-probability/value for each actor, then advances the
/// environment with the chosen actions.
/// </summary>
/// <param name="environment">The environment to query states from and step.</param>
public void Step(IRLEnvironment environment)
{
    float[][] actions = new float[NumberOfActor][];
    float[] statesAll = new float[NumberOfActor * Model.StateSize];

    // Flatten each actor's current observation into one contiguous batch buffer.
    for (int i = 0; i < NumberOfActor; ++i)
    {
        var states = environment.CurrentState(i).CopyToArray();
        LastState[i] = states;
        Array.Copy(states, 0, statesAll, i * Model.StateSize, Model.StateSize);
    }

    if (Model.IsActionContinuous)
    {
        // Continuous case: the model returns a flat action vector and matching
        // per-dimension probabilities; slice out each actor's segment.
        float[] actionProbs = null;
        float[] tempAction = Model.EvaluateActionContinuous(statesAll, out actionProbs);
        for (int i = 0; i < NumberOfActor; ++i)
        {
            actions[i] = new float[Model.ActionSize];
            Array.Copy(tempAction, i * Model.ActionSize, actions[i], 0, Model.ActionSize);
            LastAction[i] = actions[i];
            LastActionProbs[i] = new float[Model.ActionSize];
            Array.Copy(actionProbs, i * Model.ActionSize, LastActionProbs[i], 0, Model.ActionSize);
        }
    }
    else
    {
        // Discrete case: one sampled action index (stored as a 1-element float
        // array for the environment's action interface) plus its probability.
        float[] actionProbs = null;
        int[] tempAction = Model.EvaluateActionDiscrete(statesAll, out actionProbs, true);
        for (int i = 0; i < NumberOfActor; ++i)
        {
            actions[i] = new float[] { tempAction[i] };
            LastAction[i] = actions[i];
            LastActionProbs[i] = new float[] { actionProbs[i] };
        }
    }

    // Fix: evaluate values ONCE for the whole batch. The original called
    // Model.EvaluateValue(statesAll) inside the loop, re-running the full
    // batch evaluation NumberOfActor times just to index a different element.
    float[] values = Model.EvaluateValue(statesAll);
    for (int i = 0; i < NumberOfActor; ++i)
    {
        LastValue[i] = values[i];
    }

    environment.Step(actions);
    Steps++;
}
/// <summary>
/// Step the environment for training. With probability
/// <c>CurrentRandomChance</c> every actor takes a uniformly random discrete
/// action this step; otherwise all actors take the model's greedy action.
/// States, chosen actions, and the stepped environment are recorded as in
/// the continuous trainer.
/// </summary>
/// <param name="environment">The environment to query states from and step.</param>
public void Step(IRLEnvironment environment)
{
    var chosenActions = new float[NumberOfActor][];
    var batchedStates = new float[NumberOfActor * Model.StateSize];

    // Flatten each actor's current observation into one contiguous batch buffer.
    for (int actor = 0; actor < NumberOfActor; ++actor)
    {
        var observation = environment.CurrentState(actor).CopyToArray();
        LastState[actor] = observation;
        Array.Copy(observation, 0, batchedStates, actor * Model.StateSize, Model.StateSize);
    }

    // A single random draw decides exploration for the entire batch this step.
    bool explore = UnityEngine.Random.Range(0, 1.0f) < CurrentRandomChance;
    if (explore)
    {
        for (int actor = 0; actor < NumberOfActor; ++actor)
        {
            // Int overload of Random.Range: uniform in [0, ActionSize).
            chosenActions[actor] = new float[] { UnityEngine.Random.Range(0, Model.ActionSize) };
            LastAction[actor] = Mathf.RoundToInt(chosenActions[actor][0]);
        }
    }
    else
    {
        float[] maxQs;
        int[] greedyActions = Model.EvaluateAction(batchedStates, out maxQs);
        for (int actor = 0; actor < NumberOfActor; ++actor)
        {
            chosenActions[actor] = new float[] { greedyActions[actor] };
            LastAction[actor] = greedyActions[actor];
        }
    }

    environment.Step(chosenActions);
    Steps++;
}