/// <summary>
/// Called after each step once the environment is resolved. Records the latest
/// transition for every actor and, when the episode ends or the step horizon is
/// reached, bootstraps next-state values and processes each actor's episode history.
/// </summary>
/// <param name="environment">The environment that was just stepped; must be resolved.</param>
/// <returns>True when the environment should be reset; otherwise false.</returns>
public bool Record(IRLEnvironment environment)
{
    Debug.Assert(environment.IsResolved());
    bool isEnd = environment.IsEnd();

    for (int i = 0; i < NumberOfActor; ++i)
    {
        // Fix: fetch the reward for THIS actor. The original parameterless call
        // recorded the same reward for every actor; the virtual overload of this
        // method passes the actor index, so the indexed overload exists.
        float reward = environment.LastReward(i);
        AddHistory(LastState[i], reward, LastAction[i], LastActionProbs[i], LastValue[i], i);
    }

    if (isEnd || environment.CurrentStep() >= MaxStepHorizon)
    {
        // Bootstrap value for the cut-off state: zero at a true terminal state,
        // otherwise the model's value estimate of the current state. A fresh
        // float[] is already zero-initialized, so no explicit zero-fill is needed.
        float[] nextValues = isEnd
            ? new float[NumberOfActor]
            : Model.EvaluateValue(environment.CurrentState());

        for (int i = 0; i < NumberOfActor; ++i)
        {
            ProcessEpisodeHistory(nextValues[i], i);
        }
        return true;
    }
    return false;
}
/// <summary>
/// Called after each step once the environment is resolved. Records the latest
/// transition for every actor and, when the episode ends or the step horizon is
/// reached, flushes each actor's episode into the replay buffer.
/// </summary>
/// <param name="environment">The environment that was just stepped; must be resolved.</param>
/// <returns>True when the environment should be reset; otherwise false.</returns>
public virtual bool Record(IRLEnvironment environment)
{
    Debug.Assert(environment.IsResolved());

    bool episodeEnded = environment.IsEnd();

    // Store this step's per-actor transition.
    for (int actor = 0; actor < NumberOfActor; ++actor)
    {
        AddHistory(actor, LastState[actor], environment.LastReward(actor), LastAction[actor], episodeEnded);
    }

    // Keep collecting unless the episode finished or the horizon was hit.
    if (!episodeEnded && environment.CurrentStep() < MaxStepHorizon)
    {
        return false;
    }

    for (int actor = 0; actor < NumberOfActor; ++actor)
    {
        UpdateReplayBuffer(actor);
    }
    return true;
}