示例#1
0
        /// <summary>
        /// Records the latest transition of every actor. Call this after the environment has been
        /// stepped and is resolved. When the episode has ended or the step horizon has been reached,
        /// every actor's episode history is processed as well.
        /// </summary>
        /// <param name="environment">The environment that was just stepped; it is asserted to be resolved.</param>
        /// <returns>
        /// <c>true</c> if the environment should be reset (the episode ended or
        /// <c>CurrentStep()</c> reached <c>MaxStepHorizon</c>); otherwise <c>false</c>.
        /// </returns>
        public bool Record(IRLEnvironment environment)
        {
            Debug.Assert(environment.IsResolved());
            bool isEnd = environment.IsEnd();

            for (int i = 0; i < NumberOfActor; ++i)
            {
                // NOTE(review): LastReward() is called without an actor index, so the reward
                // lookup is not actor-specific — confirm this is intended for multiple actors.
                float reward = environment.LastReward();
                AddHistory(LastState[i], reward, LastAction[i], LastActionProbs[i], LastValue[i], i);
            }

            if (isEnd || environment.CurrentStep() >= MaxStepHorizon)
            {
                // Finished episode: pass 0 as the next value for every actor (a new float[] is
                // already zero-initialized, so no explicit fill is needed).
                // Horizon reached without ending: use the model's value estimate of the current
                // state instead, without allocating a throw-away array first.
                float[] nextValues = isEnd
                    ? new float[NumberOfActor]
                    : Model.EvaluateValue(environment.CurrentState());

                for (int i = 0; i < NumberOfActor; ++i)
                {
                    ProcessEpisodeHistory(nextValues[i], i);
                }

                return true;
            }

            return false;
        }
示例#2
0
        /// <summary>
        /// Stores the most recent transition of each actor in its history. Call this after the
        /// environment has been stepped and is resolved. Once the episode has ended or the step
        /// horizon has been reached, the replay buffer is updated for every actor.
        /// </summary>
        /// <param name="environment">The environment that was just stepped; it is asserted to be resolved.</param>
        /// <returns>
        /// <c>true</c> if the environment should be reset (the episode ended or
        /// <c>CurrentStep()</c> reached <c>MaxStepHorizon</c>); otherwise <c>false</c>.
        /// </returns>
        public virtual bool Record(IRLEnvironment environment)
        {
            Debug.Assert(environment.IsResolved());
            bool episodeEnded = environment.IsEnd();

            // Log the latest step of every actor, tagged with whether the episode ended.
            for (int actor = 0; actor < NumberOfActor; actor++)
            {
                float actorReward = environment.LastReward(actor);
                AddHistory(actor, LastState[actor], actorReward, LastAction[actor], episodeEnded);
            }

            // The step counter is only consulted when the episode has not ended.
            bool shouldReset = episodeEnded || environment.CurrentStep() >= MaxStepHorizon;
            if (!shouldReset)
            {
                return false;
            }

            for (int actor = 0; actor < NumberOfActor; actor++)
            {
                UpdateReplayBuffer(actor);
            }

            return true;
        }