Example #1
        /// <summary>
        /// Called after Step and when the environment is resolved. Returns whether the environment should reset.
        /// </summary>
        /// <param name="environment">The environment to record the transition from.</param>
        public bool Record(IRLEnvironment environment)
        {
            Debug.Assert(environment.IsResolved());
            bool isEnd = environment.IsEnd();

            for (int i = 0; i < NumberOfActor; ++i)
            {
                float reward = environment.LastReward(i);
                AddHistory(LastState[i], reward, LastAction[i], LastActionProbs[i], LastValue[i], i);
            }

            if (isEnd || environment.CurrentStep() >= MaxStepHorizon)
            {
                float[] nextValues = new float[NumberOfActor];
                if (!isEnd)
                {
                    nextValues = Model.EvaluateValue(environment.CurrentState());
                }
                else
                {
                    for (int i = 0; i < NumberOfActor; ++i)
                    {
                        nextValues[i] = 0;
                    }
                }

                for (int i = 0; i < NumberOfActor; ++i)
                {
                    ProcessEpisodeHistory(nextValues[i], i);
                }

                return true;
            }
            return false;
        }
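
For context, a caller typically pairs Step with Record in the training loop and resets the environment when Record returns true. A minimal sketch, assuming hypothetical trainer and env instances; the names, the training flag, and env.Reset() are illustrative and not part of the example above:

        // Hypothetical training loop (illustrative only).
        // `trainer` is an instance of the trainer class above,
        // `env` an IRLEnvironment implementation assumed to have a Reset() method.
        while (training)
        {
            trainer.Step(env);            // pick actions and advance the environment
            if (env.IsResolved())         // Record asserts this precondition
            {
                if (trainer.Record(env))  // true at episode end or at MaxStepHorizon
                {
                    env.Reset();          // begin a new episode
                }
            }
        }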
Example #2
        /// <summary>
        /// Step the environment for training.
        /// </summary>
        /// <param name="environment">The environment to step.</param>
        public void Step(IRLEnvironment environment)
        {
            float[][] actions = new float[NumberOfActor][];

            float[] statesAll = new float[NumberOfActor * Model.StateSize];
            for (int i = 0; i < NumberOfActor; ++i)
            {
                var states = environment.CurrentState(i).CopyToArray();
                LastState[i] = states;
                Array.Copy(states, 0, statesAll, i * Model.StateSize, Model.StateSize);
            }

            if (Model.IsActionContinuous)
            {
                float[] actionProbs = null;
                float[] tempAction  = Model.EvaluateActionContinuous(statesAll, out actionProbs);
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i] = new float[Model.ActionSize];
                    Array.Copy(tempAction, i * Model.ActionSize, actions[i], 0, Model.ActionSize);
                    LastAction[i]      = actions[i];
                    LastActionProbs[i] = new float[Model.ActionSize];
                    Array.Copy(actionProbs, i * Model.ActionSize, LastActionProbs[i], 0, Model.ActionSize);
                }
            }
            else
            {
                float[] actionProbs = null;
                int[]   tempAction  = Model.EvaluateActionDiscrete(statesAll, out actionProbs, true);
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i]         = new float[] { tempAction[i] };
                    LastAction[i]      = actions[i];
                    LastActionProbs[i] = new float[] { actionProbs[i] };
                }
            }
            // Evaluate all actors' values in one batched call rather than once per actor.
            float[] values = Model.EvaluateValue(statesAll);
            for (int i = 0; i < NumberOfActor; ++i)
            {
                LastValue[i] = values[i];
            }

            environment.Step(actions);
            Steps++;
        }
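
Note the batching pattern: each actor's state is packed actor-major into the flat statesAll array so the model evaluates every actor in a single call, and the batched outputs are sliced back out per actor. A small sketch of that layout, assuming two actors and Model.StateSize == 3; the sizes and the actor0State/actor1State arrays are illustrative only:

        // Illustrative layout: NumberOfActor == 2, Model.StateSize == 3.
        // statesAll indices:  [0 1 2 | 3 4 5]
        //                      actor0  actor1
        float[] statesAll = new float[2 * 3];
        Array.Copy(actor0State, 0, statesAll, 0 * 3, 3); // actor 0 occupies indices 0..2
        Array.Copy(actor1State, 0, statesAll, 1 * 3, 3); // actor 1 occupies indices 3..5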
Example #3
        /// <summary>
        /// Called after Step and when the environment is resolved. Returns whether the environment should reset.
        /// </summary>
        /// <param name="environment">The environment to record the transition from.</param>
        public virtual bool Record(IRLEnvironment environment)
        {
            Debug.Assert(environment.IsResolved());
            bool isEnd = environment.IsEnd();

            for (int i = 0; i < NumberOfActor; ++i)
            {
                float reward = environment.LastReward(i);
                AddHistory(i, LastState[i], reward, LastAction[i], isEnd);
            }

            if (isEnd || environment.CurrentStep() >= MaxStepHorizon)
            {
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    UpdateReplayBuffer(i);
                }

                return true;
            }
            return false;
        }
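
Unlike Example #1, which processes the finished on-policy episode with a bootstrapped value, this variant stores transitions for off-policy training. For illustration, one plausible shape for the per-actor data that AddHistory collects and UpdateReplayBuffer moves into a replay buffer; this is a sketch under assumed types, not the library's actual buffer:

        // Hypothetical transition record and per-actor history (illustrative only).
        public struct Transition
        {
            public float[] State;
            public float[] Action;
            public float   Reward;
            public bool    IsEnd;
        }

        // e.g. one list per actor, appended to by AddHistory and drained by UpdateReplayBuffer:
        // List<Transition>[] histories = new List<Transition>[NumberOfActor];
        // histories[i].Add(new Transition { State = LastState[i], Action = LastAction[i],
        //                                   Reward = reward, IsEnd = isEnd });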
Example #4
        /// <summary>
        /// Step the environment for training.
        /// </summary>
        /// <param name="environment">The environment to step.</param>
        public void Step(IRLEnvironment environment)
        {
            float[][] actions = new float[NumberOfActor][];

            float[] statesAll = new float[NumberOfActor * Model.StateSize];
            for (int i = 0; i < NumberOfActor; ++i)
            {
                var states = environment.CurrentState(i).CopyToArray();
                LastState[i] = states;
                Array.Copy(states, 0, statesAll, i * Model.StateSize, Model.StateSize);
            }

            bool random = UnityEngine.Random.Range(0, 1.0f) < CurrentRandomChance;

            if (random)
            {
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i]    = new float[] { UnityEngine.Random.Range(0, Model.ActionSize) };
                    LastAction[i] = Mathf.RoundToInt(actions[i][0]);
                }
            }
            else
            {
                float[] maxQs;
                int[]   tempAction = Model.EvaluateAction(statesAll, out maxQs);

                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i]    = new float[] { tempAction[i] };
                    LastAction[i] = tempAction[i];
                }
            }

            environment.Step(actions);
            Steps++;
        }
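
This Step implements epsilon-greedy exploration: with probability CurrentRandomChance a uniform random discrete action is taken, otherwise the greedy action from the Q-model. Such trainers usually decay the exploration rate over time; a minimal sketch, assuming hypothetical StartRandomChance, EndRandomChance, and RandomChanceDecaySteps fields that do not appear in the example:

        // Hypothetical linear epsilon decay, e.g. called once per Step (illustrative only).
        private void DecayRandomChance()
        {
            float t = Mathf.Clamp01((float)Steps / RandomChanceDecaySteps);
            CurrentRandomChance = Mathf.Lerp(StartRandomChance, EndRandomChance, t);
        }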