Example #1
0
        /// <summary>
        /// Step the environment for training: gather every actor's current state,
        /// evaluate actions for all actors in one batched model call (continuous or
        /// discrete), record the state/action/probability/value needed for the next
        /// training update, then advance the environment with the chosen actions.
        /// </summary>
        /// <param name="environment">Environment to read per-actor states from and to step forward.</param>
        public void Step(IRLEnvironment environment)
        {
            float[][] actions = new float[NumberOfActor][];

            // Pack all actors' states into one flat buffer so the model can
            // evaluate the whole batch in a single forward pass.
            float[] statesAll = new float[NumberOfActor * Model.StateSize];
            for (int i = 0; i < NumberOfActor; ++i)
            {
                var states = environment.CurrentState(i).CopyToArray();
                LastState[i] = states;
                Array.Copy(states, 0, statesAll, i * Model.StateSize, Model.StateSize);
            }

            if (Model.IsActionContinuous)
            {
                // Continuous case: the model returns ActionSize values (and matching
                // probabilities) per actor, concatenated actor-by-actor.
                float[] actionProbs = null;
                float[] tempAction  = Model.EvaluateActionContinuous(statesAll, out actionProbs);
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i] = new float[Model.ActionSize];
                    Array.Copy(tempAction, i * Model.ActionSize, actions[i], 0, Model.ActionSize);
                    LastAction[i]      = actions[i];
                    LastActionProbs[i] = new float[Model.ActionSize];
                    Array.Copy(actionProbs, i * Model.ActionSize, LastActionProbs[i], 0, Model.ActionSize);
                }
            }
            else
            {
                // Discrete case: one sampled action index (and its probability) per actor.
                float[] actionProbs = null;
                int[]   tempAction  = Model.EvaluateActionDiscrete(statesAll, out actionProbs, true);
                for (int i = 0; i < NumberOfActor; ++i)
                {
                    actions[i]         = new float[] { tempAction[i] };
                    LastAction[i]      = actions[i];
                    LastActionProbs[i] = new float[] { actionProbs[i] };
                }
            }

            // FIX: evaluate the value function once for the whole batch instead of
            // re-running the full batched evaluation inside the per-actor loop
            // (the original performed NumberOfActor redundant forward passes and
            // discarded all but one element each time).
            float[] values = Model.EvaluateValue(statesAll);
            for (int i = 0; i < NumberOfActor; ++i)
            {
                LastValue[i] = values[i];
            }

            environment.Step(actions);
            Steps++;
        }
Example #2
0
        /// <summary>
        /// Step the environment for training: gather every actor's current state into
        /// one batch, pick an action per actor — uniformly at random with probability
        /// <c>CurrentRandomChance</c> (epsilon-greedy exploration), otherwise via a
        /// single batched model evaluation — then advance the environment.
        /// </summary>
        /// <param name="environment">Environment to read per-actor states from and to step forward.</param>
        public void Step(IRLEnvironment environment)
        {
            var actions = new float[NumberOfActor][];

            // Flatten all actors' states into a single buffer for batched evaluation.
            var statesAll = new float[NumberOfActor * Model.StateSize];
            for (int actor = 0; actor < NumberOfActor; ++actor)
            {
                float[] currentState = environment.CurrentState(actor).CopyToArray();
                LastState[actor] = currentState;
                Array.Copy(currentState, 0, statesAll, actor * Model.StateSize, Model.StateSize);
            }

            bool explore = UnityEngine.Random.Range(0, 1.0f) < CurrentRandomChance;

            if (explore)
            {
                // Exploration: sample a uniform random action index per actor.
                for (int actor = 0; actor < NumberOfActor; ++actor)
                {
                    float sampled = UnityEngine.Random.Range(0, Model.ActionSize);
                    actions[actor]    = new float[] { sampled };
                    LastAction[actor] = Mathf.RoundToInt(sampled);
                }
            }
            else
            {
                // Exploitation: let the model choose the greedy action for each actor.
                float[] maxQs;
                int[] chosen = Model.EvaluateAction(statesAll, out maxQs);

                for (int actor = 0; actor < NumberOfActor; ++actor)
                {
                    actions[actor]    = new float[] { chosen[actor] };
                    LastAction[actor] = chosen[actor];
                }
            }

            environment.Step(actions);
            Steps++;
        }